## Improvement from QA_v1:
### 1. Replace the politeness model with the sentiment model, which is powered by BERT and works better

### 2. Test more cases for completeness

## Outline:
### 1. Input transcripts and grammars

### 2. Split transcripts into sentences

### 3. Mapping sentences to each category

### 4. Calculate sentiment score for each part

In [1]:
# Load packages
import nltk
import spacy

# Load customized packages
from customized_pos import *
from pos_helper import *
from aspect_matching import *
from politeness_helper import *

# Load spacy nlp model
# `nlp` is the shared pipeline object passed to the helper functions in the
# later cells (nlp_sentencizer, nlp_aspect_matching).
nlp = spacy.load('en_core_web_lg')
# Insert the custom POS post-processing component directly after the tagger.
# `pos_postprocessor_pipe` presumably comes from one of the star imports
# above — confirm which module defines it.
# NOTE(review): passing the component callable itself to add_pipe is the
# spaCy v2 calling convention; spaCy v3 expects a registered component name
# instead — confirm the pinned spaCy version.
nlp.add_pipe(pos_postprocessor_pipe, name="pos_postprocessor", after='tagger')

### Section 1: Input transcripts and grammars

In [2]:
# Load from text file.
# Every line of the file becomes one entry of transcripts_ls; readlines()
# keeps the trailing "\n" of each line.
# The encoding is passed explicitly so decoding does not depend on the
# platform's default locale encoding.
with open("data_collection/transcripts.txt", encoding="utf-8") as f:
    transcripts_ls = f.readlines()

In [3]:
# Display the loaded transcripts to sanity-check the input.
transcripts_ls

['ya hello good afternoon speak to nanny seah please afternoon miss nanny my name is jaguar shao and im actually calling from insurance company miss nanny good time to speak for a while this is regarding our partnership charter_plus members and as a result of that insurance company actually formulated a very special thirtieth anniversay insurance bundle right called the i thirty\n',
 'hello good afternoon just speak to miss leon michael from income ntuc free for one_or_two minutes if you are not busy okay calling behalf of your adviser xiao guo okay because we having this anniversary plan for the family i just check again you are single_or_married\n']

In [13]:
# Chunk grammar that is handed to nlp_sentencizer() together with each
# transcript.
# NOTE(review): the `NAME: {<TAG>...}` rule syntax looks like an NLTK
# RegexpParser grammar over Penn Treebank POS tags — confirm inside
# nlp_sentencizer.
# Rules, in the order they are declared:
#   JP       - a single JJ* tag
#   NP       - zero or more JP/CD, then one or more PRP*/DT/NN*
#   PP       - IN/TO/RP followed by an NP or a VB* tag
#   VP       - one or more VB*/RB*, then optional JP (first rule) or
#              optional PP/NP (second rule)
#   Sentence - optional UH, JP/NP, optional MD/IN, then VP/PP/NP (first
#              rule); any run of PP/VP/NP/JP (second rule)
#   Question - MD/WDT/DP/WRB, optional MD, then a Sentence or phrase
# NOTE(review): in the Question rule, `DP` is not a standard Penn Treebank
# tag (possibly `WP` was intended) and the trailing `|` before `>` adds an
# empty alternative — verify both against the intended behaviour.
grammars = r""" 
    JP: {<JJ.*>}
    NP: {<JP|CD>*<PRP.*|DT|NN.*>+}
    PP: {<IN|TO|RP><NP|VB.*>} 
    VP: {<VB.*|RB.*>+<JP>*}
    VP: {<VB.*|RB.*>+<PP|NP>*}
    Sentence: {<UH>*<JP|NP>+<MD|IN>*<VP|PP|NP>+}
    Sentence: {<PP|VP|NP|JP>+}
    Question: {<MD|WDT|DP|WRB|><MD>*<Sentence|NP|PP|VP|JP>}
"""

### Section 2: Split transcripts into sentences

In [16]:
# Split every transcript in transcripts_ls into sentences and store the
# sentence texts in text_result_dic, keyed by the transcript's position.
text_result_dic = {}
for transcript_id, transcript in enumerate(transcripts_ls):
    # Each entry of full_result is indexed with [1] to get the sentence text;
    # the printed output shows (label, text) pairs.
    full_result = nlp_sentencizer(transcript, grammars, nlp)
    text_result = [labelled[1] for labelled in full_result]
    text_result_dic[transcript_id] = text_result
    print("Current transcript id: ", transcript_id)
    print("full result: ", full_result)
    print("\n")
    print("only text result: ", text_result)
    print("\n")

Current transcript id:  0
full result:  [('Sentence', 'ya hello good afternoon speak to nanny seah'), ('Sentence', 'please afternoon miss nanny my name is jaguar shao and'), ('Sentence', 'i m actually calling from insurance company miss nanny good time to speak for a while this is regarding our partnership charter_plus members and'), ('Sentence', 'as a result of that insurance company actually formulated'), ('Sentence', 'a very special thirtieth anniversay insurance bundle right called the i thirty \n')]


only text result:  ['ya hello good afternoon speak to nanny seah', 'please afternoon miss nanny my name is jaguar shao and', 'i m actually calling from insurance company miss nanny good time to speak for a while this is regarding our partnership charter_plus members and', 'as a result of that insurance company actually formulated', 'a very special thirtieth anniversay insurance bundle right called the i thirty \n']


Current transcript id:  1
full result:  [('Sentence', 'hello good a

### Section 3: Mapping sentences to each category

In [20]:
# Run aspect matching on each transcript's sentence list and keep the
# result in mapping_result_dic under the same transcript id.
mapping_result_dic = {}
for transcript_id, sentences in text_result_dic.items():
    print(sentences)
    category_ls = nlp_aspect_matching(sentences, nlp, 'greeting', True)
    print(category_ls)
    mapping_result_dic[transcript_id] = category_ls

['ya hello good afternoon speak to nanny seah', 'please afternoon miss nanny my name is jaguar shao and', 'i m actually calling from insurance company miss nanny good time to speak for a while this is regarding our partnership charter_plus members and', 'as a result of that insurance company actually formulated', 'a very special thirtieth anniversay insurance bundle right called the i thirty \n']
[('opening', 'ya hello good afternoon speak to nanny seah please afternoon miss nanny my name is jaguar shao and'), ('purpose_of_call', ' as a result of that insurance company actually formulated a very special thirtieth anniversay insurance bundle right called the i thirty \n')]
['hello good afternoon just speak to miss leon michael from income ntuc', 'free for one_or_two minutes if you are not busy okay', 'calling behalf of your adviser xiao guo okay', 'because we having this anniversary plan for the family i just check again you are single_or_married \n']
[('opening', 'hello good afternoon 