## Improvements over QA_v1:
### 1. Replaced the politeness model with a sentiment model, which is powered by BERT (DistilBERT) and works better

### 2. Tested more cases for completeness

## Outline:
### 1. Input transcripts and grammars

### 2. Split transcripts into sentences (sentencizer)

### 3. Map sentences to categories

### 4. Calculate sentiment score for each part

### To Do

#### 1. Fix bugs in aspect matching, and add some examples for each aspect
#### 2. Blur the boundary between labeled classes (e.g. if the probabilities for two classes are 75% and 80%, assign the sentence to both classes)
#### 3. Retrain the sentiment model
#### 4. Use a more systematic way to customize POS tags (spaCy)

In [11]:
# Load packages
import nltk
import spacy
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load customized packages
from helper.customized_pos import *
from helper.pos_helper import *
from helper.aspect_matching import *
from helper.politeness_helper import *
from helper.sentiment_helper import *
from model_code.distilbert import *
# Build the spaCy pipeline: the large English model plus the project's
# POS post-processing component, inserted right after the tagger.
# On spaCy 2.3.5 the component function itself is passed instead of its name:
# nlp.add_pipe(pos_postprocessor_pipe, name="pos_postprocessor", after='tagger')
nlp = spacy.load("en_core_web_lg")
nlp.add_pipe("pos_postprocessor_pipe", after="tagger")

<function customized_pos.pos_postprocessor_pipe(doc)>

In [2]:
# The tokenizer and the classifier are both loaded from the same pretrained checkpoint
sentiment_checkpoint = "Souvikcmsa/SentimentAnalysisDistillBERT"
tokenizer = AutoTokenizer.from_pretrained(sentiment_checkpoint)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(sentiment_checkpoint)

### Section 1: Input transcripts and grammars

In [3]:
# Load the transcripts from the text files. readlines() gives one list entry
# per line of the file and keeps the trailing newline of each line.
# The encoding is pinned so the text is decoded the same way on every platform
# instead of depending on the locale's default encoding.
with open("../data_collection/greeting_transcripts.txt", encoding="utf-8") as f:
    greeting_transcripts_ls = f.readlines()
with open("../data_collection/ending_transcripts.txt", encoding="utf-8") as f:
    ending_transcripts_ls = f.readlines()

In [4]:
# Show both raw transcript lists, separated by two blank lines
print("greeting transcripts", greeting_transcripts_ls, end="\n\n\n")
print("ending transcripts", ending_transcripts_ls)

greeting transcripts ['ya hello good afternoon speak to nanny seah please afternoon miss nanny my name is jaguar shao and im actually calling from insurance company miss nanny good time to speak for a while this is regarding our partnership charter_plus members and as a result of that insurance company actually formulated a very special thirtieth anniversay insurance bundle right called the i thirty\n', 'hello good afternoon just speak to miss leon michael from income ntuc free for one_or_two minutes if you are not busy okay calling behalf of your adviser xiao guo okay because we having this anniversary plan for the family i just check again you are single_or_married\n']


ending transcripts ['You just reconfirm with your husband whether you already have an enhanced home insurance or not if already have then we can not cover for you oh ok I will call you back tomorrow thank you bye\n', 'So maybe if your friends or relatives or family members are interested you can call back at this num

In [5]:
# Chunk grammar that is passed to nlp_sentencizer in the cells below.
# Each rule builds a labelled chunk from POS tags and/or from the labels of
# earlier rules: JP and NP feed PP and VP, which feed Sentence and Question.
# NOTE(review): the syntax looks like NLTK RegexpParser tag patterns — confirm
# in the helper that consumes this string.
# NOTE(review): the first alternation of the Question rule ends with a trailing
# "|" (an empty alternative) and uses the tag "DP"; check whether "WP" or a
# custom tag was intended.
grammars = r""" 
    JP: {<JJ.*>}
    NP: {<JP|CD>*<PRP.*|DT|NN.*>+}
    PP: {<IN|TO|RP><NP|VB.*>} 
    VP: {<VB.*|RB.*>+<PP|NP>*}
    Sentence: {<UH>*<JP|NP>*<MD|IN>*<VP|PP|NP|JP>+}
    Question: {<MD|WDT|DP|WRB|><MD>*<Sentence|NP|PP|VP|JP>}
"""

### Section 2: Split transcripts into sentences (sentencizer)

In [6]:
# Run the sentencizer over every greeting transcript and store, per transcript
# index, only the text part of each returned item.
print("Greeting transcripts")
text_result_dic = {}
for i, transcripts in enumerate(greeting_transcripts_ls):
    full_result = nlp_sentencizer(transcripts, grammars, nlp)
    # index 1 of every item is the sentence text (index 0 is its label in the saved output)
    text_result = [chunk[1] for chunk in full_result]
    text_result_dic[i] = text_result
    print("Current transcript id: ", i)
    print("full result: ", full_result, end="\n\n\n")
    print("only text result: ", text_result, end="\n\n\n")

Greeting transcripts
Current transcript id:  0
full result:  [['Sentence', 'ya hello good afternoon speak to nanny seah'], ['Sentence', 'please afternoon miss nanny my name is jaguar shao and'], ['Sentence', 'i m actually calling from insurance company miss nanny good time to speak for a while this is regarding our partnership charter_plus members and'], ['Sentence', 'as a result of that insurance company actually formulated a very special thirtieth anniversay insurance bundle right called the i thirty \n']]


only text result:  ['ya hello good afternoon speak to nanny seah', 'please afternoon miss nanny my name is jaguar shao and', 'i m actually calling from insurance company miss nanny good time to speak for a while this is regarding our partnership charter_plus members and', 'as a result of that insurance company actually formulated a very special thirtieth anniversay insurance bundle right called the i thirty \n']


Current transcript id:  1
full result:  [['Sentence', 'hello good 

In [7]:
# Same sentencizer pass for the ending transcripts; the results are keyed by
# transcript index in text_result_dic1.
print("Ending transcripts")
text_result_dic1 = {}
for i, transcripts in enumerate(ending_transcripts_ls):
    full_result = nlp_sentencizer(transcripts, grammars, nlp)
    # index 1 of every item is the sentence text (index 0 is its label in the saved output)
    text_result = [chunk[1] for chunk in full_result]
    text_result_dic1[i] = text_result
    print("Current transcript id: ", i)
    print("full result: ", full_result, end="\n\n\n")
    print("only text result: ", text_result, end="\n\n\n")

Ending transcripts
Current transcript id:  0
full result:  [['Sentence', 'You just reconfirm with your husband whether you already have an enhanced home insurance or'], ['Sentence', 'not if already have then we can not cover for you oh ok I will call you back tomorrow thank you bye \n']]


only text result:  ['You just reconfirm with your husband whether you already have an enhanced home insurance or', 'not if already have then we can not cover for you oh ok I will call you back tomorrow thank you bye \n']


Current transcript id:  1
full result:  [['Sentence', 'So maybe if your friends or relatives or family members are interested you'], ['Sentence', 'can call back at this number lah this number you'], ['Sentence', 'can see from your phone lah thank you bye bye \n']]


only text result:  ['So maybe if your friends or relatives or family members are interested you', 'can call back at this number lah this number you', 'can see from your phone lah thank you bye bye \n']




### Section 3: Map sentences to categories

In [8]:
# Run aspect matching on the sentences of every transcript and collect the
# per-transcript results in a list: greeting transcripts first, then ending.
print("Greeting result mapping")
mapping_result_ls = []
for text in text_result_dic.values():
    category_ls = nlp_aspect_matching(text, nlp, 'greeting', True)
    print(category_ls, end="\n\n\n")
    mapping_result_ls.append(category_ls)

print("Ending result mapping")
mapping_result_ls1 = []
for text in text_result_dic1.values():
    category_ls = nlp_aspect_matching(text, nlp, 'ending', True)
    print(category_ls, end="\n\n\n")
    mapping_result_ls1.append(category_ls)

Greeting result mapping
[['ask_for_permission', 'ya hello good afternoon speak to nanny seah'], ['purpose_of_call', 'please afternoon miss nanny my name is jaguar shao and i m actually calling from insurance company miss nanny good time to speak for a while this is regarding our partnership charter_plus members and as a result of that insurance company actually formulated a very special thirtieth anniversay insurance bundle right called the i thirty \n']]


[['opening', 'hello good afternoon just speak to miss leon michael from income ntuc free for one_or_two minutes if you are not busy'], ['purpose_of_call', 'okay calling behalf of your adviser xiao guo'], ['ask_for_permission', 'okay because we having this anniversary plan for the family i just check again you are single_or_married \n']]


Ending result mapping
[['follow-up', 'You just reconfirm with your husband whether you already have an enhanced home insurance or'], ['closing', 'not if already have then we can not cover for you o

### Section 4: Calculate sentiment score for each part

In [13]:
# Wrap the pretrained sentiment model in the project's architecture and
# restore the fine-tuned weights from disk.
new_model = disbert_arch(sentiment_model)
path = 'model_collection/fine_tune_disbert.pt'
# The model is only used for prediction in the cells below, so switch it to
# evaluation mode; layers such as dropout then behave deterministically.
new_model.eval()
# map_location="cpu" lets a checkpoint saved from a GPU be read on a CPU-only
# machine; load_state_dict copies the values into new_model's own parameters.
# NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
new_model.load_state_dict(torch.load(path, map_location="cpu"))

<All keys matched successfully>

In [14]:
# Label every mapped greeting segment with a sentiment from the fine-tuned model
print("Greeting part sentiments")
for mapping_result in mapping_result_ls:
    sentiment_ls = nlp_sentiment(mapping_result, tokenizer, new_model)
    print(sentiment_ls, end="\n\n\n")

Greeting part sentiments
[['ask_for_permission', 'ya hello good afternoon speak to nanny seah', 'positive'], ['purpose_of_call', 'please afternoon miss nanny my name is jaguar shao and i m actually calling from insurance company miss nanny good time to speak for a while this is regarding our partnership charter_plus members and as a result of that insurance company actually formulated a very special thirtieth anniversay insurance bundle right called the i thirty \n', 'neutral']]


[['opening', 'hello good afternoon just speak to miss leon michael from income ntuc free for one_or_two minutes if you are not busy', 'neutral'], ['purpose_of_call', 'okay calling behalf of your adviser xiao guo', 'positive'], ['ask_for_permission', 'okay because we having this anniversary plan for the family i just check again you are single_or_married \n', 'neutral']]




In [16]:
# Label every mapped ending segment with a sentiment from the fine-tuned model
print("Ending part sentiments")
for mapping_result in mapping_result_ls1:
    sentiment_ls = nlp_sentiment(mapping_result, tokenizer, new_model)
    print(sentiment_ls, end="\n\n\n")

Ending part sentiments
[['follow-up', 'You just reconfirm with your husband whether you already have an enhanced home insurance or', 'positive'], ['closing', 'not if already have then we can not cover for you oh ok I will call you back tomorrow thank you bye \n', 'neutral']]


[['follow-up', 'So maybe if your friends or relatives or family members are interested you can call back at this number lah this number you', 'neutral'], ['closing', 'can see from your phone lah thank you bye bye \n', 'positive']]


