## Improvements over QA_v2

### 1. Use a transformer model for sentence encodings
### 2. Use a fine-tuned sentiment model for sentence politeness
### 3. Assign a score for each category in the QA guidelines (work in progress)
### 4. Modularize and systematize the customized POS rules

## Outline
### 1. Input transcripts and grammars
### 2. Split transcripts into sentences (sentencizer)
### 3. Map sentences to each category
### 4. Calculate a sentiment score for each part
### 5. Determine whether each category passed

## To Do
### 1. Enable multi-label category mapping

In [1]:
# Load packages
import nltk
import spacy
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import torch.nn

import sys
# Put the project directory first on the import path so the `helper.*` and
# `model_code.*` imports that follow can be found (presumably those packages
# live in this directory — TODO confirm).
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# run on another machine until it is replaced with a configurable project root.
sys.path.insert(0, '/Users/leon/Income/python files/politeness_code')

# Load customized file
from helper.customized_pos import *
from helper.pos_helper import *
from helper.grammar import *
from helper.aspect_matching import *
from helper.sentiment_helper import *
from helper.score_generation import *
from model_code.distilbert import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Path (relative to the notebook's working directory) of the checkpoint file that
# is read with torch.load and passed to sentiment_model.load_state_dict when the
# models are loaded.
sentiment_path = "../model_collection/sentiment_model/fine_tune_disbert.pt"

In [3]:
# Load model
# The tokenizer and the base sequence-classification model are read from local
# directories; the base model is then wrapped by the project's disbert_arch and
# the weights stored at sentiment_path are loaded into the wrapped model.
sentiment_tokenizer = AutoTokenizer.from_pretrained("../model_collection/sentiment_tokenizer")
sentiment_model = AutoModelForSequenceClassification.from_pretrained("../model_collection/sentiment_model/")
sentiment_model = disbert_arch(sentiment_model)
# map_location="cpu" lets a checkpoint that was saved on a GPU be read on a
# machine without one. load_state_dict copies the values into the model's
# existing parameters, so the device the model itself lives on is unchanged.
sentiment_model.load_state_dict(torch.load(sentiment_path, map_location="cpu"))
# The notebook only runs inference: put the wrapped model (and every submodule)
# in eval mode so layers such as dropout behave deterministically.
sentiment_model.eval()

# spaCy pipeline used by the sentencizer cells, and the sentence-embedding model
# used for category matching.
nlp = spacy.load('en_core_web_lg')
sentence_model = SentenceTransformer("../model_collection/sentence_model")

In [4]:
# Add customized rules to the spacy model.
# `add_pos` and `pos_ls` are not defined in this notebook; they come from the
# star-imported helper modules (presumably helper.customized_pos — TODO confirm).
# The return value is discarded, so the call presumably modifies `nlp` in place;
# run this cell before any cell that passes `nlp` to the sentencizer.
add_pos(pos_ls, nlp)

### Section1: Input transcripts and grammars

In [5]:
# Read the transcript files: every line of a file becomes one transcript string.
# readlines() keeps each line's trailing "\n", which the downstream cells receive
# unchanged.
# The encoding is pinned to UTF-8 so the files decode the same way on every
# platform instead of depending on the locale's default encoding.
with open("data_collection/greeting_transcripts.txt", encoding="utf-8") as f:
    greeting_transcripts_ls = f.readlines()
with open("data_collection/ending_transcripts.txt", encoding="utf-8") as f:
    ending_transcripts_ls = f.readlines()

In [6]:
# Dump both raw transcript lists. The custom `end` reproduces the blank lines
# that separate the greeting dump from the ending dump.
print("greeting transcripts", greeting_transcripts_ls, end="\n\n\n")
print("ending transcripts", ending_transcripts_ls)

greeting transcripts ['ya hello good afternoon speak to nanny seah please afternoon miss nanny my name is jaguar shao and im actually calling from insurance company miss nanny good time to speak for a while this is regarding our partnership charter plus members and as a result of that insurance company actually formulated a very special thirtieth anniversay insurance bundle right called the i thirty\n', 'hello good afternoon just speak to miss leon michael from income ntuc free for one or two minutes if you are not busy okay calling behalf of your adviser xiao guo okay because we having this anniversary plan for the family i just check again you are single or married\n', 'Okay this is alice here coming from income NTUC income okay currently we partnership with ocbc plus can I check with you if you a good time to speak for a while first of course thank you for using ocbc okay and then secondly as a valued customer we would like to actually update and also give you a priority okay becaus

### Section2: Split transcripts into sentences

In [7]:
# Split every greeting transcript into sentences.
# nlp_sentencizer returns a list of [label, text] pairs (see the printed output);
# greeting_text_ls collects, per transcript, only the text of each pair.
print("Greeting transcripts")
greeting_text_ls = []
for transcript_id, raw_transcript in enumerate(greeting_transcripts_ls):
    tagged_sentences = nlp_sentencizer(raw_transcript, grammars, nlp)
    sentence_texts = [pair[1] for pair in tagged_sentences]
    greeting_text_ls.append(sentence_texts)

    # Progress dump; each result is followed by two blank lines.
    print("Current transcript id: ", transcript_id)
    print("full result: ", tagged_sentences, end="\n\n\n")
    print("only text result: ", sentence_texts, end="\n\n\n")

Greeting transcripts
Current transcript id:  0
full result:  [['Sentence', 'ya hello good afternoon speak to nanny seah'], ['Sentence', 'please afternoon miss nanny my name is jaguar shao and'], ['Sentence', 'i m actually calling from insurance company miss nanny good time to speak for a while this is regarding our partnership charter plus members and'], ['Sentence', 'as a result of that insurance company actually formulated a very special thirtieth anniversay insurance bundle right called the i thirty \n']]


only text result:  ['ya hello good afternoon speak to nanny seah', 'please afternoon miss nanny my name is jaguar shao and', 'i m actually calling from insurance company miss nanny good time to speak for a while this is regarding our partnership charter plus members and', 'as a result of that insurance company actually formulated a very special thirtieth anniversay insurance bundle right called the i thirty \n']


Current transcript id:  1
full result:  [['Sentence', 'hello good 

In [8]:
# Split every ending transcript into sentences.
# nlp_sentencizer returns a list of [label, text] pairs (see the printed output);
# ending_text_ls collects, per transcript, only the text of each pair.
print("Ending transcripts")
ending_text_ls = []
for transcript_id, raw_transcript in enumerate(ending_transcripts_ls):
    tagged_sentences = nlp_sentencizer(raw_transcript, grammars, nlp)
    sentence_texts = [pair[1] for pair in tagged_sentences]
    ending_text_ls.append(sentence_texts)

    # Progress dump; each result is followed by two blank lines.
    print("Current transcript id: ", transcript_id)
    print("full result: ", tagged_sentences, end="\n\n\n")
    print("only text result: ", sentence_texts, end="\n\n\n")

Ending transcripts
Current transcript id:  0
full result:  [['Sentence', 'You just reconfirm with your husband whether you already have an enhanced home insurance or'], ['Sentence', 'not if already have then we can not cover for you oh ok I will call you back tomorrow thank you bye \n']]


only text result:  ['You just reconfirm with your husband whether you already have an enhanced home insurance or', 'not if already have then we can not cover for you oh ok I will call you back tomorrow thank you bye \n']


Current transcript id:  1
full result:  [['Sentence', 'So maybe if your friends or relatives or family members are interested you'], ['Sentence', 'can call back at this number lah this number you'], ['Sentence', 'can see from your phone lah thank you bye bye \n']]


only text result:  ['So maybe if your friends or relatives or family members are interested you', 'can call back at this number lah this number you', 'can see from your phone lah thank you bye bye \n']


Current transcr

### Section3: Map sentences to each category

In [9]:
# Map the sentences of every transcript to categories and collect the results
# in two lists (one entry per transcript): mapping_greeting and mapping_ending.
# Each entry returned by nlp_aspect_matching is a list of [category, text] pairs
# (see the printed output).
# NOTE(review): `True` and `0.4` are passed positionally; their meaning is defined
# by nlp_aspect_matching (0.4 looks like a matching threshold — TODO confirm).
print("Greeting result mapping")
mapping_greeting = []
for text in greeting_text_ls:  # the transcript index was never used, so no enumerate
    category_ls = nlp_aspect_matching(text, sentence_model, 'greeting', True, 0.4)
    print(category_ls)
    print("\n")
    mapping_greeting.append(category_ls)

print("Ending result mapping")
mapping_ending = []
for text in ending_text_ls:
    category_ls = nlp_aspect_matching(text, sentence_model, 'ending', True, 0.4)
    print(category_ls)
    print("\n")
    mapping_ending.append(category_ls)

Greeting result mapping
[['opening', 'ya hello good afternoon speak to nanny seah'], ['no_matching', 'please afternoon miss nanny my name is jaguar shao and'], ['purpose_of_call', 'i m actually calling from insurance company miss nanny good time to speak for a while this is regarding our partnership charter plus members and as a result of that insurance company actually formulated a very special thirtieth anniversay insurance bundle right called the i thirty \n']]


[['opening', 'hello good afternoon just speak to miss leon michael from income ntuc free for one or two minutes if you are not busy'], ['no_matching', 'okay calling behalf of your adviser xiao guo'], ['purpose_of_call', 'okay because we having this anniversary plan for the family i just check again you are single or married \n']]


[['opening', 'Okay this is alice here coming from income NTUC income'], ['no_matching', 'okay currently we partnership with ocbc plus'], ['opening', 'can I check with you if you a good time to sp

### Section4: Calculate sentiment score for each part

In [10]:
# Attach a sentiment label to every mapped sentence.
# nlp_sentiment returns, per transcript, a list of
# [category, text, sentiment] entries (see the printed output).

# --- greeting part ---
print("Greeting part sentiments")
sentiment_greeting = []
for categorized_sentences in mapping_greeting:
    labelled_sentences = nlp_sentiment(categorized_sentences, sentiment_tokenizer, sentiment_model)
    sentiment_greeting.append(labelled_sentences)
    print(labelled_sentences, end="\n\n\n")

# --- ending part ---
print("Ending part sentiments")
sentiment_ending = []
for categorized_sentences in mapping_ending:
    labelled_sentences = nlp_sentiment(categorized_sentences, sentiment_tokenizer, sentiment_model)
    sentiment_ending.append(labelled_sentences)
    print(labelled_sentences, end="\n\n\n")

Greeting part sentiments
[['opening', 'ya hello good afternoon speak to nanny seah', 'positive'], ['no_matching', 'please afternoon miss nanny my name is jaguar shao and', 'neutral'], ['purpose_of_call', 'i m actually calling from insurance company miss nanny good time to speak for a while this is regarding our partnership charter plus members and as a result of that insurance company actually formulated a very special thirtieth anniversay insurance bundle right called the i thirty \n', 'neutral']]


[['opening', 'hello good afternoon just speak to miss leon michael from income ntuc free for one or two minutes if you are not busy', 'neutral'], ['no_matching', 'okay calling behalf of your adviser xiao guo', 'positive'], ['purpose_of_call', 'okay because we having this anniversary plan for the family i just check again you are single or married \n', 'neutral']]


[['opening', 'Okay this is alice here coming from income NTUC income', 'neutral'], ['no_matching', 'okay currently we partners

### Section5: Determine whether each category passed

In [11]:
# Grade every greeting transcript.
# result_reformat groups the labelled sentences by category, and assign_score
# turns that grouping into a per-category True/False grading (see the printed
# output). Both are stored per transcript in greeting_result_ls.
print("Greeting result")
greeting_result_ls = []
for transcript_id, labelled_sentences in enumerate(sentiment_greeting):
    sentences_by_category = result_reformat(labelled_sentences, greeting_lexicons)
    grading = assign_score(sentences_by_category, greeting_lexicons)

    print('current transcript id ', transcript_id)
    print('sentence: ', sentences_by_category)
    print('grading: ', grading, end="\n\n\n")

    greeting_result_ls.append(
        {"sentence_collection": sentences_by_category, "score_collection": grading}
    )

Greeting result
current transcript id  0
sentence:  {'opening': [['ya hello good afternoon speak to nanny seah', 'positive']], 'no_matching': [['please afternoon miss nanny my name is jaguar shao and', 'neutral']], 'purpose_of_call': [['i m actually calling from insurance company miss nanny good time to speak for a while this is regarding our partnership charter plus members and as a result of that insurance company actually formulated a very special thirtieth anniversay insurance bundle right called the i thirty \n', 'neutral']]}
grading:  {'opening': True, 'purpose_of_call': True, 'ask_for_permission': False}


current transcript id  1
sentence:  {'opening': [['hello good afternoon just speak to miss leon michael from income ntuc free for one or two minutes if you are not busy', 'neutral']], 'no_matching': [['okay calling behalf of your adviser xiao guo', 'positive']], 'purpose_of_call': [['okay because we having this anniversary plan for the family i just check again you are single 

In [12]:
# Grade every ending transcript.
# result_reformat groups the labelled sentences by category, and assign_score
# turns that grouping into a per-category True/False grading (see the printed
# output). Both are stored per transcript in ending_result_ls.
print("Ending result")
ending_result_ls = []
for transcript_id, labelled_sentences in enumerate(sentiment_ending):
    sentences_by_category = result_reformat(labelled_sentences, ending_lexicons)
    grading = assign_score(sentences_by_category, ending_lexicons)

    print('current transcript id ', transcript_id)
    print('sentence: ', sentences_by_category)
    print('grading: ', grading, end="\n\n\n")

    ending_result_ls.append(
        {"sentence_collection": sentences_by_category, "score_collection": grading}
    )

Ending result
current transcript id  0
sentence:  {'no_matching': [['You just reconfirm with your husband whether you already have an enhanced home insurance or', 'positive']], 'follow-up': [['not if already have then we can not cover for you oh ok I will call you back tomorrow thank you bye \n', 'neutral']]}
grading:  {'closing': False, 'data_enrichment': False, 'follow-up': True}


current transcript id  1
sentence:  {'no_matching': [['So maybe if your friends or relatives or family members are interested you', 'positive']], 'follow-up': [['can call back at this number lah this number you', 'neutral']], 'closing': [['can see from your phone lah thank you bye bye \n', 'positive']]}
grading:  {'closing': True, 'data_enrichment': False, 'follow-up': True}


current transcript id  2
sentence:  {'no_matching': [['Maybe in the future you interested in the annual travel you', 'positive']], 'closing': [['can get back to us again thank you bye \n', 'positive']]}
grading:  {'closing': True, 'd