## Outline

1. Input the whole transcipts
2. Cut the first several lines as opening, and the last several lines as closing
3. Do sentence seperation, category matching, sentiment analysis on each part
4. Save the score config file

In [1]:
# Load packages
import nltk
import spacy
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import torch.nn

import sys
sys.path.insert(0, '/Users/leon/Income/python files/Telesales-QA-Framework')
import os

# Load customized file
from helper.customized_pos import *
from helper.pos_helper import *
from helper.grammar import *
from helper.aspect_matching import *
from helper.sentiment_helper import *
from helper.score_generation import *
from helper.text_preprocessing import *
from model_code.distilbert import *
from helper.config_helper import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# model path
sentiment_path = "../model_collection/sentiment_model/fine_tune_disbert.pt"
transcript_name = "VL220822.txt"
# transcript path
transcript_path = os.path.join("transcripts", transcript_name)

In [3]:
# Load model
sentiment_tokenizer = AutoTokenizer.from_pretrained("../model_collection/sentiment_tokenizer")
sentiment_model = AutoModelForSequenceClassification.from_pretrained("../model_collection/sentiment_model/")
sentiment_model = disbert_arch(sentiment_model)
sentiment_model.load_state_dict(torch.load(sentiment_path))

nlp = spacy.load('en_core_web_lg')
sentence_model = SentenceTransformer("../model_collection/sentence_model")

In [4]:
# Add customized rules to the spacy model
add_pos(pos_ls, nlp)

In [5]:
# Load config
general_config = load_yaml('config/general_config.yaml')
threshold_config = general_config[0]['similarity_threshold']

## Input transcripts and text processing

In [6]:
with open(transcript_path) as f:
    transcript = f.read()
# Text preprocessing
transcript = text_preprocessing(transcript)

In [7]:
print(transcript)

okay this is alice here coming from income ntuc income okay currently we partnership with ocbc plus can i check with you if you a good time to speak for a while first of course thank you for using ocbc okay and then secondly as a valued customer we would like to actually update and also give you a priority okay because in year twenty twenty right income actually celebrate our fiftieth annivesary okay and then in that particular year we actually came out a very very special anniversary policy called i fifty okay of course i stand for income income fifty means for fifty years alright okay miss junar even you already have a existing policy right okay then we even make it very very special because we can cover for your whole family member at one flat premium okay and this regardless of your age secondly right we garden to how many dependent are children you have as well as regardless of your occupation and also your occupation economic security is an important pillar of national and region

## Find opening and closing parts

In [8]:
opening = transcript[:1000]
opening

'okay this is alice here coming from income ntuc income okay currently we partnership with ocbc plus can i check with you if you a good time to speak for a while first of course thank you for using ocbc okay and then secondly as a valued customer we would like to actually update and also give you a priority okay because in year twenty twenty right income actually celebrate our fiftieth annivesary okay and then in that particular year we actually came out a very very special anniversary policy called i fifty okay of course i stand for income income fifty means for fifty years alright okay miss junar even you already have a existing policy right okay then we even make it very very special because we can cover for your whole family member at one flat premium okay and this regardless of your age secondly right we garden to how many dependent are children you have as well as regardless of your occupation and also your occupation economic security is an important pillar of national and regio

In [9]:
ending = transcript[-600:]
ending

'er electronics so the policy will be effective tokay lah you mean this policy i help you apply right now once i submit the application and then i sent you the policy number sure meaning it is approved but we will deduct the premium within three working days you will receive the policy documents via sms or email in three working days please ensure all information is correct lets say if you have any issues or questions you can call me again yah ok do you still have any questions for me i will just help you with the application and send you the policy information shortly thank you so much bye bye'

## Sentencizer

In [10]:
open_sentence = nlp_sentencizer_v2(opening, nlp)
open_sentence

[' okay this is alice here coming from income ntuc income',
 ' okay currently we partnership with ocbc plus can i check with you if you a good time to speak for a while first of course thank you for using ocbc',
 'okay and then secondly as a valued customer we would like to actually update and also give you a priority okay',
 ' because in year twenty twenty right income actually celebrate our fiftieth annivesary okay',
 ' and then in that particular year we actually came out a very very special anniversary policy called i fifty',
 ' okay of course i stand for income income fifty means for fifty years',
 'alright okay miss junar even you already have a existing policy',
 'right okay then we even make it very very special because we can cover for your whole family member at one flat premium',
 'okay and this regardless of your age secondly',
 ' right we garden to how many dependent are children you have as well as regardless of your occupation and also your occupation economic security i

In [13]:
end_sentence = nlp_sentencizer_v2(ending, nlp)
end_sentence

[' er electronics so the policy will be effective',
 ' tokay lah you mean this policy i help you apply right now once i submit the application',
 ' and then i sent you the policy number sure meaning it is approved but we will deduct the premium within three working days you will receive the policy documents via sms or email in three working days please ensure all information is correct lets say if you have any issues or questions you can call me again',
 ' yah ok do you still have any questions for me i will just help you with the application and send you the policy information shortly thank you so much bye bye']

## Category Matching

In [14]:
open_matched_result, open_sentence_ls, open_label_ls, open_similarity_ls = batch_match_multi_categories(open_sentence, sentence_model, threshold_config[0]['opening'], opening_lexicons)

In [15]:
open_matched_result

[[' okay this is alice here coming from income ntuc income',
  ['standard_greeting', 'purpose_of_call']],
 [' okay currently we partnership with ocbc plus can i check with you if you a good time to speak for a while first of course thank you for using ocbc',
  ['standard_greeting', 'purpose_of_call']],
 ['okay and then secondly as a valued customer we would like to actually update and also give you a priority okay',
  ['standard_greeting', 'purpose_of_call']],
 [' because in year twenty twenty right income actually celebrate our fiftieth annivesary okay',
  ['no matching']],
 [' and then in that particular year we actually came out a very very special anniversary policy called i fifty',
  ['purpose_of_call']],
 [' okay of course i stand for income income fifty means for fifty years',
  ['purpose_of_call']],
 ['alright okay miss junar even you already have a existing policy',
  ['standard_greeting', 'purpose_of_call']],
 ['right okay then we even make it very very special because we can

In [18]:
open_similarity_ls

[[0.42427972, 0.31466082, 0.20223577],
 [0.33511627, 0.3419063, 0.330265],
 [0.30356756, 0.4274702, 0.2733118],
 [0.17577949, 0.27646947, 0.09803732],
 [0.0964587, 0.45960838, 0.09786334],
 [0.24849632, 0.34048322, 0.10549793],
 [0.2883965, 0.28186268, 0.24810791],
 [0.055603635, 0.53493875, 0.08320273],
 [0.22202629, 0.17394884, 0.28677443],
 [0.1452643, 0.25098944, 0.1444247]]

In [16]:
close_matched_result, close_sentence_ls, close_label_ls, close_similarity_ls = batch_match_multi_categories(end_sentence, sentence_model, threshold_config[1]['closing'], closing_lexicons)

In [17]:
close_matched_result

[[' er electronics so the policy will be effective', ['no matching']],
 [' tokay lah you mean this policy i help you apply right now once i submit the application',
  ['no matching']],
 [' and then i sent you the policy number sure meaning it is approved but we will deduct the premium within three working days you will receive the policy documents via sms or email in three working days please ensure all information is correct lets say if you have any issues or questions you can call me again',
  ['data_enrichment']],
 [' yah ok do you still have any questions for me i will just help you with the application and send you the policy information shortly thank you so much bye bye',
  ['data_enrichment', 'standard_closing']]]

In [19]:
close_similarity_ls

[[0.0862126, 0.11242257, 0.109900825],
 [0.28964415, 0.3003706, 0.26638153],
 [0.3349283, 0.37765583, 0.20113377],
 [0.29277515, 0.4138102, 0.32034466]]

## Politeness Analysis

In [20]:
opening_politeness = nlp_sentiment(open_sentence_ls, open_label_ls, sentiment_tokenizer, sentiment_model)

In [21]:
opening_politeness

[[' okay this is alice here coming from income ntuc income',
  ['standard_greeting', 'purpose_of_call'],
  'neutral'],
 [' okay currently we partnership with ocbc plus can i check with you if you a good time to speak for a while first of course thank you for using ocbc',
  ['standard_greeting', 'purpose_of_call'],
  'polite'],
 ['okay and then secondly as a valued customer we would like to actually update and also give you a priority okay',
  ['standard_greeting', 'purpose_of_call'],
  'polite'],
 [' because in year twenty twenty right income actually celebrate our fiftieth annivesary okay',
  ['no matching'],
  'neutral'],
 [' and then in that particular year we actually came out a very very special anniversary policy called i fifty',
  ['purpose_of_call'],
  'neutral'],
 [' okay of course i stand for income income fifty means for fifty years',
  ['purpose_of_call'],
  'neutral'],
 ['alright okay miss junar even you already have a existing policy',
  ['standard_greeting', 'purpose_of_

In [22]:
closing_politeness = nlp_sentiment(close_sentence_ls, close_label_ls, sentiment_tokenizer, sentiment_model)

In [23]:
closing_politeness

[[' er electronics so the policy will be effective',
  ['no matching'],
  'polite'],
 [' tokay lah you mean this policy i help you apply right now once i submit the application',
  ['no matching'],
  'neutral'],
 [' and then i sent you the policy number sure meaning it is approved but we will deduct the premium within three working days you will receive the policy documents via sms or email in three working days please ensure all information is correct lets say if you have any issues or questions you can call me again',
  ['data_enrichment'],
  'neutral'],
 [' yah ok do you still have any questions for me i will just help you with the application and send you the policy information shortly thank you so much bye bye',
  ['data_enrichment', 'standard_closing'],
  'neutral']]

## Score Assignment

In [24]:
# read score_reference config file
ref_score_dict = load_yaml('config/score_reference.yaml')[0]

In [25]:
ref_score_dict

{'opening': {'standard_greeting': 1,
  'purpose_of_call': 2,
  'ask_for_permission': 2},
 'closing': {'follow_up': 2, 'data_enrichment': 2, 'standard_closing': 2}}

In [26]:
opening_pass_result = check_passed(opening_politeness, opening_lexicons.keys(), 'opening')
opening_pass_result

{'opening': {'standard_greeting': True,
  'purpose_of_call': True,
  'ask_for_permission': False}}

In [27]:
opening_score_result = assign_score(opening_pass_result, ref_score_dict, 'opening')
opening_score_result

({'opening': {'standard_greeting': 1,
   'purpose_of_call': 2,
   'ask_for_permission': 0}},
 3)

In [28]:
closing_pass_result = check_passed(closing_politeness, closing_lexicons.keys(), 'closing')
closing_pass_result

{'closing': {'follow_up': False,
  'data_enrichment': True,
  'standard_closing': True}}

In [29]:
closing_score_result = assign_score(closing_pass_result, ref_score_dict, 'closing')
closing_score_result

({'closing': {'follow_up': 0, 'data_enrichment': 2, 'standard_closing': 2}}, 4)

## Save to Config

In [30]:
total_score_result = [opening_score_result[0], closing_score_result[0]]
transcript_id = transcript_name.split(".")[0] + ".yaml"
save_address = os.path.join("qa_result", transcript_id)
dump_yaml(total_score_result, save_address)