In [1]:
import joblib

import numpy as np
import pandas as pd

import re
import string
import itertools                                                                 #plotting_confusion Matrix
import imblearn

import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import metrics

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer

import gensim
from textblob import TextBlob
from gensim.models import Word2Vec

from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

from tqdm import tqdm                                                           #Progress bar
from bs4 import BeautifulSoup
from collections import Counter
from IPython.display import display

import warnings
warnings.filterwarnings("ignore")
%matplotlib inline

In [2]:
import xgboost as xgb
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from catboost import CatBoostClassifier
import lightgbm as lgbm


from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

## Load the saved Count Vectorizer

In [3]:
# joblib artifacts are pickle-based, so loading one can execute arbitrary code:
# only load files that come from a trusted source.
# NOTE(review): presumably this is the vectorizer fitted when the model was trained — confirm.
count_vectorizer = joblib.load('count_vectorizer.joblib')

## Load the saved model from the file:

In [4]:
# joblib artifacts are pickle-based, so loading one can execute arbitrary code:
# only load files that come from a trusted source.
# NOTE(review): file name suggests a bag-of-words voting ensemble — confirm against the training notebook.
model = joblib.load('BOW_Ensemble_voting_classifier.joblib')

### Read the Excel file containing the text data:

In [5]:
# Input workbook of call transcripts; the 'Transcribe_output' column is the text
# that is cleaned and classified further down.
data = pd.read_excel('Transcribed_Sample_Data.xlsx')

In [6]:
data.head()

Unnamed: 0,Transcribe_output,Keyphrases,AgentTranscription,CustomerTranscription,AgentIntent,CustomerIntent,Hold_Time,Duration,Before_Hold_Agent,Before_Hold_Customer,After_Hold_Agent,After_Hold_Customer
0,my name is how can I delight you today? Um Goo...,"['my name', 'today', 'Um Good morning', 'this ...",my name is how can I delight you today?. Yeah ...,Um Good morning actually when I was issued thi...,"['my name', 'today', 'the card holder', 'this ...","['Um Good', 'this card', 'uh', 'no uh', 'this ...",20.49,144.76,my name is how can I delight you today Yeah t...,Um Good morning actually when I was issued th...,,
1,"Okay, could you Thank you? So uh couple of day...","['couple', 'days', 'the credit card', 'request...","could you Thank you? So. Okay,. One second. Ok...","Okay,. uh couple of days ago I have requested ...","['One second', 'the rest', 'the annual members...","['couple', 'days', 'the credit card', 'request...",100.41,238.7,could you Thank you So Okay One second Okay s...,Okay uh couple of days ago I have requested f...,,
2,good evening. Thank you for choosing our bank....,"['good evening', 'our bank', 'My name', 'today...",evening. Thank you for choosing our bank. My n...,good. today? Uh Card 67. Okay. Yeah. Yeah well...,"['evening', 'our bank', 'My name', 'maxima car...","['today', 'Uh Card 67', 'October', '80', '10 u...",843.62,1141.13,evening Thank you for choosing our bank My na...,good today,maxima card Okay October October 600% residen...,Uh Card 67 Okay Yeah Yeah well Okay Okay Octo...
3,very good evening. Thank you for choosing our ...,"['very good evening', 'our bank', 'My name', '...",very good evening. Thank you for choosing our ...,"Uh actually 27,000. So uh before the statement...","['very good evening', 'our bank', 'My name', '...","['the statement', 'No I', 'the conversation', ...",277.175,739.28,very good evening Thank you for choosing our ...,"Uh actually 27,000 So uh before the statement...",,
4,Good evening. Thank you for choosing my name i...,"['my name', 'today', 'my card', 'my card', 'th...",Good evening. Thank you for choosing my name i...,my card That's. I want to cancel my card.. I d...,"['my name', 'today', 'the reason', 'the card',...","['my card', 'my card', 'my card', 'Uh', 'any w...",63.97,284.41,Good evening Thank you for choosing my name i...,my card That's I want to cancel my card I don...,,


In [7]:
data.columns

Index(['Transcribe_output', 'Keyphrases', 'AgentTranscription',
       'CustomerTranscription', 'AgentIntent', 'CustomerIntent', 'Hold_Time',
       'Duration', 'Before_Hold_Agent', 'Before_Hold_Customer',
       'After_Hold_Agent', 'After_Hold_Customer'],
      dtype='object')

## Data Preprocessing

In [8]:
# Set up the English stop-word set and the WordNet lemmatizer used by the cleaner
english_stopword_list = stopwords.words('english')
stopword = set(english_stopword_list)
lemmatizer = WordNetLemmatizer()

In [9]:
# Data Cleaning Function
custom_stopwords = ["okay", "uh"]  # Add extra stopwords here

def data_cleaner(data):
    """Clean raw transcript text so it can be vectorized.

    Each entry is stripped of HTML markup and of anything inside square
    brackets, reduced to ASCII letters, lower-cased, filtered of stop words
    (the NLTK English set in ``stopword`` plus ``custom_stopwords``) and of
    single-character tokens, and finally lemmatized.

    Args:
        data: Iterable of transcript strings (e.g. a pandas Series). Missing
            values (None/NaN) are cleaned to an empty string; any other
            non-string value is converted with ``str()`` first.

    Returns:
        list: One cleaned, space-joined string per input entry, in input order.
    """
    # Build the exclusion set once instead of testing two collections per token.
    excluded_tokens = stopword.union(custom_stopwords)

    clean_data = []
    for sentence in tqdm(data):
        # Missing cells are not strings (NaN is a float) and cannot be parsed
        # as markup, so normalise them before handing them to BeautifulSoup.
        if not isinstance(sentence, str):
            sentence = '' if pd.isna(sentence) else str(sentence)

        # Clean HTML tags
        cleantext = BeautifulSoup(sentence, "lxml").text

        # Remove content within square brackets (raw string: '\[' is not a
        # valid escape sequence in a normal string literal)
        cleantext = re.sub(r'\[[^]]*\]', ' ', cleantext)

        # Keep ASCII letters only; digits, punctuation and every other
        # character become a space, so no separate punctuation pass is needed.
        cleantext = re.sub('[^a-zA-Z]', ' ', cleantext)

        # Lower-case and tokenize on whitespace
        tokens = cleantext.lower().split()

        # Drop stop words and single-character tokens such as "c", "b", "r", "p"
        tokens = [token for token in tokens if token not in excluded_tokens and len(token) > 1]

        # Lemmatize the tokens
        lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]

        # Join the tokens back into a single string
        cleantext = ' '.join(lemmatized_tokens).strip()

        clean_data.append(cleantext)

    return clean_data

In [10]:
# Clean the full transcript of every row and keep the result in a new column,
# leaving the raw 'Transcribe_output' text untouched.
data['Cleaned_Overall_Text'] = data_cleaner(data['Transcribe_output'])

100%|███████████████████████████████████████████████████████████████████████████████| 812/812 [00:03<00:00, 265.94it/s]


In [11]:
data.head()

Unnamed: 0,Transcribe_output,Keyphrases,AgentTranscription,CustomerTranscription,AgentIntent,CustomerIntent,Hold_Time,Duration,Before_Hold_Agent,Before_Hold_Customer,After_Hold_Agent,After_Hold_Customer,Cleaned_Overall_Text
0,my name is how can I delight you today? Um Goo...,"['my name', 'today', 'Um Good morning', 'this ...",my name is how can I delight you today?. Yeah ...,Um Good morning actually when I was issued thi...,"['my name', 'today', 'the card holder', 'this ...","['Um Good', 'this card', 'uh', 'no uh', 'this ...",20.49,144.76,my name is how can I delight you today Yeah t...,Um Good morning actually when I was issued th...,,,name delight today um good morning actually is...
1,"Okay, could you Thank you? So uh couple of day...","['couple', 'days', 'the credit card', 'request...","could you Thank you? So. Okay,. One second. Ok...","Okay,. uh couple of days ago I have requested ...","['One second', 'the rest', 'the annual members...","['couple', 'days', 'the credit card', 'request...",100.41,238.7,could you Thank you So Okay One second Okay s...,Okay uh couple of days ago I have requested f...,,,could thank couple day ago requested credit ca...
2,good evening. Thank you for choosing our bank....,"['good evening', 'our bank', 'My name', 'today...",evening. Thank you for choosing our bank. My n...,good. today? Uh Card 67. Okay. Yeah. Yeah well...,"['evening', 'our bank', 'My name', 'maxima car...","['today', 'Uh Card 67', 'October', '80', '10 u...",843.62,1141.13,evening Thank you for choosing our bank My na...,good today,maxima card Okay October October 600% residen...,Uh Card 67 Okay Yeah Yeah well Okay Okay Octo...,good evening thank choosing bank name may deli...
3,very good evening. Thank you for choosing our ...,"['very good evening', 'our bank', 'My name', '...",very good evening. Thank you for choosing our ...,"Uh actually 27,000. So uh before the statement...","['very good evening', 'our bank', 'My name', '...","['the statement', 'No I', 'the conversation', ...",277.175,739.28,very good evening Thank you for choosing our ...,"Uh actually 27,000 So uh before the statement...",,,good evening thank choosing bank name delete t...
4,Good evening. Thank you for choosing my name i...,"['my name', 'today', 'my card', 'my card', 'th...",Good evening. Thank you for choosing my name i...,my card That's. I want to cancel my card.. I d...,"['my name', 'today', 'the reason', 'the card',...","['my card', 'my card', 'my card', 'Uh', 'any w...",63.97,284.41,Good evening Thank you for choosing my name i...,my card That's I want to cancel my card I don...,,,good evening thank choosing name delight today...


## Text Vectorization - Bag of Words (Count Vectorizer)

In [12]:
# Vectorize the cleaned transcripts with the already-fitted vectorizer (transform only, no refit)
cleaned_transcripts = data['Cleaned_Overall_Text']
count_data = count_vectorizer.transform(cleaned_transcripts)

## ML Model Sentiment Classification

In [13]:
# Use the loaded model to predict a class for each vectorized transcript.
# The raw output is mapped to sentiment names in the next cell.
predictions = model.predict(count_data)

In [14]:
# Map the numerical labels back to the actual sentiment labels
sentiment_labels = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}

# `predictions` is overwritten with the label names, so a second run of this
# cell would look up strings in the id -> name mapping and raise KeyError.
# Names that are already mapped are passed through unchanged to keep the cell
# re-runnable; an unknown class id still raises KeyError.
mapped_label_names = set(sentiment_labels.values())
predictions = [
    label if label in mapped_label_names else sentiment_labels[label]
    for label in predictions
]

In [15]:
# Store the predicted sentiment label of each transcript in a new 'Overall_sentiment' column
data['Overall_sentiment'] = predictions

In [16]:
data.head(2)

Unnamed: 0,Transcribe_output,Keyphrases,AgentTranscription,CustomerTranscription,AgentIntent,CustomerIntent,Hold_Time,Duration,Before_Hold_Agent,Before_Hold_Customer,After_Hold_Agent,After_Hold_Customer,Cleaned_Overall_Text,Overall_sentiment
0,my name is how can I delight you today? Um Goo...,"['my name', 'today', 'Um Good morning', 'this ...",my name is how can I delight you today?. Yeah ...,Um Good morning actually when I was issued thi...,"['my name', 'today', 'the card holder', 'this ...","['Um Good', 'this card', 'uh', 'no uh', 'this ...",20.49,144.76,my name is how can I delight you today Yeah t...,Um Good morning actually when I was issued th...,,,name delight today um good morning actually is...,Positive
1,"Okay, could you Thank you? So uh couple of day...","['couple', 'days', 'the credit card', 'request...","could you Thank you? So. Okay,. One second. Ok...","Okay,. uh couple of days ago I have requested ...","['One second', 'the rest', 'the annual members...","['couple', 'days', 'the credit card', 'request...",100.41,238.7,could you Thank you So Okay One second Okay s...,Okay uh couple of days ago I have requested f...,,,could thank couple day ago requested credit ca...,Positive


In [17]:
data['Overall_sentiment'].value_counts()

Positive    729
Negative     54
Neutral      29
Name: Overall_sentiment, dtype: int64

In [18]:
# Export every column, including the cleaned text and the predicted sentiment;
# index=False keeps the DataFrame index out of the workbook.
data.to_excel('Output_Classified_Sentiments.xlsx', index=False)