# Text Analysis

In [1]:
# Importing basic libraries
import pandas as pd
import numpy as np
import spacy
import string
punct = string.punctuation
nlp = spacy.load('en_core_web_sm') #Loading spacy english

In [2]:
df = pd.read_excel('AllTranscriptions.xlsx')  #reading reviews dataset
print(df.shape)
df.head()

(65, 3)


Unnamed: 0,Sno,Text,State
0,1,"Hi, welcome to Pizza Hut. My name is Christine...",Null
1,2,"Hi, welcome to a Pizza Hut is Chris may have y...",NSW
2,3,Thank you for calling Pizza Hut. This is Jenny...,Newtown
3,4,Thanks for calling Pizza Hut to spell. Can I h...,Victoria
4,5,"Hi, welcome to Pizza Hut. My name is Christine...",Null


In [3]:
df.isnull().sum()

Sno      0
Text     0
State    0
dtype: int64

In [4]:
df['State'].value_counts()

NSW         24
QLD         12
Null        11
Victoria    11
WA           3
NT           2
SA           1
Newtown      1
Name: State, dtype: int64

# Text Preprocessing

In [5]:
def text_clean(text):
    text = text.lower()  #Convert text in lower case
    punc_removed = [char for char in text if char not in punct]  #Removing Punctuations
    punc_removed_join = ''.join(punc_removed) 
    
    doc= nlp(punc_removed_join)
    text_out = [token.lemma_ for token in doc if token.is_stop == False and token.is_alpha and len(token)>2]
    txt = ' '.join(text_out)
    return txt
    
    
    
#     for token in doc:
#         if token.is_stop == False and token.is_alpha and len(token)>2:
#             lemma = token.lemma_            #lemmatization of token word
#             text_out.append(lemma)
            
#     return text_out

In [6]:
df['clean_text'] = df['Text'].apply(text_clean)
df.head()

Unnamed: 0,Sno,Text,State,clean_text
0,1,"Hi, welcome to Pizza Hut. My name is Christine...",Null,welcome pizza hut christine pleasehi dear ipi ...
1,2,"Hi, welcome to a Pizza Hut is Chris may have y...",NSW,welcome pizza hut chris pleasesee pay like hea...
2,3,Thank you for calling Pizza Hut. This is Jenny...,Newtown,thank call pizza hut jenny nameis pizza hut ca...
3,4,Thanks for calling Pizza Hut to spell. Can I h...,Victoria,thank call pizza hut spell pleasehi call pick ...
4,5,"Hi, welcome to Pizza Hut. My name is Christine...",Null,welcome pizza hut christine pleasehi dear ipi ...


# Sentiment Analysis

In [7]:
# Create function for polarity checking
from spacytextblob.spacytextblob import SpacyTextBlob
nlp = spacy.load('en_core_web_sm')
spacy_text_blob = SpacyTextBlob()
nlp.add_pipe(spacy_text_blob)

def polarity(text):
    doc = nlp(text)
    pol = float(format(doc._.sentiment.polarity, '.3f'))
    return pol

In [8]:
df['clean_text'][0]

'welcome pizza hut christine pleasehi dear ipi order somebody call place pizza place order reference number give line get disconnectedso place orderso say ask bring machine intend thing say yes finalize reference number line get cut offand remember talk alright hold let ahead double check request mobile app plus sorryv victor alpha samif alpha fanelli friend want surname actually betterfull yes want victor alpha sam alpha informally diva tom hotel india nancy feel india basanti neosho young living course shanker surname hotel alphain finance ekipa kite aar shankarsir kindly stay line real quick ahead talk double check sure order pizza dinner basically want pizza line get disconnected know order understandyes understand moment ill ahead touch line'

In [9]:
polarity(df['clean_text'][0])

0.222

In [10]:
df['polarity'] = df['clean_text'].apply(polarity)


In [11]:
df.sample(10)

Unnamed: 0,Sno,Text,State,clean_text,polarity
56,57,"Hi, this is Michael from Pizza Hut near. Pleas...",Null,michael pizza hut near namea highly hello sist...,0.137
0,1,"Hi, welcome to Pizza Hut. My name is Christine...",Null,welcome pizza hut christine pleasehi dear ipi ...,0.222
47,48,Thanks for calling Pizza Hut. This is Jenny. M...,WA,thank call pizza hut jenny namehello spell mey...,0.066
31,32,"Hello, thank you for calling.Your name?Yes, my...",QLD,hello thank callingyour nameyes call pick deli...,-0.067
28,29,Thank you for calling Pizza Hut is Joanna spea...,NSW,thank call pizza hut joanna speak namemy call ...,-0.083
38,39,"Hi, welcome to Pizza Hut Irene speaking. Can I...",QLD,welcome pizza hut irene speak pleaseoffline ha...,0.1
33,34,Thank you for calling Pizza Hut. My name is Ka...,NSW,thank call pizza hut karen call pick deliveryd...,0.057
2,3,Thank you for calling Pizza Hut. This is Jenny...,Newtown,thank call pizza hut jenny nameis pizza hut ca...,-0.062
17,18,"Hi, welcome to a Pizza Hut is Chris may have y...",NSW,welcome pizza hut chris pleasesee pay like hea...,-0.029
43,44,Thank you for calling Pizza Hut. This is Chloe...,QLD,thank call pizza hut chloe pleasea thank call ...,0.038


In [12]:
df['sentiment'] = df['polarity'].apply(lambda x: "Pos" if x>0 else("Neg" if x<0 else "Neu") )
df.sample(10)

Unnamed: 0,Sno,Text,State,clean_text,polarity,sentiment
20,21,"Call pizza offices. Chloe may have your name, ...",Victoria,pizza office chloe pleasehi connect crumble pi...,-0.022,Neg
56,57,"Hi, this is Michael from Pizza Hut near. Pleas...",Null,michael pizza hut near namea highly hello sist...,0.137,Pos
61,62,Thank you for calling Pizza Hut this is Shane....,QLD,thank call pizza hut shane namemy call pick de...,-0.078,Neg
64,65,"Hi, thank you for calling pizza. This is VL, m...",Null,thank call pizza pleasehi pizza hut wanna room...,-0.009,Neg
45,46,Thank you for calling Pizza Hut may know your ...,NSW,thank call pizza hut know thank call pick deli...,0.109,Pos
52,53,Want to chat minimis? Karen may have your name...,NSW,want chat minimis karen yes call pick delivery...,-0.03,Neg
62,63,"Hi, welcome to Pizza Hut. My name is Christine...",QLD,welcome pizza hut christine pleaseis delivery ...,0.197,Pos
22,23,Welcome to Pizza Hut. My name is Miguel can ha...,Null,welcome pizza hut miguel pleasemy good afterno...,0.217,Pos
39,40,Thank you for calling Pizza Hut. This is Jenny...,Null,thank call pizza hut jenny namea mattress sate...,0.344,Pos
28,29,Thank you for calling Pizza Hut is Joanna spea...,NSW,thank call pizza hut joanna speak namemy call ...,-0.083,Neg


In [13]:
df.sentiment.value_counts()

Pos    45
Neg    19
Neu     1
Name: sentiment, dtype: int64

#  Emotion Analysis

In [14]:
def emotion_analysis(text):
    emotion_list = []
    with open('Emotions.txt', 'r') as file:
        for line in file:
            clear_line = line.replace("\n", '').replace(",", '').replace("'", '').strip()
            word, emotion = clear_line.split(':')
        
            if word in text:
                emotion_list.append(emotion)
    return emotion_list

In [17]:
df['emotions'] = df['clean_text'].apply(emotion_analysis)
df['emotions'] = df['emotions'].apply(lambda x: ', '.join(x))
df.sample(10)

Unnamed: 0,Sno,Text,State,clean_text,polarity,sentiment,emotions
0,1,"Hi, welcome to Pizza Hut. My name is Christine...",Null,welcome pizza hut christine pleasehi dear ipi ...,0.222,Pos,"attached, alone, apathetic"
37,38,"Hi, welcome to Pizza Hut Irene speaking. Can I...",QLD,welcome pizza hut irene speak pleaseoffline ha...,0.1,Pos,"fearless, free, happy, apathetic"
48,49,Thank you for calling Pizza Hut. This is Joann...,Victoria,thank call pizza hut joanna speak namehow call...,0.133,Pos,"fearful, happy, apathetic"
1,2,"Hi, welcome to a Pizza Hut is Chris may have y...",NSW,welcome pizza hut chris pleasesee pay like hea...,-0.029,Neg,"free, sad, sad, apathetic"
55,56,Welcome to Pizza Hut. My name is Miguel. Can I...,Victoria,welcome pizza hut miguel namegood evening orde...,0.07,Pos,"fearful, apathetic"
49,50,Welcome to Pizza Hut. My name is Travis Mayer ...,NSW,welcome pizza hut travis mayer bringing hello ...,0.076,Pos,sad
34,35,Contact center my name is Maxine. May have you...,QLD,contact center maxine pleaseyes freeze call de...,-0.078,Neg,"angry, free, sad, sad, apathetic"
19,20,"Hi, thank you for calling Pizza Hut. My name i...",QLD,thank call pizza hut angel pleaseare fan order...,0.095,Pos,apathetic
62,63,"Hi, welcome to Pizza Hut. My name is Christine...",QLD,welcome pizza hut christine pleaseis delivery ...,0.197,Pos,apathetic
52,53,Want to chat minimis? Karen may have your name...,NSW,want chat minimis karen yes call pick delivery...,-0.03,Neg,apathetic


In [21]:
df.sentiment.value_counts()

Pos    45
Neg    19
Neu     1
Name: sentiment, dtype: int64