## Import Necessary Libraries and set display options

In [1]:
import numpy as np
import pandas as pd
# Raise the pandas display limits so wide frames and long text cells are not truncated.
pd.options.display.max_columns = 500
pd.options.display.max_colwidth = 400

## Import Data from all the sources

In [None]:
# file_1 and file_2 carry the .xlsx extension, so both are read with read_excel;
# read_csv cannot parse an Excel workbook.
data_1 = pd.read_excel("file_1.xlsx")
data_2 = pd.read_excel("file_2.xlsx")
# file_3 is a CSV whose first column becomes the row index. low_memory=False
# makes pandas infer each column's dtype from the whole file rather than per chunk.
data_3 = pd.read_csv("file_3.csv", low_memory=False, index_col=0)

## Data Cleaning

In [None]:
# Add filters to get the right set of data
# Drop rows whose column_1 equals 13000 and continue on an independent copy, so
# the column assignment below never writes into a slice of the loaded frame.
data_3 = data_3[data_3["column_1"] != 13000].copy()

# Fill missing column_2 values in column_2 itself. The original assigned the
# result to a misspelled new column ("columns_2"), leaving column_2 unfilled.
data_3["column_2"] = data_3["column_2"].fillna("others")

# Normalise the one header that is not snake_case, then remove exact duplicate rows.
data_3 = data_3.rename(columns={"First Name": "first_name"}).drop_duplicates()

# Show the resulting column names as the cell output.
data_3.columns

## Into the Analysis

### Keywords for categories

In [None]:
# Keyword lists used to assign a comment to a category; each list is later
# joined with "|" and matched against the cleaned feedback text.
category_1 = [
    "keyword_1",
    "keyword_2",
    "keyword_3",
    "keyword_4",
]
category_2 = [
    "keyword_5",
    "keyword_6",
    "keyword_7",
    "keyword_8",
]
category_3 = [
    "keyword_9",
    "keyword_10",
    "keyword_11",
]

### Customer Feedback Selection

In [None]:
# Keep only feedback that has a sentiment label and was written by a customer.
# Both row filters run before the integer cast so that rows which are discarded
# anyway cannot make the cast fail.
has_sentiment = data_3["text_sentiment"].notna()
written_by_customer = data_3["f_created_by"] == "customer"

# The chain ends in assign(), which returns a new frame rather than a slice of
# data_3, so the .loc assignments in the classification cells modify mydata
# directly without triggering SettingWithCopyWarning.
mydata = (
    data_3[has_sentiment & written_by_customer]
    .drop_duplicates()
    .astype({"column_5": int})
    # Every comment starts as "others" until a keyword rule relabels it.
    .assign(analyzed_comment_type="others")
)

### Contradictory Sentiments and Star Ratings

In [None]:
# Overall share (rounded to whole percent) of each sentiment label.
total_rows = len(mydata)
for key, val in mydata["text_sentiment"].value_counts().items():
  print(f"TOTAL {key} -> {round((val*100/total_rows))}")

# The same breakdown inside each star-rating bucket. Each bucket prints its own
# share of all rows first, then the sentiment split within the bucket.
star_rating_buckets = [
    ("star_rating > 3", mydata["star_rating"] > 3),
    ("star_rating < 3", mydata["star_rating"] < 3),
    ("star_rating == 3", mydata["star_rating"] == 3),
]
for bucket_label, bucket_mask in star_rating_buckets:
  bucket_rows = mydata[bucket_mask]
  bucket_size = len(bucket_rows)
  print()
  print(f"{bucket_label}: {round((bucket_size*100/total_rows))}")
  for key, val in bucket_rows["text_sentiment"].value_counts().items():
    print(f"{key} -> {round((val*100/bucket_size))}")

#### Select the comments with a star rating of 3

In [None]:
# Show a fixed set of columns for every comment rated exactly three stars.
three_star_columns = ["column_1", "column_2", "column_3", "column_4", "star_rating"]
mydata.loc[mydata["star_rating"] == 3, three_star_columns]

### Classifying the comments here

#### Classifying the comments into Categories of `NEGATIVE COMPLAINTS`

In [None]:
# Keyword lists paired with the label they assign. The order matters: when a
# comment matches several lists, the last matching entry wins, exactly as with
# the original sequence of assignments.
negative_keyword_labels = [
    (category_1, "complaint_type_1"),
    (category_2, "complaint_type_2"),
    (category_3, "complaint_type_3"),
]
is_negative = mydata["text_sentiment"] == "NEGATIVE"

for keywords, label in negative_keyword_labels:
  # The keywords are joined into one regex alternation. na=False makes rows with
  # missing text count as "no match" (replaces the former `== True` comparison).
  mentions_keyword = mydata["clean_feedback_text"].str.contains("|".join(keywords), na=False)
  mydata.loc[mentions_keyword & is_negative, "analyzed_comment_type"] = label

# Negative-sentiment rows with column_1 == 1, reduced to the columns needed for review.
negative_complaints = mydata.loc[
    (mydata["column_1"] == 1) & is_negative,
    ["column_1", "complaint_type", "clean_feedback_text", "analyzed_comment_type", "text_sentiment"],
]

#### Classifying the comments into Categories of `POSITIVE COMPLAINTS`

In [None]:
# Classify POSITIVE comments by keyword. The original repeated the NEGATIVE
# mask here, so positive comments were never relabelled and always stayed "others".
# Order matters: a later keyword list overrides an earlier one on multiple matches.
positive_keyword_labels = [
    (category_1, "complaint_type_1"),
    (category_2, "complaint_type_2"),
    (category_3, "complaint_type_3"),
]
is_positive = mydata["text_sentiment"] == "POSITIVE"

for keywords, label in positive_keyword_labels:
  # na=False makes rows with missing text count as "no match".
  mentions_keyword = mydata["clean_feedback_text"].str.contains("|".join(keywords), na=False)
  mydata.loc[mentions_keyword & is_positive, "analyzed_comment_type"] = label

# Positive-sentiment rows with column_1 == 1, reduced to the columns needed for review.
positive_complaints = mydata.loc[
    (mydata["column_1"] == 1) & is_positive,
    ["column_1", "complaint_type", "clean_feedback_text", "analyzed_comment_type", "text_sentiment"],
]

#### Classifying the comments into Categories of `POSITIVE NON-COMPLAINTS`

In [None]:
# Classify POSITIVE comments by keyword. The original repeated the NEGATIVE
# mask, which left positive comments unclassified. Re-running this on rows that
# are already labelled is harmless because the same labels are assigned again.
positive_keyword_labels = [
    (category_1, "complaint_type_1"),
    (category_2, "complaint_type_2"),
    (category_3, "complaint_type_3"),
]
is_positive = mydata["text_sentiment"] == "POSITIVE"

for keywords, label in positive_keyword_labels:
  # na=False makes rows with missing text count as "no match".
  mentions_keyword = mydata["clean_feedback_text"].str.contains("|".join(keywords), na=False)
  mydata.loc[mentions_keyword & is_positive, "analyzed_comment_type"] = label

# Non-complaints are the rows with column_1 == 0, the value the distribution
# printout and the chart cells use for this segment. The original selected
# column_1 == 1 and so duplicated positive_complaints.
positive_non_complaints = mydata.loc[
    (mydata["column_1"] == 0) & is_positive,
    ["column_1", "complaint_type", "clean_feedback_text", "analyzed_comment_type", "text_sentiment"],
]

#### Classifying the comments into Categories of `NEGATIVE NON-COMPLAINTS`

In [None]:
# Negative-sentiment rows from the non-complaint segment. column_1 == 0 is the
# value the distribution printout and the chart cells use for NON-COMPLAINTS.
# The original filtered on column_1 == 1, which made this frame an exact
# duplicate of negative_complaints.
negative_non_complaints = mydata.loc[
    (mydata["column_1"] == 0) & (mydata["text_sentiment"] == "NEGATIVE"),
    ["column_1", "complaint_type", "clean_feedback_text", "analyzed_comment_type", "text_sentiment"],
]

#### Distribution of categories in different conditions in data

In [None]:
# One entry per printed section: heading, column_1 value, sentiment label.
distribution_sections = [
    ("**Comment Types for the NEGATIVE Complaints**", 1, "NEGATIVE"),
    ("**Comment Types for the POSITIVE Complaints**", 1, "POSITIVE"),
    ("**Comment Types for the NEGATIVE NON-COMPLAINTS**", 0, "NEGATIVE"),
    ("**Comment Types for the POSITIVE NON-COMPLAINTS**", 0, "POSITIVE"),
]

for section_number, (heading, column_1_value, sentiment) in enumerate(distribution_sections):
  # A blank line separates sections; none is printed before the first one.
  if section_number > 0:
    print()
  print(heading)
  in_segment = (mydata["column_1"] == column_1_value) & (mydata["text_sentiment"] == sentiment)
  segment_types = mydata.loc[in_segment, "analyzed_comment_type"]
  segment_size = len(segment_types)
  # Percentage of each comment type within the segment, rounded to two decimals.
  for key, val in segment_types.value_counts().items():
    print(f"{key} -> {round(val*100/segment_size, 2)} %")

#### Number of feedback comments per entity in every category

In [None]:
# Labels that are not counted as complaints. They are matched as a regex
# substring search below, so "other" also excludes the default "others" label.
non_complaints = ["praise", "suggestion", "other"]
# Entities with fewer orders than this are left out of the final ranking.
min_orders_threshold = 3000

# Number of comments per (entity, comment type). size() replaces the former
# apply(lambda ...) + positional column renaming: it yields the same counts,
# names the column explicitly and also works when mydata is empty.
entity_analyzed_comment_counts = (
    mydata.groupby(["id", "name", "analyzed_comment_type"])
    .size()
    .reset_index(name="analyzed_comment_type_count")
)

# All comment types, most frequent first.
sorted_entity_comments1 = entity_analyzed_comment_counts.sort_values("analyzed_comment_type_count", ascending=False)

# Complaint-type rows only, most frequent first.
is_non_complaint = entity_analyzed_comment_counts["analyzed_comment_type"].str.contains("|".join(non_complaints))
sorted_entity_comments = entity_analyzed_comment_counts[~is_non_complaint].sort_values("analyzed_comment_type_count", ascending=False)

# Total complaints per entity, largest first.
entity_overall_complaint_count = (
    sorted_entity_comments.groupby(["id", "name"])["analyzed_comment_type_count"]
    .sum()
    .reset_index(name="complaints_count")
    .sort_values("complaints_count", ascending=False)
)

# Attach the order volume from data_2 and express complaints as a percentage of
# orders. The left join keeps entities without a match; their Orders value and
# therefore their percentage is NaN.
entity_complaints = (
    entity_overall_complaint_count
    .merge(data_2[["id", "Orders"]], on="id", how="left")
    .assign(complaint_percentage=lambda frame: frame["complaints_count"] * 100 / frame["Orders"])
    .sort_values("Orders", ascending=False)
)

# Restrict the ranking to entities with enough orders, worst complaint rate first.
min_entity_orders = (
    entity_complaints[entity_complaints["Orders"] >= min_orders_threshold]
    .sort_values("complaint_percentage", ascending=False)
)

#### Charts of the Analysis

In [None]:
import matplotlib.pyplot as plt

# Colour for every label that can appear in a chart. The chart cells index this
# mapping with the raw values of text_sentiment and analyzed_comment_type.
color_dict = {
    "NEGATIVE": "#d62728",
    "POSITIVE": "#2ca02c",
    "others": "tab:orange",
    "category_1": "tab:green",
    "category_2": "#ffb700",
    "category_3": "tab:purple",
    # The classification cells write "complaint_type_N" into
    # analyzed_comment_type. Without these entries the pie-chart cells raise
    # KeyError as soon as one classified comment is present.
    "complaint_type_1": "tab:green",
    "complaint_type_2": "#ffb700",
    "complaint_type_3": "tab:purple",
}

def pie_chart_maker(data, labels, colors, title, figsize, fontsize):
  """Draw and show a pie chart with percentage annotations and a legend.

  Args:
    data: Wedge sizes, passed straight to ``plt.pie``.
    labels: Legend entries, one per wedge, in the same order as ``data``.
    colors: Wedge colours, in the same order as ``data``.
    title: Text used as the legend title (the axes itself gets no title).
    figsize: Figure size as ``(width, height)``.
    fontsize: Font size applied to all text of this chart.
  """
  # rc_context restores the previous rcParams when the block exits, so the font
  # size chosen here no longer leaks into figures drawn by later cells.
  with plt.rc_context({'font.size': fontsize}):
    plt.figure(figsize=figsize)
    # autopct prints each wedge's share with one decimal place.
    plt.pie(data, autopct='%.1f%%', colors=colors)
    plt.legend(title=title, labels=labels)
    plt.show()


def bar_chart_maker(x_names, y_names, xlabel, ylabel, title, figsize, fontsize, width=0.6):
  """Draw and show a vertical bar chart.

  Args:
    x_names: Bar positions / category names for the x axis.
    y_names: Bar heights, in the same order as ``x_names``.
    xlabel: Label of the x axis.
    ylabel: Label of the y axis.
    title: Chart title.
    figsize: Figure size as ``(width, height)``.
    fontsize: Font size used for the title and both axis labels.
    width: Bar width handed to matplotlib (default 0.6).
  """
  plt.figure(figsize=figsize)
  axes = plt.gca()
  axes.bar(x_names, y_names, width=width)
  axes.set_title(title, fontsize=fontsize)
  axes.set_xlabel(xlabel, fontsize=fontsize)
  axes.set_ylabel(ylabel, fontsize=fontsize)
  # Tilt the tick labels and anchor them on their right end.
  plt.xticks(rotation=45, ha="right")
  plt.show()

#### Distribution of Positive and Negative Feedback

In [None]:
# Share of each sentiment label across the selected feedback.
data = mydata["text_sentiment"].value_counts()
sentiment_keys = list(data.index)
# Colours are looked up by the raw label; the legend shows a title-cased version.
chart_colors = [color_dict[raw_label] for raw_label in sentiment_keys]
labels = [raw_label.replace("_", " ").title() for raw_label in sentiment_keys]
title, figsize, fontsize = "Sentiments", (10, 8), 14

pie_chart_maker(data, labels, chart_colors, title, figsize, fontsize)

#### Relation between Star Ratings and Sentiments

In [None]:
# Sentiment counts within each star-rating bucket (above, below, equal to 3).
srg3 = dict(mydata[mydata["star_rating"]>3]["text_sentiment"].value_counts())
srl3 = dict(mydata[mydata["star_rating"]<3]["text_sentiment"].value_counts())
sre3 = dict(mydata[mydata["star_rating"]==3]["text_sentiment"].value_counts())

col1 = "Star Rating > 3"
col2 = "Star Rating < 3"
col3 = "Star Rating = 3"

sentis = ["POSITIVE", "NEGATIVE"]

# One count per sentiment and bucket, in the order of `sentis`. .get(..., 0)
# covers a bucket that has no comment of a given sentiment. The original zipped
# the three dicts together, which silently shortened the loop to the smallest
# bucket and raised KeyError on a missing sentiment.
dictt = {
    col1: [srg3.get(senti, 0) for senti in sentis],
    col2: [srl3.get(senti, 0) for senti in sentis],
    col3: [sre3.get(senti, 0) for senti in sentis],
}

# Transpose so that each row is a star-rating bucket and each column a sentiment.
stardf =  pd.DataFrame(
              dictt, index=sentis
          ).T


stardf.plot(kind="bar", color=[color_dict["POSITIVE"], color_dict["NEGATIVE"]])
# plt.title("Star Rating vs Sentiment")
plt.xlabel("Star Rating")
plt.xticks(rotation=45, ha="right")
plt.ylabel("Sentiment")

#### Comment Types for the NEGATIVE Complaints

In [None]:
# Comment-type breakdown of negative-sentiment rows with column_1 == 1.
segment_mask = (mydata["column_1"] == 1) & (mydata["text_sentiment"] == "NEGATIVE")
data = mydata.loc[segment_mask, "analyzed_comment_type"].value_counts()
comment_types = list(data.index)
# Colours are looked up by the raw label; the legend shows a title-cased version.
chart_colors = [color_dict[raw_label] for raw_label in comment_types]
labels = [raw_label.replace("_", " ").title() for raw_label in comment_types]
title, figsize, fontsize = "Negative Complaints", (20, 29), 16

pie_chart_maker(data, labels, chart_colors, title, figsize, fontsize)

#### Comment Types for the POSITIVE Complaints

In [None]:
# Comment-type breakdown of positive-sentiment rows with column_1 == 1.
segment_mask = (mydata["column_1"] == 1) & (mydata["text_sentiment"] == "POSITIVE")
data = mydata.loc[segment_mask, "analyzed_comment_type"].value_counts()
comment_types = list(data.index)
# Colours are looked up by the raw label; the legend shows a title-cased version.
chart_colors = [color_dict[raw_label] for raw_label in comment_types]
labels = [raw_label.replace("_", " ").title() for raw_label in comment_types]
title, figsize, fontsize = "Positive Complaints", (20, 29), 16

pie_chart_maker(data, labels, chart_colors, title, figsize, fontsize)

#### Comment Types for the NEGATIVE NON-COMPLAINTS

In [None]:
# Comment-type breakdown of negative-sentiment rows with column_1 == 0.
segment_mask = (mydata["column_1"] == 0) & (mydata["text_sentiment"] == "NEGATIVE")
data = mydata.loc[segment_mask, "analyzed_comment_type"].value_counts()
comment_types = list(data.index)
# Colours are looked up by the raw label; the legend shows a title-cased version.
chart_colors = [color_dict[raw_label] for raw_label in comment_types]
labels = [raw_label.replace("_", " ").title() for raw_label in comment_types]
title, figsize, fontsize = "Negative Non-Complaints", (20, 29), 16

pie_chart_maker(data, labels, chart_colors, title, figsize, fontsize)

#### Comment Types for the POSITIVE NON-COMPLAINTS

In [None]:
# Comment-type breakdown of positive-sentiment rows with column_1 == 0.
segment_mask = (mydata["column_1"] == 0) & (mydata["text_sentiment"] == "POSITIVE")
data = mydata.loc[segment_mask, "analyzed_comment_type"].value_counts()
comment_types = list(data.index)
# Colours are looked up by the raw label; the legend shows a title-cased version.
chart_colors = [color_dict[raw_label] for raw_label in comment_types]
labels = [raw_label.replace("_", " ").title() for raw_label in comment_types]
title, figsize, fontsize = "Positive Non-Complaints", (20, 29), 16

pie_chart_maker(data, labels, chart_colors, title, figsize, fontsize)