Project 7: Ticket Categorization and Routing

Problem Statement:
Automatically classify and route customer support tickets.

In [120]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [121]:
# Locate the data directory by swapping 'notebooks' for 'data' in the current
# working directory, then read the CSV through an explicit path. The working
# directory itself is left untouched, so nothing later in the notebook depends
# on whether (or how often) this cell has been executed.
data_dir = os.getcwd().replace('notebooks', 'data')

df = pd.read_csv(os.path.join(data_dir, 'data.csv'))


In [122]:
# Inspect 10 randomly chosen rows of the raw data. No random_state is passed,
# so the rows shown differ from run to run.
df.sample(10)


Unnamed: 0,Ticket ID,Customer Name,Customer Email,Customer Age,Customer Gender,Product Purchased,Date of Purchase,Ticket Type,Ticket Subject,Ticket Description,Ticket Status,Resolution,Ticket Priority,Ticket Channel,First Response Time,Time to Resolution,Customer Satisfaction Rating
2646,2647,Charles Welch,ashley56@example.com,25,Female,Canon DSLR Camera,2021-12-08,Refund request,Hardware issue,The {product_purchased} is unable to establish...,Open,,Medium,Email,,,
3229,3230,Benjamin Pierce,mcmahonrobert@example.net,59,Other,Microsoft Surface,2021-05-13,Product inquiry,Product compatibility,There seems to be a glitch in the {product_pur...,Closed,Until tend another.,Low,Social media,2023-06-01 13:00:06,2023-06-01 17:29:06,2.0
522,523,Richard Cunningham,camachojason@example.com,18,Female,GoPro Hero,2021-08-02,Product inquiry,Network problem,The {product_purchased} is unable to establish...,Open,,High,Chat,,,
6519,6520,Sarah Lee,mariahmartin@example.net,51,Other,Autodesk AutoCAD,2020-01-29,Technical issue,Data loss,I'm having trouble connecting my {product_purc...,Open,,High,Chat,,,
483,484,Brandi Torres,janetfox@example.com,37,Other,Amazon Kindle,2020-09-24,Technical issue,Product setup,My {product_purchased} is making strange noise...,Closed,Meet discuss answer us specific remain.,Low,Phone,2023-06-01 21:39:41,2023-06-01 00:26:41,5.0
3549,3550,Scott Reeves,paula71@example.org,42,Other,Amazon Kindle,2020-11-03,Product inquiry,Product recommendation,I'm having an issue with the {product_purchase...,Pending Customer Response,,Medium,Chat,2023-06-01 01:07:15,,
979,980,Anna Lutz,michael65@example.net,31,Female,Fitbit Versa Smartwatch,2020-05-14,Cancellation request,Network problem,I'm facing issues logging into my {product_pur...,Open,,Medium,Social media,,,
1781,1782,April Turner,arnoldcrystal@example.com,45,Female,Google Nest,2020-05-26,Refund request,Network problem,I'm having an issue with the {product_purchase...,Closed,Thus current side try west.,High,Social media,2023-06-01 05:00:28,2023-06-01 18:43:28,3.0
1085,1086,Jasmine Cooper,orrsarah@example.com,53,Male,LG OLED,2021-06-30,Cancellation request,Display issue,I'm encountering a software bug in the {produc...,Open,,High,Phone,,,
4004,4005,Sara Patterson,dhamilton@example.org,29,Female,Samsung Soundbar,2020-01-29,Technical issue,Product recommendation,"I've recently set up my {product_purchased}, b...",Closed,Yeah relationship think my especially responsi...,Critical,Email,2023-06-01 15:48:43,2023-06-01 01:27:43,2.0


In [123]:
# Keep only the free-text description (model input) and the ticket type
# (target). Take an explicit copy so that the later cells which modify this
# frame work on an independent object rather than on a selection of the raw
# data.
df = df[['Ticket Description', 'Ticket Type']].copy()


In [124]:
# Relabel the two remaining columns, by position, with short snake_case names.
df = df.set_axis(['description', 'category'], axis='columns')


In [125]:
# Spot-check 10 random rows of the two-column frame (unseeded sample).
df.sample(10)


Unnamed: 0,description,category
6759,I'm having an issue with the {product_purchase...,Cancellation request
6593,I'm having an issue with the {product_purchase...,Product inquiry
5299,I'm having an issue with the {product_purchase...,Cancellation request
8240,My {product_purchased} is making strange noise...,Technical issue
2006,I'm facing a problem with my {product_purchase...,Billing inquiry
4797,I'm facing issues logging into my {product_pur...,Billing inquiry
900,I'm having an issue with the {product_purchase...,Technical issue
710,I'm having an issue with the {product_purchase...,Refund request
3166,I've noticed a software bug in the {product_pu...,Billing inquiry
8415,I've noticed a software bug in the {product_pu...,Billing inquiry


In [126]:
# (rows, columns) before duplicate rows are removed.
df.shape


(8469, 2)

In [127]:
# First rows of the frame, in their original order.
df.head()


Unnamed: 0,description,category
0,I'm having an issue with the {product_purchase...,Technical issue
1,I'm having an issue with the {product_purchase...,Technical issue
2,I'm facing a problem with my {product_purchase...,Technical issue
3,I'm having an issue with the {product_purchase...,Billing inquiry
4,I'm having an issue with the {product_purchase...,Billing inquiry


In [128]:
# Number of missing values in each column.
df.isna().sum()


description    0
category       0
dtype: int64

In [129]:
# Number of rows whose (description, category) pair repeats an earlier row.
df.duplicated().sum()


279

In [130]:
# Discard exact duplicate rows, keeping the first occurrence of each
# (description, category) pair; the original row labels are retained.
df = df.drop_duplicates()


In [131]:
# (rows, columns) after duplicate rows were removed.
df.shape


(8190, 2)

In [132]:
# Spot-check 10 random rows of the de-duplicated frame (unseeded sample).
df.sample(10)


Unnamed: 0,description,category
3314,My {product_purchased} is making strange noise...,Refund request
7307,I'm having an issue with the {product_purchase...,Cancellation request
7209,I'm having trouble connecting my {product_purc...,Cancellation request
1709,I'm having an issue with the {product_purchase...,Product inquiry
8243,I'm having trouble connecting my {product_purc...,Billing inquiry
2887,I'm having an issue with the {product_purchase...,Cancellation request
5141,I'm having an issue with the {product_purchase...,Product inquiry
3765,I'm facing a problem with my {product_purchase...,Billing inquiry
3738,The {product_purchased} is unable to establish...,Product inquiry
4403,I'm having an issue with the {product_purchase...,Product inquiry


In [133]:
# Class distribution of the target: number of tickets per category.
df['category'].value_counts()


category
Refund request          1688
Technical issue         1670
Cancellation request    1640
Product inquiry         1600
Billing inquiry         1592
Name: count, dtype: int64

In [134]:
# Ticket categories in the order their integer codes are assigned (0, 1, 2, ...).
_category_labels = [
    "Refund request",
    "Technical issue",
    "Cancellation request",
    "Product inquiry",
    "Billing inquiry",
]

# Lookup table from category label to its integer class code.
category_mapping = {label: code for code, label in enumerate(_category_labels)}


In [135]:
# Translate the text labels into their integer codes.
encoded_category = df['category'].map(category_mapping)

# Series.map yields NaN for every value that is not a key of the mapping. That
# would silently corrupt the target, for example when this cell is executed a
# second time on labels that are already encoded, so fail loudly instead.
unmapped_labels = df.loc[encoded_category.isna(), 'category'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        f"Ticket categories without an entry in category_mapping: {list(unmapped_labels)}"
    )

df['category'] = encoded_category


In [136]:
# Spot-check 5 random rows to confirm the category column now holds codes.
df.sample(5)


Unnamed: 0,description,category
5284,I've encountered a data loss issue with my {pr...,3
511,I'm having an issue with the {product_purchase...,3
6686,I'm having an issue with the {product_purchase...,2
407,I'm having an issue with the {product_purchase...,1
3675,I'm having an issue with the {product_purchase...,2


In [137]:
# Model input: the free-text ticket description.
X = df['description']
# Target: the encoded ticket category.
y = df['category']


In [138]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the tickets for evaluation; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [139]:

import re
from nltk.stem import PorterStemmer

# Single Porter stemmer instance shared by every call to convert_text.
stemmer = PorterStemmer()


def convert_text(text_series):
    """Lowercase, tokenize and stem each document.

    Every document is lowercased, split into runs of word characters
    (letters, digits and underscore), and each token is passed through the
    module-level ``stemmer``. The stemmed tokens are joined back together
    with single spaces.

    Parameters
    ----------
    text_series : pandas.Series or other iterable of str
        Documents to normalise. Anything that is not already a Series
        (list, tuple, NumPy array, ...) is wrapped in one, so the function
        also works when the surrounding pipeline is called with a plain list
        of strings.

    Returns
    -------
    pandas.Series
        One normalised string per input document. For a Series input the
        original index is preserved.
    """
    if not isinstance(text_series, pd.Series):
        # Series.apply is used below; plain lists and arrays do not have it.
        text_series = pd.Series(list(text_series), dtype=object)

    def _normalise(text):
        tokens = re.findall(r'\b\w+\b', text.lower())
        return ' '.join(stemmer.stem(token) for token in tokens)

    return text_series.apply(_normalise)


In [140]:
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier


In [157]:
# Baseline text classifier: stem -> TF-IDF -> decision tree.
model = Pipeline([
    # Lowercase, tokenize and stem the raw descriptions with convert_text.
    ('preprocess', FunctionTransformer(convert_text, validate=False)),
    # TF-IDF features with English stop words removed and the vocabulary capped
    # at 5000 terms.
    ('tfidf', TfidfVectorizer(stop_words='english', max_features=5000)),
    # Fixed seed (same value as the train/test split) so that refitting the
    # tree gives the same model and the reported score is reproducible.
    ('clf', DecisionTreeClassifier(random_state=42))
])


In [158]:
# Fit every pipeline step on the training split only.
model.fit(X_train, y_train)


In [159]:
# Score the fitted pipeline on the held-out split.
# NOTE(review): there are five roughly balanced classes, so a score near 0.20
# is about what random guessing would achieve.
model.score(X_test, y_test)


0.20146520146520147

In [160]:
from sentence_transformers import SentenceTransformer


In [161]:
# Load the 'all-MiniLM-L6-v2' sentence-embedding model.
# NOTE(review): this rebinds `model`, which held the TF-IDF pipeline until
# here; the pipeline is no longer reachable after this cell runs.
model = SentenceTransformer('all-MiniLM-L6-v2')


In [164]:
# Embed the raw (un-stemmed) descriptions of each split with the sentence
# encoder currently bound to `model`.
train_sentences = X_train.tolist()
test_sentences = X_test.tolist()

X_train_embed = model.encode(train_sentences)
X_test_embed = model.encode(test_sentences)


In [165]:
# Decision tree trained on the sentence embeddings instead of TF-IDF features.
# NOTE: this rebinds `model` once more; from here on it is the classifier, not
# the sentence encoder.
# Fixed seed (same value as the train/test split) so that refitting gives the
# same tree and the reported score is reproducible.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train_embed, y_train)


In [166]:
# Score the embedding-based tree on the held-out embeddings.
# NOTE(review): with five roughly balanced classes, a score near 0.20 is again
# about what random guessing would achieve.
model.score(X_test_embed, y_test)


0.2045177045177045