In [6]:
import pandas as pd
import numpy as np
import nltk
import re

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report


In [7]:
nltk.download('stopwords')
from nltk.corpus import stopwords

stop_words = set(stopwords.words('english'))


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\PAYAL\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [9]:
data = pd.read_csv("data/support_tickets.csv", encoding="latin1")
data.head()


Unnamed: 0,Ticket ID,Customer Name,Customer Email,Customer Age,Customer Gender,Product Purchased,Date of Purchase,Ticket Type,Ticket Subject,Ticket Description,Ticket Status,Resolution,Ticket Priority,Ticket Channel,First Response Time,Time to Resolution,Customer Satisfaction Rating
0,1,Marisa Obrien,carrollallison@example.com,32,Other,GoPro Hero,2021-03-22,Technical issue,Product setup,I'm having an issue with the {product_purchase...,Pending Customer Response,,Critical,Social media,2023-06-01 12:15:36,,
1,2,Jessica Rios,clarkeashley@example.com,42,Female,LG Smart TV,2021-05-22,Technical issue,Peripheral compatibility,I'm having an issue with the {product_purchase...,Pending Customer Response,,Critical,Chat,2023-06-01 16:45:38,,
2,3,Christopher Robbins,gonzalestracy@example.com,48,Other,Dell XPS,2020-07-14,Technical issue,Network problem,I'm facing a problem with my {product_purchase...,Closed,Case maybe show recently my computer follow.,Low,Social media,2023-06-01 11:14:38,2023-06-01 18:05:38,3.0
3,4,Christina Dillon,bradleyolson@example.org,27,Female,Microsoft Office,2020-11-13,Billing inquiry,Account access,I'm having an issue with the {product_purchase...,Closed,Try capital clearly never color toward story.,Low,Social media,2023-06-01 07:29:40,2023-06-01 01:57:40,3.0
4,5,Alexander Carroll,bradleymark@example.com,67,Female,Autodesk AutoCAD,2020-02-04,Billing inquiry,Data loss,I'm having an issue with the {product_purchase...,Closed,West decision evidence bit.,Low,Email,2023-06-01 00:12:42,2023-06-01 19:53:42,1.0


In [10]:
data = data.dropna(subset=[
    'Ticket Description',
    'Ticket Type',
    'Ticket Priority'
])

data[['Ticket Description', 'Ticket Type', 'Ticket Priority']].head()


Unnamed: 0,Ticket Description,Ticket Type,Ticket Priority
0,I'm having an issue with the {product_purchase...,Technical issue,Critical
1,I'm having an issue with the {product_purchase...,Technical issue,Critical
2,I'm facing a problem with my {product_purchase...,Technical issue,Low
3,I'm having an issue with the {product_purchase...,Billing inquiry,Low
4,I'm having an issue with the {product_purchase...,Billing inquiry,Low


In [11]:
def clean_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z]', ' ', text)
    words = text.split()
    words = [word for word in words if word not in stop_words]
    return ' '.join(words)


In [12]:
data['clean_text'] = data['Ticket Description'].apply(clean_text)
data[['Ticket Description', 'clean_text']].head()


Unnamed: 0,Ticket Description,clean_text
0,I'm having an issue with the {product_purchase...,issue product purchased please assist billing ...
1,I'm having an issue with the {product_purchase...,issue product purchased please assist need cha...
2,I'm facing a problem with my {product_purchase...,facing problem product purchased product purch...
3,I'm having an issue with the {product_purchase...,issue product purchased please assist problem ...
4,I'm having an issue with the {product_purchase...,issue product purchased please assist note sel...


In [13]:
X = data['clean_text']
y_category = data['Ticket Type']


In [14]:
vectorizer = TfidfVectorizer(max_features=3000)
X_tfidf = vectorizer.fit_transform(X)


In [15]:
X_train, X_test, y_train, y_test = train_test_split(
    X_tfidf, y_category, test_size=0.2, random_state=42
)


In [16]:
category_model = LogisticRegression(max_iter=1000)
category_model.fit(X_train, y_train)


0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1000


In [17]:
y_pred = category_model.predict(X_test)

print("Category Classification Accuracy:")
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Category Classification Accuracy:
0.2012987012987013
                      precision    recall  f1-score   support

     Billing inquiry       0.18      0.14      0.16       357
Cancellation request       0.18      0.20      0.19       327
     Product inquiry       0.20      0.19      0.19       316
      Refund request       0.21      0.24      0.23       345
     Technical issue       0.22      0.24      0.23       349

            accuracy                           0.20      1694
           macro avg       0.20      0.20      0.20      1694
        weighted avg       0.20      0.20      0.20      1694



In [18]:
y_priority = data['Ticket Priority']


In [19]:
X_train_p, X_test_p, y_train_p, y_test_p = train_test_split(
    X_tfidf, y_priority, test_size=0.2, random_state=42
)


In [20]:
priority_model = LogisticRegression(max_iter=1000)
priority_model.fit(X_train_p, y_train_p)


0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1000


In [21]:
y_pred_p = priority_model.predict(X_test_p)

print("Priority Classification Accuracy:")
print(accuracy_score(y_test_p, y_pred_p))
print(classification_report(y_test_p, y_pred_p))


Priority Classification Accuracy:
0.2597402597402597
              precision    recall  f1-score   support

    Critical       0.25      0.26      0.25       411
        High       0.28      0.28      0.28       409
         Low       0.22      0.21      0.22       415
      Medium       0.29      0.28      0.29       459

    accuracy                           0.26      1694
   macro avg       0.26      0.26      0.26      1694
weighted avg       0.26      0.26      0.26      1694

