In [3]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

# Import Data

In [5]:
# Load the support-ticket export from the working directory into `data`,
# the frame that the later cells read from and add columns to.
data = pd.read_csv(filepath_or_buffer='customer_support_tickets.csv')

# Preview the first ten rows as the cell output.
data.head(n=10)

Unnamed: 0,Ticket ID,Customer Name,Customer Email,Customer Age,Customer Gender,Product Purchased,Date of Purchase,Ticket Type,Ticket Subject,Ticket Description,Ticket Status,Resolution,Ticket Priority,Ticket Channel,First Response Time,Time to Resolution,Customer Satisfaction Rating
0,1,Marisa Obrien,carrollallison@example.com,32,Other,GoPro Hero,2021-03-22,Technical issue,Product setup,I'm having an issue with the {product_purchase...,Pending Customer Response,,Critical,Social media,2023-06-01 12:15:36,,
1,2,Jessica Rios,clarkeashley@example.com,42,Female,LG Smart TV,2021-05-22,Technical issue,Peripheral compatibility,I'm having an issue with the {product_purchase...,Pending Customer Response,,Critical,Chat,2023-06-01 16:45:38,,
2,3,Christopher Robbins,gonzalestracy@example.com,48,Other,Dell XPS,2020-07-14,Technical issue,Network problem,I'm facing a problem with my {product_purchase...,Closed,Case maybe show recently my computer follow.,Low,Social media,2023-06-01 11:14:38,2023-06-01 18:05:38,3.0
3,4,Christina Dillon,bradleyolson@example.org,27,Female,Microsoft Office,2020-11-13,Billing inquiry,Account access,I'm having an issue with the {product_purchase...,Closed,Try capital clearly never color toward story.,Low,Social media,2023-06-01 07:29:40,2023-06-01 01:57:40,3.0
4,5,Alexander Carroll,bradleymark@example.com,67,Female,Autodesk AutoCAD,2020-02-04,Billing inquiry,Data loss,I'm having an issue with the {product_purchase...,Closed,West decision evidence bit.,Low,Email,2023-06-01 00:12:42,2023-06-01 19:53:42,1.0
5,6,Rebecca Fleming,sheenasmith@example.com,53,Male,Microsoft Office,2020-07-28,Cancellation request,Payment issue,I'm facing a problem with my {product_purchase...,Open,,Low,Social media,,,
6,7,Jacqueline Wright,donaldkeith@example.org,24,Other,Microsoft Surface,2020-02-23,Product inquiry,Refund request,I'm unable to access my {product_purchased} ac...,Open,,Critical,Social media,,,
7,8,Denise Lee,joelwilliams@example.com,23,Male,Philips Hue Lights,2020-08-09,Refund request,Battery life,I'm having an issue with the {product_purchase...,Open,,Critical,Social media,,,
8,9,Nicolas Wilson,joshua24@example.com,60,Other,Fitbit Versa Smartwatch,2020-07-16,Technical issue,Installation support,I'm having an issue with the {product_purchase...,Pending Customer Response,,Low,Social media,2023-06-01 10:32:47,,
9,10,William Dawson,clopez@example.com,27,Male,Dyson Vacuum Cleaner,2020-03-06,Refund request,Payment issue,My {product_purchased} is making strange noise...,Pending Customer Response,,Critical,Phone,2023-06-01 09:25:48,,


In [6]:
# Summarise the frame: index range, column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8469 entries, 0 to 8468
Data columns (total 17 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Ticket ID                     8469 non-null   int64  
 1   Customer Name                 8469 non-null   object 
 2   Customer Email                8469 non-null   object 
 3   Customer Age                  8469 non-null   int64  
 4   Customer Gender               8469 non-null   object 
 5   Product Purchased             8469 non-null   object 
 6   Date of Purchase              8469 non-null   object 
 7   Ticket Type                   8469 non-null   object 
 8   Ticket Subject                8469 non-null   object 
 9   Ticket Description            8469 non-null   object 
 10  Ticket Status                 8469 non-null   object 
 11  Resolution                    2769 non-null   object 
 12  Ticket Priority               8469 non-null   object 
 13  Tic

Steps for Data Preprocessing
Handle Missing Values
Date Conversion
Categorical Encoding
Feature Engineering
Data Scaling (if necessary for the model)
Splitting the Data into training and testing sets
Here’s how you can systematically approach each step:

1. Handle Missing Values
Depending on the context of the data, you can drop rows with missing values or impute them (for example with the median, the mode, or a placeholder category such as 'Unknown').

In [None]:
# The raw export stores timestamps rather than durations, so the two duration
# columns are derived here (in minutes) when they are not already present.
# NOTE(review): no ticket-creation timestamp is visible in the data, so the first
# response is measured from the purchase date, the same baseline the notebook's
# 'Response Time (mins)' feature uses -- confirm this is the intended definition.
# NOTE(review): the preview contains a ticket resolved *before* its first
# response, so 'Resolution Duration' can be negative -- confirm how to treat it.
purchased_at = pd.to_datetime(data['Date of Purchase'], errors='coerce')
first_response_at = pd.to_datetime(data['First Response Time'], errors='coerce')
resolved_at = pd.to_datetime(data['Time to Resolution'], errors='coerce')

if 'First Response Duration' not in data.columns:
    data['First Response Duration'] = (first_response_at - purchased_at).dt.total_seconds() / 60
if 'Resolution Duration' not in data.columns:
    data['Resolution Duration'] = (resolved_at - first_response_at).dt.total_seconds() / 60

# Fill missing values for numerical columns with the median.
# The result is assigned back rather than using fillna(..., inplace=True) on a
# column selection: that form is chained assignment, which pandas deprecates and
# which does not update `data` at all under copy-on-write.
for duration_col in ('First Response Duration', 'Resolution Duration'):
    data[duration_col] = data[duration_col].fillna(data[duration_col].median())

# For categorical data, fill missing values with an explicit placeholder category.
data['Resolution'] = data['Resolution'].fillna('No resolution provided')


Convert Date Columns to DateTime
Converting date columns to datetime objects makes it easier to extract date-related features.

In [None]:
# Purchase dates are present on every row, so they are parsed strictly:
# an unparseable value raises instead of being hidden.
data['Date of Purchase'] = pd.to_datetime(data['Date of Purchase'])

# The two ticket timestamps are blank for many rows; anything that cannot be
# parsed becomes NaT rather than raising.
for stamp_col in ('First Response Time', 'Time to Resolution'):
    data[stamp_col] = pd.to_datetime(data[stamp_col], errors='coerce')


Categorical Encoding
Transform categorical data into a format that can be provided to machine learning algorithms.

In [None]:
from sklearn.preprocessing import LabelEncoder

# Categorical columns that later cells pass to scikit-learn estimators; left as
# strings they make `fit` fail, so each one is replaced by integer codes.
CATEGORICAL_FEATURES = ['Ticket Channel', 'Ticket Priority', 'Ticket Type']

# One fitted encoder per column, so codes can be mapped back to their labels
# with label_encoders[column].inverse_transform(...). Reusing a single encoder
# for every column would keep only the last column's mapping.
# NOTE(review): LabelEncoder numbers classes in sorted (alphabetical) order, so
# the 'Ticket Priority' codes do not follow severity -- use an explicit ordinal
# mapping if a model should see priority as ordered.
label_encoders = {}
for column in CATEGORICAL_FEATURES:
    encoder = LabelEncoder()
    data[column] = encoder.fit_transform(data[column])
    label_encoders[column] = encoder

# Kept for backward compatibility: the encoder fitted on 'Ticket Channel'.
label_encoder = label_encoders['Ticket Channel']


Feature Engineering
Create new features that can potentially help in improving model performance.

In [None]:
# Calendar features taken from the purchase date (requires the column to
# already hold datetimes).
purchase_dates = data['Date of Purchase']
data['Day of Week'] = purchase_dates.dt.day_name()
data['Month'] = purchase_dates.dt.month
data['Year'] = purchase_dates.dt.year

# Minutes elapsed between the purchase date and the first response; rows with
# no first response stay missing.
elapsed = data['First Response Time'] - purchase_dates
data['Response Time (mins)'] = elapsed.dt.total_seconds() / 60


 Data Scaling
Scale the features if you are using algorithms like SVM, KNN, or neural networks that are sensitive to the range of data values.

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the two numeric inputs to zero mean and unit variance.
# NOTE(review): the scaler is fitted on the full frame before any train/test
# split, so test-set statistics leak into it -- consider fitting on the
# training rows only.
numeric_columns = ['Customer Age', 'Response Time (mins)']
scaler = StandardScaler()
scaler.fit(data[numeric_columns])
scaled_features = scaler.transform(data[numeric_columns])


 Splitting the Data
Split the data into training and testing sets to ensure the model is tested on unseen data.

In [None]:
from sklearn.model_selection import train_test_split

# Assuming 'Customer Satisfaction Rating' is the target variable.
TARGET = 'Customer Satisfaction Rating'

# In the preview only closed tickets carry a rating; the rest are blank.
# Rows without a label can neither train nor score a supervised model, so
# they are removed before splitting.
labelled = data.dropna(subset=[TARGET])

X = labelled.drop(columns=TARGET)
y = labelled[TARGET]

# Fixed seed so the same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
import statsmodels.api as sm

# Count rows per calendar day of purchase.
# NOTE(review): the data shows no ticket-creation date, so this is volume by
# purchase date, not by the day the ticket was raised -- confirm that is intended.
purchase_dates = pd.to_datetime(data['Date of Purchase'])
daily_tickets = (
    data.groupby(purchase_dates)
    .size()
    # Give the index an explicit daily frequency and count absent days as zero;
    # without a frequency seasonal_decompose cannot infer the period and raises.
    .asfreq('D', fill_value=0)
)

# Seasonal decomposition to understand components (weekly cycle on daily data).
decomposition = sm.tsa.seasonal_decompose(daily_tickets, model='additive', period=7)
decomposition.plot()

# Fit a simple ARIMA model and forecast the next 30 days.
model = sm.tsa.ARIMA(daily_tickets, order=(1, 1, 1))
results = model.fit()
forecast = results.forecast(steps=30)


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Prepare data
feature_cols = ['Customer Age', 'Ticket Priority', 'Ticket Channel']  # example features
categorical_cols = ['Ticket Priority', 'Ticket Channel']
target_col = 'First Response Duration'  # must already exist as a numeric column in `data`

# LinearRegression rejects NaN in either X or y, so keep complete rows only.
model_rows = data.dropna(subset=feature_cols + [target_col])

# One-hot encode the categoricals so the model does not read an arbitrary
# numeric order into them; this works whether the columns still hold strings or
# integer codes. drop_first avoids a column set collinear with the intercept.
X = pd.get_dummies(model_rows[feature_cols], columns=categorical_cols, drop_first=True, dtype=float)
y = model_rows[target_col]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict and evaluate
predictions = model.predict(X_test)
print("MSE:", mean_squared_error(y_test, predictions))


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# 'Month' and 'Day of Week' are the calendar columns the feature-engineering
# step derives from 'Date of Purchase'; the frame has no 'Month of Purchase' or
# 'Day of Purchase' column.
# Fixed weekday order so the bars read Monday to Sunday instead of in
# first-appearance order.
WEEKDAY_ORDER = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Plotting to find high demand periods
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(x='Month', data=data, ax=ax)
ax.set_title('Ticket Volume by Month')
plt.show()

fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(x='Day of Week', data=data, order=WEEKDAY_ORDER, ax=ax)
ax.set_title('Ticket Volume by Day of Week')
plt.show()


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Assume 'Best Resource' is a column naming the best resource for a ticket.
# It is not part of the loaded export, so it has to be added before this cell
# can run; fail with an explicit message rather than an opaque lookup error.
feature_cols = ['Ticket Type', 'Ticket Priority', 'Customer Age']  # example features
categorical_cols = ['Ticket Type', 'Ticket Priority']
label_col = 'Best Resource'

if label_col not in data.columns:
    raise KeyError(f"'{label_col}' column is required as the classification label but is missing from data")

# Only rows that actually carry a label can be used for training or scoring.
labelled = data.dropna(subset=[label_col])

# The forest needs numeric inputs, so one-hot encode the categorical features
# (works for string values and for integer codes alike). The label itself may
# stay as strings: scikit-learn classifiers accept string class labels.
X = pd.get_dummies(labelled[feature_cols], columns=categorical_cols, dtype=float)
y = labelled[label_col]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train classifier (seeded so repeated runs build the same forest)
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

# Predict
predictions = classifier.predict(X_test)


In [None]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Prepare data
# The two duration columns must already exist as numeric columns in `data`.
feature_cols = ['First Response Duration', 'Resolution Duration', 'Ticket Priority']  # example features
target_col = 'Customer Satisfaction Rating'

# In the preview the rating is blank for tickets that are not closed, and the
# regressor cannot be fitted on NaN targets, so keep only rows where the
# features and the target are all present.
rated = data.dropna(subset=feature_cols + [target_col])

# One-hot encode priority so the model receives numeric input whether the
# column still holds strings or integer codes.
X = pd.get_dummies(rated[feature_cols], columns=['Ticket Priority'], dtype=float)
y = rated[target_col]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model (seeded so the reported error is reproducible)
gb_regressor = GradientBoostingRegressor(n_estimators=100, random_state=42)
gb_regressor.fit(X_train, y_train)

# Predict and evaluate
predictions = gb_regressor.predict(X_test)
print("MSE:", mean_squared_error(y_test, predictions))
