In [1]:
#Step 1: Import the required libraries
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

In [2]:
#Step 2: Load the dataset
# The default location below is specific to the original author's machine.
# Set the TICKETS_CSV environment variable to load the file from elsewhere
# without editing the notebook; when it is unset the original path is used.
DATA_PATH=os.environ.get("TICKETS_CSV","C:\\Users\\user\\Documents\\NCI_AI\\Term 2\\Intelligent Agent\\Dataset\\customer_support_tickets.csv")
df=pd.read_csv(DATA_PATH)

In [3]:
#Step 3: Preview the first five rows of the raw tickets
first_rows=df.head(5)
print(first_rows)

   Ticket ID        Customer Name              Customer Email  Customer Age  \
0          1        Marisa Obrien  carrollallison@example.com            32   
1          2         Jessica Rios    clarkeashley@example.com            42   
2          3  Christopher Robbins   gonzalestracy@example.com            48   
3          4     Christina Dillon    bradleyolson@example.org            27   
4          5    Alexander Carroll     bradleymark@example.com            67   

  Customer Gender Product Purchased Date of Purchase      Ticket Type  \
0           Other        GoPro Hero       2021-03-22  Technical issue   
1          Female       LG Smart TV       2021-05-22  Technical issue   
2           Other          Dell XPS       2020-07-14  Technical issue   
3          Female  Microsoft Office       2020-11-13  Billing inquiry   
4          Female  Autodesk AutoCAD       2020-02-04  Billing inquiry   

             Ticket Subject  \
0             Product setup   
1  Peripheral compatibil

In [4]:
#Step 4: Rule-based complexity labelling
def check_complexity(row):
    """Derive a complexity label from a ticket's 'Ticket Priority' value.

    'Critical' -> 'expert', 'High' -> 'high', 'Medium' -> 'medium';
    every other value falls through to 'low'.
    """
    priority = row['Ticket Priority']
    # Checked in order; the first matching priority decides the label.
    for known_priority, label in (
        ('Critical', 'expert'),
        ('High', 'high'),
        ('Medium', 'medium'),
    ):
        if priority == known_priority:
            return label
    return 'low'
# Label every ticket row-by-row and store the result in a new 'complexity' column
df['complexity'] = df.apply(check_complexity, axis='columns')

In [5]:
#Step 5: Map complexity to technician levels
def assign_tech_level(complexity):
    """Return the technician level for a complexity label.

    Labels other than 'low', 'medium', 'high' and 'expert' yield 'Unassigned'.
    """
    level_by_complexity = {
        'low': 'Level 1',
        'medium': 'Level 2',
        'high': 'Level 3',
        'expert': 'Level 4',
    }
    return level_by_complexity.get(complexity, 'Unassigned')

#Step 6: Add a 'technician_level' column derived from each ticket's complexity
complexity_labels = df['complexity']
df['technician_level'] = complexity_labels.apply(assign_tech_level)

In [33]:
#Step 7: Keep only tickets that are still open or pending
# NOTE(review): filtered_df is not referenced by any later cell shown in this
# notebook (the NLP frame is built from df) - confirm whether that is intended.
active_statuses = ['Open','Pending','Pending Customer Response']
is_active = df['Ticket Status'].isin(active_statuses)
filtered_df = df[is_active]

In [34]:
#Step 8: Build the frame used to train the NLP model
# Keeps the id, the free-text description and the label; rows missing any of them are dropped.
nlp_columns = ['Ticket ID','Ticket Description','complexity']
df_nlp = df[nlp_columns].dropna()

In [35]:
#Step 9: Hold out 20% of the labelled tickets for testing (fixed seed for repeatability)
feature_columns = ['Ticket ID','Ticket Description']
y = df_nlp['complexity']
X = df_nlp[feature_columns]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [36]:
#Step 10: Define and train the model
# TF-IDF features (English stop words removed) feeding a multinomial Naive Bayes classifier.
text_vectorizer = TfidfVectorizer(stop_words='english')
nb_classifier = MultinomialNB()
pipeline_model = Pipeline(steps=[
    ('tfidfvectorizer', text_vectorizer),
    ('clf', nb_classifier),
])
# Only the description text is used as model input; 'Ticket ID' is carried along for reporting.
pipeline_model.fit(X_train['Ticket Description'], y_train)

In [46]:
#Step 11: NLP-based prediction on a sample of the test data
# Draw 20 test tickets (seeded) and pull out their description text for the model.
X_test_samp = X_test.sample(n=20, random_state=5)
sample_descriptions = X_test_samp['Ticket Description']
X_sample_texts = sample_descriptions.values


In [47]:
# Run the fitted pipeline on the sampled descriptions to get a complexity label per ticket
pred_complexity = pipeline_model.predict(X_sample_texts)

In [48]:
# Class probabilities from the trained pipeline (pipeline_model) for the sampled tickets
probs = pipeline_model.predict_proba(X_sample_texts)
# Confidence per sample = the largest class probability in each row
max_conf_scores = probs.max(axis=1)

In [49]:
#Step 12: AI's explanation of the technician selection
def explain_selection(pred_complexity,assigned_level,confidence_score):
    """Build a human-readable justification for a ticket assignment.

    Parameters
    ----------
    pred_complexity : str
        Predicted complexity label ('low', 'medium', 'high' or 'expert').
        Any other value contributes no complexity sentence.
    assigned_level : str
        Technician level the ticket was routed to.
    confidence_score : float
        Probability of the predicted class as a fraction between 0 and 1
        (the notebook passes the row-wise maximum of predict_proba).

    Returns
    -------
    str
        The applicable sentences separated by single spaces.
    """
    complexity_reasons = {
        "low": "Issue seems simple or more basic non-technical.",
        "medium": "Issue shows moderate technical involvement.",
        "high": "Issue indicates to be very complex or have significant impact.",
        "expert": "Issue appears to be very advanced or critical.",
    }
    sentences = [
        complexity_reasons.get(pred_complexity, ""),
        # '.2%' scales the 0-1 probability to a percentage; the previous
        # '.2f' + '%' rendered 0.316 as "0.32%" instead of "31.60%".
        f'The model was {confidence_score:.2%} confident.',
        f'Hence, assigned to {assigned_level}.',
    ]
    # Join with spaces so consecutive sentences no longer run together.
    return " ".join(sentence for sentence in sentences if sentence)


# Sanity check: the sampled rows, the predictions and the confidence scores
# are printed in that order and should all have the same length.
for aligned_values in (X_test_samp, pred_complexity, max_conf_scores):
    print(len(aligned_values))

20
20
20


In [50]:
#Step 13: Format the output
# Resolve each prediction's technician level once, then reuse it for the explanation text.
assigned_levels = [assign_tech_level(label) for label in pred_complexity]
explanations = [
    explain_selection(label, level, score)
    for label, level, score in zip(pred_complexity, assigned_levels, max_conf_scores)
]
results = pd.DataFrame({
    'Ticket ID': X_test_samp['Ticket ID'].values,
    'Ticket Description': X_test_samp['Ticket Description'].values,
    'Predicted Complexity': pred_complexity,
    'Assigned Technician Level': assigned_levels,
    'Confidence Score': max_conf_scores,
    'Explanation': explanations,
})



In [51]:
#Step 14: Show results
# Render the full table as plain text without the index column.
assignment_table = results.to_string(index=False)
print("\nTicket Assignments:\n")
print(assignment_table)


Ticket Assignments:

 Ticket ID                                                                                                                                                                                                                                                                                                                                               Ticket Description Predicted Complexity Assigned Technician Level  Confidence Score                                                                                                             Explanation
      5157                      I'm having an issue with the {product_purchased}. Please assist. If I am required to charge you, please contact me as soon as possible. Please read my Terms of Use.\n\nThanks!\n\nWe hope you enjoyed I'm unable to find the option to perform the desired action in the {product_purchased}. Could you please guide me through the steps?               expert                   Level 4          0.316021 

In [52]:
# Write the assignment table to a CSV file in the working directory, without the index column
results.to_csv(path_or_buf='Ticket Assignment.csv', index=False)

In [53]:
#Save the trained pipeline (vectorizer + classifier) to disk.
# joblib is imported in the notebook's import cell with the other dependencies.
joblib.dump(pipeline_model,"ticket_assign.pkl")

['ticket_assign.pkl']