In [20]:
# ===============================
# Data Handling
# ===============================
import pandas as pd
import numpy as np

# ===============================
# Text Feature Extraction
# ===============================
from sklearn.feature_extraction.text import TfidfVectorizer

# ===============================
# Machine Learning Models
# ===============================
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# ===============================
# Model Training Utilities
# ===============================
from sklearn.model_selection import train_test_split

# ===============================
# Evaluation Metrics
# ===============================
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

# ===============================
# Model Saving / Loading
# ===============================
import joblib

# ===============================
# NLP Information Extraction
# ===============================
import spacy
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC


In [21]:
import pandas as pd

# Read the preprocessed ticket CSV that every later cell works from.
df = pd.read_csv("english_tickets_preprocessed.csv")

# Sanity check: dimensions and column names on separate lines, then the first
# rows as the cell's displayed value.
print(df.shape, df.columns, sep="\n")
df.head()


(28261, 19)
Index(['subject', 'body', 'answer', 'type', 'queue', 'priority', 'version',
       'tag_1', 'tag_2', 'tag_3', 'tag_4', 'tag_5', 'tag_6', 'tag_7', 'tag_8',
       'subject_clean', 'body_clean', 'text', 'priority_num'],
      dtype='object')


Unnamed: 0,subject,body,answer,type,queue,priority,version,tag_1,tag_2,tag_3,tag_4,tag_5,tag_6,tag_7,tag_8,subject_clean,body_clean,text,priority_num
0,Account Disruption,"Dear Customer Support Team,\n\nI am writing to...","Thank you for reaching out, <name>. We are awa...",Incident,Technical Support,high,51.0,Account,Disruption,Outage,IT,Tech Support,Unknown,Unknown,Unknown,account disruption,dear customer support teamnni am writing to re...,account disruption dear customer support teamn...,2
1,Query About Smart Home System Integration Feat...,"Dear Customer Support Team,\n\nI hope this mes...",Thank you for your inquiry. Our products suppo...,Request,Returns and Exchanges,medium,51.0,Product,Feature,Tech Support,Unknown,Unknown,Unknown,Unknown,Unknown,query about smart home system integration feat...,dear customer support teamnni hope this messag...,query about smart home system integration feat...,1
2,Inquiry Regarding Invoice Details,"Dear Customer Support Team,\n\nI hope this mes...",We appreciate you reaching out with your billi...,Request,Billing and Payments,low,51.0,Billing,Payment,Account,Documentation,Feedback,Unknown,Unknown,Unknown,inquiry regarding invoice details,dear customer support teamnni hope this messag...,inquiry regarding invoice details dear custome...,0
3,Question About Marketing Agency Software Compa...,"Dear Support Team,\n\nI hope this message reac...",Thank you for your inquiry. Our product suppor...,Problem,Sales and Pre-Sales,medium,51.0,Product,Feature,Feedback,Tech Support,Unknown,Unknown,Unknown,Unknown,question about marketing agency software compa...,dear support teamnni hope this message reaches...,question about marketing agency software compa...,1
4,Feature Query,"Dear Customer Support,\n\nI hope this message ...",Thank you for your inquiry. Please specify whi...,Request,Technical Support,high,51.0,Feature,Product,Documentation,Feedback,Unknown,Unknown,Unknown,Unknown,feature query,dear customer supportnni hope this message rea...,feature query dear customer supportnni hope th...,2


In [22]:
# Model input is the `text` column; the three label columns are the targets
# for ticket type, numeric priority and routing queue.
X = df['text']
y_type, y_priority, y_queue = df['type'], df['priority_num'], df['queue']


In [23]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF features with the vocabulary capped at 5000 terms.
tfidf = TfidfVectorizer(max_features=5000)

# NOTE(review): the vectorizer is fitted on the whole corpus before any
# train/test split happens, so its vocabulary and IDF weights also see the rows
# that are later held out. Consider fitting on the training portion only.
X_features = tfidf.fit_transform(X)


In [24]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluating the ticket-type classifier; the fixed
# random_state keeps the split the same from run to run.
type_split = train_test_split(X_features, y_type, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = type_split


In [25]:
from sklearn.linear_model import LogisticRegression

# Ticket-type classifier. fit() hands back the estimator itself, so building and
# training are chained; the bare name keeps the fitted estimator as cell output.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
model


0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1000


In [26]:
from sklearn.metrics import classification_report

# Score the ticket-type model on the held-out split and print the per-class
# precision / recall / F1 table.
predictions = model.predict(X_test)
type_report = classification_report(y_test, predictions)
print(type_report)


              precision    recall  f1-score   support

      Change       0.99      0.91      0.95       595
    Incident       0.77      0.89      0.83      2249
     Problem       0.71      0.51      0.59      1200
     Request       0.97      1.00      0.98      1609

    accuracy                           0.84      5653
   macro avg       0.86      0.83      0.84      5653
weighted avg       0.84      0.84      0.83      5653



In [27]:
# Same 80/20 split parameters, now with the numeric priority as the label.
# This rebinds the shared X_train / X_test / y_train / y_test names.
priority_split = train_test_split(
    X_features, df['priority_num'], test_size=0.2, random_state=42
)
X_train, X_test, y_train, y_test = priority_split

# Chained fit; the bare name keeps the fitted estimator as the cell output.
priority_model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
priority_model


0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1000


In [28]:
# Same 80/20 split parameters, now with the routing queue as the label.
# This rebinds X_train / X_test / y_train / y_test again; any later cell that
# does not split for itself will be working with these queue labels.
queue_split = train_test_split(
    X_features, df['queue'], test_size=0.2, random_state=42
)
X_train, X_test, y_train, y_test = queue_split

# Chained fit; the bare name keeps the fitted estimator as the cell output.
queue_model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
queue_model


0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1000


In [29]:
def predict_ticket(ticket_text):
    """Classify one raw ticket string with the three fitted models.

    The text is vectorised with the already-fitted ``tfidf`` and passed to
    ``model``, ``priority_model`` and ``queue_model``.

    Returns:
        dict with the keys "Category", "Priority" (cast to a plain int) and
        "Recommended Team".
    """
    # transform() takes an iterable of documents, hence the one-item list.
    features = tfidf.transform([ticket_text])

    result = {}
    result["Category"] = model.predict(features)[0]
    result["Priority"] = int(priority_model.predict(features)[0])
    result["Recommended Team"] = queue_model.predict(features)[0]
    return result


In [30]:
# Smoke-test the predictor on a hand-written example ticket; the returned dict
# is shown as the cell output.
predict_ticket("My system crashes when opening dashboard")


{'Category': 'Incident',
 'Priority': 2,
 'Recommended Team': 'Technical Support'}

In [32]:
import spacy

# Load the "en_core_web_sm" spaCy pipeline once at cell scope; extract_info
# reuses this object on every call instead of reloading it.
nlp = spacy.load("en_core_web_sm")

def extract_info(text):
    """Run the loaded spaCy pipeline on ``text`` and list the entities it finds.

    Returns:
        list of ``(entity_text, entity_label)`` tuples, in the order they
        appear in ``doc.ents``.
    """
    entities = []
    for span in nlp(text).ents:
        entities.append((span.text, span.label_))
    return entities


In [33]:
# Class balance of the queue and priority targets. Only the last expression of
# a cell is displayed automatically, so the queue counts have to be printed
# explicitly or they are computed and never shown.
print(df['queue'].value_counts())
df['priority'].value_counts()


priority
medium    11570
high      10917
low        5774
Name: count, dtype: int64

In [34]:
import joblib

# Write the three fitted classifiers to the working directory.
for estimator, filename in (
    (model, "category_model.pkl"),
    (priority_model, "priority_model.pkl"),
    (queue_model, "queue_model.pkl"),
):
    joblib.dump(estimator, filename)

# The vectorizer is saved last and left as the cell's final expression, so the
# value returned by joblib.dump for it is the cell output.
joblib.dump(tfidf, "tfidf_vectorizer.pkl")


['tfidf_vectorizer.pkl']

In [35]:
# Random-forest baseline with 100 trees and a fixed seed. No split is made in
# this cell, so X_train / y_train / X_test are whatever the most recently run
# train_test_split cell left behind.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train, y_train
)
rf_pred = rf_model.predict(X_test)


In [36]:
# Linear SVM baseline with default settings, trained and scored on the same
# lingering X_train / y_train / X_test as the random forest (no split is made
# in this cell).
svm_model = LinearSVC().fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)


In [37]:
from sklearn.metrics import classification_report

# Print one classification report per baseline, each under its own heading.
# Both are scored against the current y_test.
for heading, preds in (
    ("Random Forest Results", rf_pred),
    ("SVM Results", svm_pred),
):
    print(heading)
    print(classification_report(y_test, preds))


Random Forest Results
                                 precision    recall  f1-score   support

           Billing and Payments       0.96      0.81      0.88       550
               Customer Service       0.68      0.69      0.68       845
                General Inquiry       1.00      0.45      0.62        95
                Human Resources       0.98      0.54      0.70       114
                     IT Support       0.97      0.54      0.70       660
                Product Support       0.82      0.66      0.73      1107
          Returns and Exchanges       0.99      0.57      0.73       264
            Sales and Pre-Sales       1.00      0.43      0.60       173
Service Outages and Maintenance       0.96      0.67      0.79       197
              Technical Support       0.60      0.95      0.73      1648

                       accuracy                           0.73      5653
                      macro avg       0.90      0.63      0.72      5653
                   weighted

In [38]:
def predict_ticket(ticket_text):
    """Predict ticket type, priority and routing queue for one ticket string.

    Args:
        ticket_text: raw ticket text; it is vectorised with the fitted
            ``tfidf`` before being passed to the classifiers.

    Returns:
        dict with the keys "Category" (ticket type), "Priority" (plain int)
        and "Recommended Team" (queue name).
    """
    # transform() takes an iterable of documents, hence the one-item list.
    text_vector = tfidf.transform([ticket_text])

    # The category must come from `model`, the classifier fitted on df['type'].
    # `svm_model` was fitted on the X_train / y_train left over from the queue
    # split, so it predicts queue names and would put a team name in "Category".
    category = model.predict(text_vector)[0]
    priority = int(priority_model.predict(text_vector)[0])
    queue = queue_model.predict(text_vector)[0]

    return {
        "Category": category,
        "Priority": priority,
        "Recommended Team": queue
    }


In [39]:
from sklearn.metrics import accuracy_score

# Overall accuracy of each baseline against the current y_test.
for label, preds in (
    ("RF Accuracy:", rf_pred),
    ("SVM Accuracy:", svm_pred),
):
    print(label, accuracy_score(y_test, preds))


RF Accuracy: 0.7323545020343181
SVM Accuracy: 0.5503272598620201


In [None]:
import streamlit as st
import joblib

# Load the saved classifiers and the vectorizer from disk. The names on the
# left line up one-to-one with the file names, in the same order.
category_model, priority_model, queue_model, tfidf = (
    joblib.load(path)
    for path in (
        "category_model.pkl",
        "priority_model.pkl",
        "queue_model.pkl",
        "tfidf_vectorizer.pkl",
    )
)

# Display labels for the numeric classes the priority model predicts. The keys
# follow the `priority_num` encoding of the training data (0 = low,
# 1 = medium, 2 = high); with keys 1..3 every label was shifted one level down
# and class 0 had no label at all.
priority_map = {
    0: "Low",
    1: "Medium",
    2: "High"
}

# Prediction Function
def predict_ticket(ticket_text):
    """Predict category, priority label and queue for one ticket string.

    Returns:
        ``(category, priority, queue)`` tuple. The numeric priority is looked
        up in ``priority_map``; a value that is not a key of the map is
        returned unchanged as an int.
    """
    # transform() takes an iterable of documents, hence the one-item list.
    features = tfidf.transform([ticket_text])

    predicted_category = category_model.predict(features)[0]
    priority_code = int(priority_model.predict(features)[0])
    predicted_queue = queue_model.predict(features)[0]

    priority_label = priority_map.get(priority_code, priority_code)
    return predicted_category, priority_label, predicted_queue


# Streamlit UI
# NOTE(review): the log captured under this cell shows Streamlit warning that
# it was not started with `streamlit run`; this code presumably belongs in its
# own script rather than in a notebook cell - confirm.
st.title("AI IT Ticket Assistant")

ticket_text = st.text_area("Enter Ticket Description")

if st.button("Predict"):
    if ticket_text.strip():
        category, priority, queue = predict_ticket(ticket_text)

        st.success("Prediction Result")

        for caption, value in (
            ("Category:", category),
            ("Priority:", priority),
            ("Recommended Team:", queue),
        ):
            st.write(caption, value)
    else:
        # Whitespace-only input is treated the same as no input.
        st.warning("Please enter ticket description")


2026-02-07 23:52:39.027 
  command:

    streamlit run C:\Users\Hc\AppData\Roaming\Python\Python310\site-packages\ipykernel_launcher.py [ARGUMENTS]
2026-02-07 23:52:39.042 Session state does not function when running a script without `streamlit run`
