In [55]:
import pandas as pd
import numpy as np
import re
import string
import json
import gradio as gr
import joblib

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from dateutil.parser import parse
import warnings 

warnings.filterwarnings("ignore")
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\samar\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\samar\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [56]:
# === Load and Preprocess Data ===
df = pd.read_excel('ai_dev_assignment_tickets_complex_1000.xls')
df.dropna(inplace=True)

In [57]:
# Text preprocessing
def preprocess(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    tokens = text.split()
    tokens = [word for word in tokens if word not in stopwords.words('english')]
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    return ' '.join(tokens)

df['clean_text'] = df['ticket_text'].apply(preprocess)

In [58]:
# Feature Engineering
df['ticket_length'] = df['ticket_text'].apply(len)


In [59]:
# Labels
# === 3. Label Encoding ===
le_issue = LabelEncoder()
df['issue_type_enc'] = le_issue.fit_transform(df['issue_type'])

le_urgency = LabelEncoder()
df['urgency_level_enc'] = le_urgency.fit_transform(df['urgency_level'])



X_train_texts, X_test_texts, y_issue_train, y_issue_test, y_urgency_train, y_urgency_test = train_test_split(
    df['clean_text'], df['issue_type_enc'], df['urgency_level_enc'], test_size=0.2, random_state=42)

In [60]:
# === 5. TF-IDF Vectorization ===
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(X_train_texts)
X_test = vectorizer.transform(X_test_texts)

In [61]:
# Models
issue_model = RandomForestClassifier(random_state=42)
issue_model.fit(X_train, y_issue_train)

urgency_model = LogisticRegression(max_iter=1000)
urgency_model.fit(X_train, y_urgency_train)

In [62]:
# === 7. Save Models ===
joblib.dump(vectorizer, 'vectorizer.pkl')
joblib.dump(issue_model, 'issue_model.pkl')
joblib.dump(urgency_model, 'urgency_model.pkl')
joblib.dump(le_issue, 'le_issue.pkl')
joblib.dump(le_urgency, 'le_urgency.pkl')


['le_urgency.pkl']

In [75]:
# === Entity Extraction ===
products = ['smartwatch v2', 'soundwave 300', 'photosnap cam', 'ecobreeze ac',
                'robochef blender', 'powermax battery', 'vision led tv',
                'protab x1', 'fitrun treadmill', 'ultraclean vacuum']

complaints= ['broken', 'not working', 'late', 'error', 'issue', 'defective', 'cracked', 'missing', 'damaged', 'faulty']

def extract_entities(text):
    entities = {'products': [], 'dates': [], 'complaints': []}
    text_lower = text.lower()

    for product in products:
        if product in text_lower:
            entities['products'].append(product)

    for complaint in complaints:
        if complaint in text_lower:
            entities['complaints'].append(complaint)

    date_matches = re.findall(r'\b(?:\d{1,2}\s)?(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[a-z]*\b', text_lower)
    for date_str in date_matches:
        try:
            parsed_date = parse(date_str, fuzzy=True, default=pd.Timestamp('2025-01-01'))
            entities['dates'].append(parsed_date.strftime('%d-%B-%Y'))
        except Exception:
            pass

    return entities

In [68]:
# === Analysis Function ===
def analyze_ticket(ticket_text):
    vectorizer = joblib.load('vectorizer.pkl')
    issue_model = joblib.load('issue_model.pkl')
    urgency_model = joblib.load('urgency_model.pkl')
    le_issue = joblib.load('le_issue.pkl')
    le_urgency = joblib.load('le_urgency.pkl')
    
    text_cleaned = preprocess(ticket_text)
    X_input = vectorizer.transform([text_cleaned])

   # Predict
    issue_pred = issue_model.predict(X_input)
    urgency_pred = urgency_model.predict(X_input)

    # Decode labels
    issue = le_issue.inverse_transform(issue_pred)[0]
    urgency = le_urgency.inverse_transform(urgency_pred)[0]

    # Extract entities
    entities = extract_entities(ticket_text)
    
    return {
        "Predicted Issue Type": issue,
        "Predicted Urgency Level": urgency,
        "Extracted Entities": entities
    }

In [77]:
# === 10. Test Example ===
ticket = "my product broken on april 24"
result = analyze_ticket(ticket)
print(json.dumps(result, indent=2))

{
  "Predicted Issue Type": "General Inquiry",
  "Predicted Urgency Level": "High",
  "Extracted Entities": {
    "products": [],
    "dates": [
      "01-April-2025"
    ],
    "complaints": [
      "broken"
    ]
  }
}


In [83]:
# === Gradio Interface ===
def gradio_interface(ticket_text):
    result = analyze_ticket(ticket_text)
    print(result)
    return json.dumps(result, indent=2)
    
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=5, placeholder="Enter customer ticket text here..."),
    outputs="text",
    title="Customer Support Ticket Analyzer",
    description="Predict issue type, urgency level, and extract entities from support tickets."
)

iface.launch()


* Running on local URL:  http://127.0.0.1:7866
* To create a public link, set `share=True` in `launch()`.


