<a href="https://colab.research.google.com/github/Mayan-create/ticket_classifier/blob/main/task1_ticket_classifier_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
# 1. Imports
from IPython import get_ipython
from IPython.display import display
import pandas as pd
import re
import nltk
import dateparser
import numpy as np
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from scipy.sparse import hstack
from textblob import TextBlob
import gradio as gr # Import gradio at the beginning

# Download the NLTK resources used by the preprocessing step below.
# 'punkt_tab' is included because recent NLTK releases load it (rather than
# 'punkt') for word_tokenize; fetching both keeps this cell working on a fresh
# kernel regardless of the installed NLTK version.
for _resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet', 'omw-1.4'):
    nltk.download(_resource)

# 2. Load Dataset
# NOTE(review): '/content/...' looks like a Colab session path; adjust
# DATA_PATH when running elsewhere.
DATA_PATH = '/content/ai_dev_assignment_tickets_complex_1000.xls'
REQUIRED_COLUMNS = ['ticket_text', 'issue_type', 'urgency_level', 'product']

# Keep only rows that have every field the pipeline relies on. The explicit
# copy gives later column assignments an independent frame to write into.
df = pd.read_excel(DATA_PATH).dropna(subset=REQUIRED_COLUMNS).copy()
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string

# 3. Text Preprocessing (define function and apply)
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess(text):
    """Normalize raw ticket text into a space-joined string of lemmas.

    The text is lower-cased and tokenized with ``word_tokenize``; tokens that
    are not purely alphabetic or that are English stopwords are dropped, and
    the remaining tokens are lemmatized with ``WordNetLemmatizer``.

    Args:
        text: Raw ticket text. ``None`` and float NaN are treated as empty
            text; any other non-string value is converted with ``str()``.

    Returns:
        str: The cleaned tokens joined by single spaces ('' when no token
        survives the filters).
    """
    # Missing values have no .lower(); treat them as an empty ticket instead
    # of raising, and avoid turning NaN into the literal token "nan".
    if text is None or (isinstance(text, float) and text != text):
        return ''

    tokens = word_tokenize(str(text).lower())
    kept = [
        lemmatizer.lemmatize(token)
        for token in tokens
        if token.isalpha() and token not in stop_words
    ]
    return ' '.join(kept)

df['clean_text'] = df['ticket_text'].apply(preprocess)

# One seed shared by the split and both forests so the reports below are
# reproducible from run to run.
SEED = 42

# 4. Feature Engineering (initialize and apply vectorizer, add additional features)
# NOTE(review): the vectorizer is fitted on the full dataset before the
# train/test split, so vocabulary/IDF statistics from the test rows leak into
# training. Consider splitting first and fitting on the training rows only.
tfidf = TfidfVectorizer(max_features=1000)
X_tfidf = tfidf.fit_transform(df['clean_text'])

# Two hand-crafted features: token count of the cleaned text and TextBlob
# polarity of the raw text.
df['ticket_length'] = df['clean_text'].apply(lambda x: len(x.split()))
df['sentiment'] = df['ticket_text'].apply(lambda x: TextBlob(str(x)).sentiment.polarity if pd.notnull(x) else 0)
df[['ticket_length', 'sentiment']] = df[['ticket_length', 'sentiment']].fillna(0)

# Column order here (TF-IDF, length, sentiment) must match process_ticket().
X_additional = df[['ticket_length', 'sentiment']].values
X = hstack([X_tfidf, X_additional])

# Labels
y_issue = df['issue_type']
y_urgency = df['urgency_level']

# 5. Train/Test Split (one split shared by both targets so rows stay aligned)
X_train, X_test, y_issue_train, y_issue_test, y_urgency_train, y_urgency_test = train_test_split(
    X, y_issue, y_urgency, test_size=0.2, random_state=SEED
)

# 6. Train Classifiers (seeded: random forests are stochastic otherwise)
model_issue = RandomForestClassifier(random_state=SEED)
model_issue.fit(X_train, y_issue_train)

model_urgency = RandomForestClassifier(random_state=SEED)
model_urgency.fit(X_train, y_urgency_train)

# Predictions & Evaluation
pred_issue = model_issue.predict(X_test)
pred_urgency = model_urgency.predict(X_test)

print("Issue Type Classification Report:")
print(classification_report(y_issue_test, pred_issue))

print("Urgency Level Classification Report:")
print(classification_report(y_urgency_test, pred_urgency))

# 7. Entity Extraction (define function)
complaint_keywords = ['broken', 'error', 'late', 'crash', 'delay', 'issue', 'problem', 'missing']

def extract_entities(text):
    """Extract a product name, a date and complaint keywords from a ticket.

    Args:
        text: Raw ticket text; converted with ``str()`` before use.

    Returns:
        dict: with the keys
            "product": lower-cased first value of ``df['product']`` that
                occurs as a substring of the lower-cased text, else None.
            "date": ``str()`` of the date part of the first date found in
                the text, else None.
            "complaint_keywords": entries of ``complaint_keywords`` that
                occur as substrings of the lower-cased text (so 'crash' also
                matches 'crashed').
    """
    # Local import: only this function needs the search helper.
    from dateparser.search import search_dates

    raw = str(text)
    text_lower = raw.lower()
    product = next(
        (p.lower() for p in df['product'].unique() if isinstance(p, str) and p.lower() in text_lower),
        None,
    )

    date_settings = {'PREFER_DATES_FROM': 'past'}
    # dateparser.parse() interprets the whole string as one date expression,
    # so it returns None for ordinary sentences. Keep it as the first attempt
    # (unchanged result when the text *is* a date) and fall back to scanning
    # the sentence for embedded date expressions.
    date = dateparser.parse(raw, settings=date_settings)
    if date is None and raw.strip():
        matches = search_dates(raw, settings=date_settings)
        if matches:
            # Each match is a (matched substring, datetime) pair.
            date = matches[0][1]

    keywords = [kw for kw in complaint_keywords if kw in text_lower]

    return {
        "product": product,
        "date": str(date.date()) if date else None,
        "complaint_keywords": keywords
    }

# 8. Integration Function for Gradio (ensure this is the only definition)
def process_ticket(text):
    """Classify one ticket and extract its entities.

    Builds the same feature layout used for training (TF-IDF columns followed
    by cleaned-token count and TextBlob polarity of the raw text), runs both
    classifiers on it and calls ``extract_entities`` on the raw text.

    Args:
        text: Raw ticket text.

    Returns:
        tuple: (predicted issue type, predicted urgency, ``str()`` of the
        entities dict — stringified so it can be shown in a Textbox).
    """
    clean_text = preprocess(text)
    word_count = len(clean_text.split())
    polarity = TextBlob(str(text)).sentiment.polarity

    features = hstack([
        tfidf.transform([clean_text]),
        np.array([[word_count, polarity]]),
    ])

    predicted_issue = model_issue.predict(features)[0]
    predicted_urgency = model_urgency.predict(features)[0]
    return predicted_issue, predicted_urgency, str(extract_entities(text))

# Define the interface: one multi-line input box wired to process_ticket,
# whose three return values fill three labelled text boxes in order.
iface = gr.Interface(
    fn=process_ticket,
    inputs=gr.Textbox(lines=5, placeholder="Enter ticket text here..."),
    outputs=[
        gr.Textbox(label=output_label)
        for output_label in (
            "Predicted Issue Type",
            "Predicted Urgency",
            "Extracted Entities",
        )
    ],
    title="Ticket Classifier & Entity Extractor",
    description="Paste a support ticket below and get issue type, urgency, and key entities.",
)

# Launch Gradio interface (optional, you might want to run this in a separate cell)
# iface.launch()


# 9. Example Test: run the whole pipeline once on a hand-written ticket
example = "My new ZPhone crashed yesterday and it's completely broken. I need help now!"
example_result = process_ticket(example)
print(example_result)
import nltk


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Issue Type Classification Report:
                    precision    recall  f1-score   support

    Account Access       1.00      1.00      1.00        23
   Billing Problem       1.00      1.00      1.00        19
   General Inquiry       1.00      1.00      1.00        25
Installation Issue       1.00      1.00      1.00        29
     Late Delivery       1.00      1.00      1.00        17
    Product Defect       1.00      1.00      1.00        30
        Wrong Item       1.00      1.00      1.00        23

          accuracy                           1.00       166
         macro avg       1.00      1.00      1.00       166
      weighted avg       1.00      1.00      1.00       166

Urgency Level Classification Report:
              precision    recall  f1-score   support

        High       0.29      0.29      0.29        66
         Low       0.25      0.33      0.29        43
      Medium       0.37      0.30      0.33        57

    accuracy                           0.30     

In [7]:
pip install dateparser

Collecting dateparser
  Downloading dateparser-1.2.1-py3-none-any.whl.metadata (29 kB)
Downloading dateparser-1.2.1-py3-none-any.whl (295 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.7/295.7 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dateparser
Successfully installed dateparser-1.2.1


In [12]:
# Start the Gradio app defined by `iface`. No arguments are passed, so
# sharing and debug behaviour are left to launch()'s own defaults.
iface.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://4997547df9dffaeb47.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [1]:
!pip install nltk
import nltk
nltk.download('punkt_tab')



[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [4]:
pip install gradio


Collecting gradio
  Downloading gradio-5.31.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.1 (from gradio)
  Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.