In [None]:
# 📘 Train IT Helpdesk Ticket Classifier

import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
import joblib
import os

In [None]:

# Build a small synthetic ticket dataset: each row pairs a canned ticket
# description with the category it was drawn from.
SEED = 42          # fixed seed so Restart & Run All regenerates identical rows
N_SAMPLES = 300    # number of synthetic tickets to draw

# Ticket categories (the classifier's target labels)
categories = ['Network Issue', 'Software Bug', 'Access Request', 'Hardware Failure', 'General Inquiry']

# Canned example phrases for every category (4 per category, 20 in total)
examples = {
    'Network Issue': [
        'Cannot connect to WiFi', 'Internet is down in my office',
        'VPN keeps disconnecting', 'Slow network speed'
    ],
    'Software Bug': [
        'Application crashes on login', 'Error 500 when submitting form',
        'Software freezes randomly', 'Bug in the reporting module'
    ],
    'Access Request': [
        'Need access to shared drive', 'Requesting access to email group',
        'Grant access to Jira project', 'Need database read permissions'
    ],
    'Hardware Failure': [
        'Laptop not turning on', 'Monitor flickering', 'Mouse not detected',
        'Hard disk failure warning'
    ],
    'General Inquiry': [
        'What’s the IT policy on USBs?', 'How to reset my password?',
        'Where can I report phishing emails?', 'How do I order new equipment?'
    ]
}

# Seed the global RNG right before sampling: without it every run produced a
# different dataset, and therefore a different split and different metrics.
random.seed(SEED)

# Simulate a larger dataset by sampling (category, phrase) pairs with replacement.
# NOTE: only 20 distinct phrases exist, so the same texts will appear in both the
# training and the test split later on; treat reported metrics as optimistic.
data = {'description': [], 'category': []}
for _ in range(N_SAMPLES):
    cat = random.choice(categories)
    data['description'].append(random.choice(examples[cat]))
    data['category'].append(cat)

# Convert to DataFrame and preview the first rows
df = pd.DataFrame(data)
df.head()


In [None]:


# Persist the generated tickets so other notebooks/scripts can reuse them.
output_dir = "../data"
output_csv = "../data/tickets.csv"

# Create the target folder if it is missing (no error when it already exists).
os.makedirs(output_dir, exist_ok=True)

# Write without the DataFrame index so the CSV holds only the two data columns.
df.to_csv(output_csv, index=False)


In [None]:
# 2. Train/Test Split
# Hold out 20% of the tickets for evaluation. The split is stratified on the
# label so every category keeps the same share in train and test; a plain
# random split of this small dataset can under-represent a class in the
# hold-out and distort the per-class metrics. random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    df['description'],
    df['category'],
    test_size=0.2,
    random_state=42,
    stratify=df['category'],
)

In [None]:
# 3. Create Pipeline
# Step 1: turn raw ticket text into TF-IDF features, dropping English stop words.
tfidf_step = ('tfidf', TfidfVectorizer(stop_words='english'))
# Step 2: multinomial Naive Bayes classifier fitted on those features.
nb_step = ('nb', MultinomialNB())

# Chain both steps so raw strings can be passed straight to fit/predict.
model = Pipeline([tfidf_step, nb_step])

In [None]:
# 4. Train
# Fit the vectorizer and the classifier on the training portion only.
# Left as the cell's last expression so the fitted pipeline is displayed.
model.fit(X=X_train, y=y_train)

In [None]:
# 5. Evaluate
# Predict categories for the held-out tickets.
y_pred = model.predict(X_test)

# Per-class precision / recall / F1 of predictions vs. true labels.
report = classification_report(y_test, y_pred)
print(report)

In [None]:
# 6. Save Model
model_dir = "../models"
model_path = "../models/ticket_classifier.pkl"

# Make sure the destination folder exists before writing the artifact.
os.makedirs(model_dir, exist_ok=True)

# Serialize the whole fitted pipeline (vectorizer + classifier) in one file.
joblib.dump(model, model_path)

In [None]:
# 7. Predict Example
# The pipeline expects an iterable of raw strings, hence the one-item list.
sample = ["Wi-Fi keeps disconnecting when I use Zoom"]
predicted = model.predict(sample)

# Show the input ticket next to the category the model assigned to it.
message = f"\n🧠 Prediction for '{sample[0]}': {predicted[0]}"
print(message)