# NLP Support Ticket Auto-Routing

This notebook trains a TF-IDF + logistic-regression pipeline that routes support tickets to a category, then exports the fitted model and a confusion-matrix plot.


In [None]:
import os
from pathlib import Path

def find_project_root(start: Path, marker: str = "04_nlp_support_ticket_routing") -> Path:
    """Return the closest directory at or above *start* whose name is *marker*.

    *start* is resolved to an absolute path first. The resolved path itself
    is checked before its ancestors, so when several path components share
    the marker name the deepest one wins. If no component matches, the
    resolved *start* is returned unchanged.
    """
    resolved = start.resolve()
    candidates = (resolved, *resolved.parents)
    return next((c for c in candidates if c.name == marker), resolved)

# Locate the project directory starting from the current working directory and
# switch into it, so the relative paths used further down in this notebook
# (data/, models/, reports/) are resolved against it.
# If no path component carries the marker name, ROOT is just the resolved cwd
# and the chdir below leaves the working directory where it already was.
ROOT = find_project_root(Path.cwd())
os.chdir(ROOT)
print("Project root:", ROOT)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
import joblib


In [None]:
# Load the ticket dataset from data/tickets.csv (relative to the working directory).
data_path = Path("data/tickets.csv")
if not data_path.exists():
    # The CSV is missing: import the project's dataset script lazily (it is only
    # needed on this path) and call it with the target path.
    # NOTE(review): presumably make_data generates the dataset and writes it to
    # out_path -- its implementation is not visible here; confirm.
    from data.make_dataset import main as make_data
    make_data(out_path=str(data_path))

df = pd.read_csv(data_path)
# Trailing expression: in a notebook this displays the first rows as a sanity check.
df.head()


In [None]:
# Hold out 20% of the tickets for evaluation. The split is stratified on the
# category column and uses a fixed seed so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df["text"],
    df["category"],
    test_size=0.2,
    stratify=df["category"],
    random_state=42,
)

# Text -> TF-IDF features (1- and 2-grams, terms seen in at least 2 documents,
# vocabulary capped at 25000) -> logistic-regression classifier.
pipe = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer(max_features=25000, min_df=2, ngram_range=(1, 2))),
        ("clf", LogisticRegression(max_iter=2000)),
    ]
)

# fit() returns the pipeline itself, so training and held-out prediction chain.
pred = pipe.fit(X_train, y_train).predict(X_test)

print(classification_report(y_test, pred, digits=3))


In [None]:
# Confusion matrix over the held-out split. Passing an explicit, sorted label
# list fixes the row/column order so it matches the tick labels set below.
labels = sorted(df["category"].unique())
cm = confusion_matrix(y_test, pred, labels=labels)

fig, ax = plt.subplots(figsize=(6, 5))
ax.imshow(cm)
ax.set_title("Confusion Matrix")

# confusion_matrix puts true classes on rows and predicted classes on columns;
# imshow draws rows along y and columns along x. Label the axes so the plot
# can be read without knowing that convention.
ax.set_xlabel("Predicted category")
ax.set_ylabel("True category")

ticks = range(len(labels))
ax.set_xticks(ticks)
ax.set_xticklabels(labels, rotation=45, ha="right")
ax.set_yticks(ticks)
ax.set_yticklabels(labels)

# Write each cell's count at its centre (x = column index, y = row index).
for (row, col), count in np.ndenumerate(cm):
    ax.text(col, row, int(count), ha="center", va="center")
fig.tight_layout()

# Save before show(): the figure may be released once it has been displayed.
Path("reports").mkdir(exist_ok=True)
fig.savefig("reports/confusion_matrix.png", dpi=200, bbox_inches="tight")
plt.show()


In [None]:
import json


def _to_builtin(value):
    """Recursively convert NumPy scalars inside *value* to plain Python types.

    Dict keys are stringified and dict values converted in turn; any object
    exposing ``.item()`` (as NumPy scalars do) is unwrapped. Everything else
    is returned as-is, so the result is safe to pass to ``json.dumps``.
    """
    if isinstance(value, dict):
        return {str(key): _to_builtin(item) for key, item in value.items()}
    return value.item() if hasattr(value, "item") else value


# Persist the fitted pipeline and the evaluation metrics.
Path("models").mkdir(exist_ok=True)
Path("reports").mkdir(exist_ok=True)

joblib.dump(pipe, "models/ticket_router.joblib")

# Store the actual held-out metrics instead of only a pointer to the notebook
# output. The original "note" key is kept so existing readers of the file
# still find it.
metrics = {
    "note": "See classification report in notebook output.",
    "classification_report": _to_builtin(
        classification_report(y_test, pred, output_dict=True)
    ),
}
Path("reports/metrics.json").write_text(json.dumps(metrics, indent=2), encoding="utf-8")
print("Saved models/ticket_router.joblib and reports/*")
