In [None]:
import json
from pathlib import Path

import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder

from ticket_classifier_api.utils import clean_text

# Paths are defined once so the load and save steps cannot drift apart.
INTENTS_PATH = Path("ticket_classifier_api/intents.json")
MODEL_DIR = Path("ticket_classifier_api/model")

# load intents
# The encoding is explicit: the platform default is not always UTF-8 and
# would otherwise mis-decode non-ASCII patterns.
with open(INTENTS_PATH, encoding="utf-8") as f:
    data = json.load(f)["intents"]

# Flatten the intents into two parallel lists: one entry per pattern in
# X_raw, and the tag of the owning intent at the same index in y.
X_raw, y = [], []

for intent in data:
    patterns = intent["patterns"]
    X_raw.extend(patterns)
    y.extend([intent["tag"]] * len(patterns))

# Fail early with a clear message rather than an opaque error from the
# vectorizer when the intents file contains no patterns.
if not X_raw:
    raise ValueError(f"no training patterns found in {INTENTS_PATH}")

# clean input
X = [clean_text(text) for text in X_raw]

# encode labels (string tags -> integer class ids)
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

# build pipeline: TF-IDF features feeding a logistic-regression classifier
model = Pipeline([
    ("tfidf", TfidfVectorizer(max_features=5000)),
    ("clf", LogisticRegression(max_iter=1000)),
])

# train model
model.fit(X, y_encoded)

# save model and encoder
# Create the output directory first; joblib.dump does not create missing
# parent directories and would fail on a fresh checkout.
MODEL_DIR.mkdir(parents=True, exist_ok=True)
joblib.dump(model, MODEL_DIR / "chatbot_model.pkl")
joblib.dump(encoder, MODEL_DIR / "label_encoder.pkl")


['ticket_classifier_api/model/label_encoder.pkl']

In [15]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# In-sample evaluation: predictions are made on X, the same cleaned texts
# the pipeline was fitted on, so these numbers describe fit, not
# generalisation.
y_pred = model.predict(X)

# Per-class precision / recall / f1, labelled with the encoder's tag names.
report = classification_report(y_encoded, y_pred, target_names=encoder.classes_)
print(report)

# Confusion matrix of true labels (y_encoded) against predicted labels.
cm = confusion_matrix(y_encoded, y_pred)
print("\n confusion matrix:")
print(cm)

# Fraction of examples whose predicted label equals the true label.
accuracy = np.mean(y_pred == y_encoded)
print(f"\n training accuracy: {accuracy:.2%}")


               precision    recall  f1-score   support

create_ticket       1.00      1.00      1.00        10
delete_ticket       1.00      1.00      1.00        10
   get_status       1.00      1.00      1.00        10

     accuracy                           1.00        30
    macro avg       1.00      1.00      1.00        30
 weighted avg       1.00      1.00      1.00        30


 confusion matrix:
[[10  0  0]
 [ 0 10  0]
 [ 0  0 10]]

 training accuracy: 100.00%
