# PratChat - Intent Classifier Training
This notebook trains the intent classification model for the PratChat hybrid AI system.

In [None]:
import json
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import seaborn as sns
import joblib

## Load Intent Data

In [None]:
# Read the intent definitions from disk.
# The encoding is pinned to UTF-8 (the standard encoding for JSON) so that
# non-ASCII patterns/responses decode the same way on every platform instead
# of depending on the locale's default codec.
with open('data/intents.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# The top-level "intents" key holds one entry per intent category.
intents = data['intents']
print(f"Loaded {len(intents)} intent categories")

## Prepare Training Data

In [None]:
# Flatten the intent definitions into two parallel lists:
#   X -- lower-cased example phrases
#   y -- the intent tag belonging to the phrase at the same index
# intent_responses maps each tag to that intent's 'responses' entry.
X, y = [], []
intent_responses = {}

for intent in intents:
    tag = intent['tag']
    intent_responses[tag] = intent['responses']

    # Normalise case once per phrase, then add one label per phrase.
    phrases = [pattern.lower() for pattern in intent['patterns']]
    X.extend(phrases)
    y.extend([tag] * len(phrases))

print(f"Total training samples: {len(X)}")
print(f"Intent labels: {set(y)}")

## Split Data

In [None]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the shuffle (and therefore the split) the same between runs.
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

print(f"Training samples: {len(X_train)}")
print(f"Test samples: {len(X_test)}")

## Vectorize Text (TF-IDF)

In [None]:
# TF-IDF features over unigrams and bigrams, capped at 100 features.
vectorizer = TfidfVectorizer(
    ngram_range=(1, 2),
    max_features=100,
)

# Learn the vocabulary and IDF weights from the training phrases only,
# then project both splits with the fitted vectorizer.
vectorizer.fit(X_train)
X_train_vec = vectorizer.transform(X_train)
X_test_vec = vectorizer.transform(X_test)

print(f"Feature dimensions: {X_train_vec.shape}")

## Train Logistic Regression Model

In [None]:
# Fit a logistic-regression classifier on the TF-IDF training features.
# fit() returns the estimator itself, so construction and training can be
# chained into a single expression.
classifier = LogisticRegression(max_iter=200).fit(X_train_vec, y_train)
print("Model trained successfully!")

## Evaluate Model

In [None]:
# Predict intents for the held-out phrases.
y_pred = classifier.predict(X_test_vec)

# Overall accuracy, reported as a percentage.
accuracy = accuracy_score(y_test, y_pred)
accuracy_pct = accuracy * 100
print(f"Accuracy: {accuracy_pct:.2f}%")

# Per-class metrics as formatted by scikit-learn's classification_report.
print("\nClassification Report:")
report = classification_report(y_test, y_pred)
print(report)

## Confusion Matrix

In [None]:
# Plot the confusion matrix for the held-out test set.
#
# The label order is pinned explicitly as the sorted union of the true and
# predicted labels (the same order confusion_matrix uses when no labels are
# given), so the matrix values are unchanged while the heatmap axes can show
# intent names instead of bare row/column indices.
cm_labels = sorted(set(y_test).union(y_pred))
cm = confusion_matrix(y_test, y_pred, labels=cm_labels)

plt.figure(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=cm_labels,  # columns: predicted intent
    yticklabels=cm_labels,  # rows: true intent
)
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

## Save Model

In [None]:
import os

# Persist the fitted artifacts under models/, creating the directory on
# first run.
os.makedirs('models', exist_ok=True)

joblib.dump(vectorizer, 'models/vectorizer.pkl')
joblib.dump(classifier, 'models/classifier.pkl')
# Iterating a set of strings yields an arbitrary order that can differ from
# one interpreter run to the next, so the labels are sorted before saving.
# This makes the file reproducible and gives it the same order as
# classifier.classes_ whenever every label occurs in the training split.
joblib.dump(sorted(set(y)), 'models/intent_labels.pkl')
joblib.dump(intent_responses, 'models/intent_responses.pkl')

print("Models saved to models/ directory")