In [1]:
# chatbot_model.ipynb

import json
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
import joblib
import os

# Read the intent definitions. The code below iterates `data` and reads an
# 'intent' value and an iterable of 'examples' from every entry.
with open('intents.json', 'r', encoding='utf-8') as intents_file:
    data = json.load(intents_file)

# Flatten to one (lower-cased example, intent) pair per training utterance,
# preserving file order, then split the pairs into parallel lists.
pairs = [
    (example.lower(), item['intent'])
    for item in data
    for example in item['examples']
]
sentences = [text for text, _ in pairs]
labels = [intent for _, intent in pairs]

# Turn the intent names into integer class ids; the fitted encoder stays in
# `le` so the ids can be mapped back to intent names.
le = LabelEncoder()
y = le.fit_transform(labels)

# Text features: TF-IDF over unigrams and bigrams.
# - token_pattern matches runs of one or more word characters, so
#   single-character tokens are kept; punctuation never forms part of a token.
# - stop_words='english' drops scikit-learn's built-in English stop words.
#   NOTE(review): very short utterances made up only of stop words may end up
#   with no features at all -- confirm this is acceptable for the intents.
vectorizer = TfidfVectorizer(
    lowercase=True,
    stop_words='english',
    ngram_range=(1, 2),
    token_pattern=r"(?u)\b\w+\b",
)

# Classifier: logistic regression with the iteration cap raised to 1000.
classifier = LogisticRegression(max_iter=1000)

# Chain both steps so raw sentences can be passed straight to fit/predict.
pipeline = Pipeline([
    ('tfidf', vectorizer),
    ('clf', classifier),
])

# Fit the vectorizer and classifier together on every example.
pipeline.fit(sentences, y)

# Make sure the output directory is present before writing into it.
os.makedirs("model", exist_ok=True)

# Persist the trained pipeline first, then the label encoder.
for artifact, target in (
    (pipeline, 'model/intent_model.pkl'),
    (le, 'model/label_encoder.pkl'),
):
    joblib.dump(artifact, target)

print("✅ Model training complete and saved.")


✅ Model training complete and saved.
