In [None]:
#!/usr/bin/env python3
"""
Chattea Intent Classifier - CNN + Word2Vec (Optimized)

Run:
    python chattea_cnn.py

Required files (same directory):
- chatbot_dataset.csv  (must contain 'text' and 'intent' columns)
- responses.json       (bilingual or single-language responses)

Outputs saved:
- word2vec.model
- cnn_chattea.pth
"""

import json
import pandas as pd
import re
import os
import sys
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from gensim.models import Word2Vec
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from difflib import get_close_matches, SequenceMatcher
import warnings
import time
warnings.filterwarnings('ignore')

# ============================================================================
# CONFIGURATION
# ============================================================================

class Config:
    """Central collection of file paths and hyperparameters used by the notebook.

    All values are plain class attributes; the rest of the notebook reads them
    either through the class (``Config.X``) or through the module-level
    ``config`` instance created right after the class.
    """

    # File paths
    DATASET_PATH = "chatbot_dataset.csv"   # CSV read with pandas; must have 'text' and 'intent' columns
    RESPONSES_PATH = "responses.json"      # JSON mapping intent -> response text (or per-language dict)
    MODEL_PATH = "cnn_chattea.pth"         # Where the best CNN state_dict is saved / loaded
    WORD2VEC_PATH = "word2vec.model"       # Where the gensim Word2Vec model is saved / loaded

    # Word2Vec parameters (OPTIMIZED)
    EMBEDDING_DIM = 100        # Embedding dimension
    WORD2VEC_WINDOW = 5        # Context window
    WORD2VEC_MIN_COUNT = 1     # Minimum word frequency
    WORD2VEC_SG = 1            # Skip-gram (better for small datasets)

    # CNN parameters (OPTIMIZED)
    NUM_FILTERS = 128          # Filters per kernel
    KERNEL_SIZES = [2, 3, 4]   # Includes 2-word phrases!
    DROPOUT = 0.5              # Higher regularization
    MAX_SEQ_LENGTH = 20        # Shorter = more efficient

    # Training parameters
    BATCH_SIZE = 32
    EPOCHS = 30
    LEARNING_RATE = 0.001
    TEST_SIZE = 0.2            # Fraction of samples held out for validation
    RANDOM_SEED = 42           # Shared seed for torch, numpy, Word2Vec and the train/val split

    # Inference parameters
    FUZZY_CUTOFF = 0.8         # Similarity cutoff passed to difflib.get_close_matches
    CONFIDENCE_THRESHOLD = 0.75  # CNN softmax confidence at or above which the CNN intent is used

    # Device
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instance used by code that reads settings as `config.DEVICE`
config = Config()

# Reproducibility
torch.manual_seed(Config.RANDOM_SEED)
np.random.seed(Config.RANDOM_SEED)

In [None]:
# ============================================================================
# DATASET SANITY CHECK & EXPLORATORY ANALYSIS
# ============================================================================

print("=" * 80)
print("üìä DATASET SANITY CHECK & ANALYSIS")
print("=" * 80)

# Load dataset
# The sections below read the 'text' and 'intent' columns of this frame.
df = pd.read_csv(Config.DATASET_PATH)

print("\n1Ô∏è‚É£  BASIC STATISTICS")
print("-" * 80)
print(f"Total samples: {len(df)}")
print(f"Total intents: {df['intent'].nunique()}")
print(f"Columns: {list(df.columns)}")

# Check for missing values
print("\n2Ô∏è‚É£  DATA QUALITY")
print("-" * 80)
missing = df.isnull().sum()
print("Missing values:")
for col in df.columns:
    print(f"  {col}: {missing[col]} ({missing[col]/len(df)*100:.2f}%)")

# Check for duplicates
duplicates = df.duplicated().sum()
print(f"\nDuplicate rows: {duplicates} ({duplicates/len(df)*100:.2f}%)")

# Intent distribution
print("\n3Ô∏è‚É£  INTENT DISTRIBUTION")
print("-" * 80)
intent_counts = df['intent'].value_counts()
print(intent_counts)
# The first / last entries of intent_counts are reported as most / least common.
print(f"\nMost common: {intent_counts.index[0]} ({intent_counts.iloc[0]} samples)")
print(f"Least common: {intent_counts.index[-1]} ({intent_counts.iloc[-1]} samples)")
print(f"Class imbalance ratio: {intent_counts.iloc[0] / intent_counts.iloc[-1]:.2f}x")

# Text length analysis
print("\n4Ô∏è‚É£  TEXT LENGTH ANALYSIS")
print("-" * 80)
# Helper columns added to df for this analysis; the cell drops them again at its end.
df['text_length'] = df['text'].str.len()
df['word_count'] = df['text'].str.split().str.len()

print(f"Character length:")
print(f"  Min: {df['text_length'].min()}")
print(f"  Max: {df['text_length'].max()}")
print(f"  Mean: {df['text_length'].mean():.2f}")
print(f"  Median: {df['text_length'].median():.0f}")

print(f"\nWord count:")
print(f"  Min: {df['word_count'].min()}")
print(f"  Max: {df['word_count'].max()}")
print(f"  Mean: {df['word_count'].mean():.2f}")
print(f"  Median: {df['word_count'].median():.0f}")

# Find longest sentences
print("\n5Ô∏è‚É£  LONGEST SENTENCES (Top 5)")
print("-" * 80)
longest = df.nlargest(5, 'word_count')[['text', 'intent', 'word_count']]
for idx, row in longest.iterrows():
    print(f"\n[{row['word_count']} words] Intent: {row['intent']}")
    print(f"Text: {row['text']}")

# Find shortest sentences
print("\n6Ô∏è‚É£  SHORTEST SENTENCES (Top 5)")
print("-" * 80)
shortest = df.nsmallest(5, 'word_count')[['text', 'intent', 'word_count']]
for idx, row in shortest.iterrows():
    print(f"\n[{row['word_count']} words] Intent: {row['intent']}")
    print(f"Text: {row['text']}")

# Vocabulary analysis
print("\n7Ô∏è‚É£  VOCABULARY STATISTICS")
print("-" * 80)
# NOTE: this is a plain lowercase whitespace split, so punctuation stays attached
# to words; it is not the same tokenization as tokenize() used for training.
all_words = []
for text in df['text']:
    all_words.extend(str(text).lower().split())

unique_words = set(all_words)
print(f"Total words (with repetition): {len(all_words)}")
print(f"Unique words: {len(unique_words)}")
print(f"Vocabulary richness: {len(unique_words)/len(all_words):.4f}")

# Most common words
# NOTE(review): this import sits mid-cell rather than with the imports at the top of the notebook.
from collections import Counter
word_freq = Counter(all_words)
print(f"\nMost common words (Top 10):")
for word, count in word_freq.most_common(10):
    print(f"  '{word}': {count} times")

# Justification for hyperparameters
# Prints, for each key hyperparameter, the dataset statistic it is motivated by.
# Relies on df['word_count'] and unique_words computed earlier in this cell.
print("\n8Ô∏è‚É£  HYPERPARAMETER JUSTIFICATION")
print("-" * 80)
max_words = df['word_count'].max()
mean_words = df['word_count'].mean()
percentile_95 = df['word_count'].quantile(0.95)

# Number of samples that will be truncated by MAX_SEQ_LENGTH (computed once, printed twice).
n_too_long = (df['word_count'] > Config.MAX_SEQ_LENGTH).sum()

print(f"‚úì MAX_SEQ_LENGTH = {Config.MAX_SEQ_LENGTH}")
print(f"  Rationale: 95th percentile = {percentile_95:.0f} words")
print(f"  Only {n_too_long} samples ({n_too_long/len(df)*100:.2f}%) exceed this length")

print(f"\n‚úì EMBEDDING_DIM = {Config.EMBEDDING_DIM}")
print(f"  Rationale: Vocabulary size = {len(unique_words)}")
print(f"  Rule of thumb: embedding_dim ‚âà vocab_size^0.25 = {len(unique_words)**0.25:.0f}")
print(f"  100 dimensions provides good balance for vocab of ~1000 words")

print(f"\n‚úì KERNEL_SIZES = {Config.KERNEL_SIZES}")
print(f"  Rationale: Mean sentence length = {mean_words:.1f} words")
print(f"  Kernels [2,3,4] capture 2-4 word phrases (n-grams)")
print(f"  Examples: 'send message' (2), 'how to send' (3), 'create new instance now' (4)")

# Batches per epoch must be derived from the TRAINING split, not the whole
# dataset: TEST_SIZE of the rows are held out for validation, and the training
# DataLoader keeps the final partial batch, so the count is rounded up.
n_val_samples = int(np.ceil(len(df) * Config.TEST_SIZE))
n_train_samples = len(df) - n_val_samples
batches_per_epoch = -(-n_train_samples // Config.BATCH_SIZE)  # ceiling division

print(f"\n‚úì BATCH_SIZE = {Config.BATCH_SIZE}")
print(f"  Rationale: Dataset size = {len(df)} samples")
print(f"  {batches_per_epoch} batches per epoch")
print(f"  Provides good gradient estimation without excessive memory usage")

print(f"\n‚úì DROPOUT = {Config.DROPOUT}")
print(f"  Rationale: Small dataset ({len(df)} samples) ‚Üí high overfitting risk")
print(f"  Higher dropout (0.5) provides aggressive regularization")

# Class balance visualization
# Classifies the dataset by the ratio between the largest and smallest intent
# counts and prints a matching verdict.
print("\n9Ô∏è‚É£  CLASS BALANCE CHECK")
print("-" * 80)
min_samples = intent_counts.min()
max_samples = intent_counts.max()
imbalance = max_samples / min_samples

# Thresholds: < 1.5x balanced, 1.5-3x moderate, otherwise severe.
if imbalance < 1.5:
    print("‚úì Classes are WELL BALANCED (ratio < 1.5x)")
elif imbalance < 3:
    print("‚ö†Ô∏è  Classes are MODERATELY IMBALANCED (ratio 1.5-3x)")
else:
    print("‚ùå Classes are SEVERELY IMBALANCED (ratio > 3x)")
    print("   Consider: class weighting, oversampling minority, or undersampling majority")

print(f"   Imbalance ratio: {imbalance:.2f}x")

# Sample queries per intent
print("\nüîü SAMPLE QUERIES PER INTENT (3 examples each)")
print("-" * 80)
for intent in df['intent'].unique()[:5]:  # Show first 5 intents
    print(f"\nüìå Intent: {intent}")
    # First three texts for this intent, in dataset order.
    samples = df[df['intent'] == intent]['text'].head(3).tolist()
    for i, sample in enumerate(samples, 1):
        print(f"   {i}. {sample}")

print("\n" + "=" * 80)
print("‚úÖ DATASET SANITY CHECK COMPLETE!")
print("=" * 80)

# Clean up temporary columns
# Removes the two helper columns added for the text length analysis.
df = df.drop(['text_length', 'word_count'], axis=1)

In [None]:
# ============================================================================
# DEVICE SETUP
# ============================================================================

# Print a banner with the compute device the notebook will use.
print("=" * 80)
print("CHATTEA INTENT CLASSIFIER - CNN + WORD2VEC")
print("=" * 80)
print(f"Device: {config.DEVICE}")
if torch.cuda.is_available():
    try:
        print(f"GPU: {torch.cuda.get_device_name(0)}")
    except Exception as exc:
        # Best-effort only: the GPU name is informational, so never fail the
        # notebook over it -- but say why it is missing instead of hiding it.
        print(f"GPU: <name unavailable: {exc}>")
print("=" * 80)

CHATTEA INTENT CLASSIFIER - CNN + WORD2VEC
Device: cuda
GPU: NVIDIA GeForce RTX 4060 Laptop GPU


In [None]:
# ============================================================================
# TEXT PROCESSING
# ============================================================================

def clean_text(text):
    """Normalize raw text for tokenization.

    Converts the input to ``str``, lowercases it, deletes every character that
    is neither a word character nor whitespace (so "can't" becomes "cant"),
    collapses whitespace runs to a single space and strips the ends.

    Args:
        text: Any object; it is converted with ``str()`` first.

    Returns:
        The normalized string.
    """
    lowered = str(text).lower()
    without_punctuation = re.sub(r"[^\w\s]", "", lowered)  # Remove punctuation
    return re.sub(r"\s+", " ", without_punctuation).strip()  # Normalize whitespace

def tokenize(text):
    """Split text into the list of normalized words produced by ``clean_text``."""
    return clean_text(text).split()

def build_vocabulary(texts):
    """Collect the set of unique tokens appearing in ``texts``.

    The words are produced with ``tokenize`` -- the same tokenizer used to
    build the training sequences -- so the vocabulary used for typo
    correction contains exactly the word forms the model was trained on.
    (Splitting on ``\\w+`` instead would turn "can't" into "can" and "t",
    while training sees "cant".)

    Args:
        texts: Iterable of raw texts.

    Returns:
        A ``set`` of token strings; empty for empty input.
    """
    vocab = set()
    for text in texts:
        vocab.update(tokenize(text))
    return vocab

def fuzzy_correct(text, vocab, cutoff=Config.FUZZY_CUTOFF):
    """Replace likely typos in ``text`` with their closest vocabulary word.

    The text is split with ``tokenize`` so the words being corrected have the
    same form as the tokens the model was trained on. Words already in
    ``vocab`` are kept as-is without running the (linear-time) fuzzy search;
    other words are replaced by the best ``difflib.get_close_matches``
    candidate when one reaches ``cutoff``, and left unchanged otherwise.

    Args:
        text: Raw user text (converted with ``str()`` by the tokenizer).
        vocab: Collection of known words.
        cutoff: Minimum similarity for a replacement.

    Returns:
        The corrected words joined by single spaces.
    """
    corrected = []
    for word in tokenize(text):
        if word in vocab:
            # Exact hit: the best fuzzy match would be the word itself.
            corrected.append(word)
            continue
        matches = get_close_matches(word, vocab, n=1, cutoff=cutoff)
        corrected.append(matches[0] if matches else word)
    return ' '.join(corrected)

In [None]:
# ============================================================================
# WORD2VEC EMBEDDER
# ============================================================================

class Word2VecEmbedder:
    """Word2Vec wrapper exposing a token->index map and an embedding matrix.

    Index 0 is reserved for ``<PAD>`` (all-zero row) and index 1 for ``<UNK>``
    (small random row). Corpus words start at index 2, in the order of
    ``model.wv.index_to_key``.

    Attributes set by ``train`` / ``load``:
        model: the gensim ``Word2Vec`` model.
        word2idx / idx2word: token <-> row-index mappings.
        embedding_matrix: float32 array of shape (vocab_size, embed_dim).
        vocab_size: number of rows, including the two special tokens.
        embed_dim: vector size of the underlying model.
    """

    def __init__(self):
        self.model = None
        self.word2idx = {"<PAD>": 0, "<UNK>": 1}
        self.idx2word = {}
        self.embedding_matrix = None
        self.vocab_size = 0
        self.embed_dim = Config.EMBEDDING_DIM

    def _build_lookup_tables(self):
        """Rebuild word2idx, idx2word and embedding_matrix from ``self.model``.

        Shared by ``train`` and ``load`` so both produce identical tables.
        """
        keyed_vectors = self.model.wv

        # Take the dimensionality from the model itself: a model loaded from
        # disk may have been trained with a different vector size than the
        # current Config, which would otherwise fail on the row assignment.
        self.embed_dim = keyed_vectors.vector_size

        # Start from a clean mapping so repeated train()/load() calls never
        # keep words from a previous model.
        self.word2idx = {"<PAD>": 0, "<UNK>": 1}
        for position, word in enumerate(keyed_vectors.index_to_key, start=2):
            self.word2idx[word] = position

        self.idx2word = {position: word for word, position in self.word2idx.items()}
        self.vocab_size = len(self.word2idx)

        matrix = np.zeros((self.vocab_size, self.embed_dim), dtype=np.float32)
        # <PAD> stays all-zero; <UNK> gets a small random vector.
        matrix[self.word2idx["<UNK>"]] = np.random.randn(self.embed_dim) * 0.01
        for word in keyed_vectors.index_to_key:
            matrix[self.word2idx[word]] = keyed_vectors[word]
        self.embedding_matrix = matrix

    def train(self, sentences):
        """Train Word2Vec on tokenized sentences and build the lookup tables.

        Args:
            sentences: list of token lists.

        Returns:
            self, to allow chaining.
        """
        print("\nüß† Training Word2Vec...")
        self.model = Word2Vec(
            sentences=sentences,
            vector_size=Config.EMBEDDING_DIM,
            window=Config.WORD2VEC_WINDOW,
            min_count=Config.WORD2VEC_MIN_COUNT,
            sg=Config.WORD2VEC_SG,
            seed=Config.RANDOM_SEED,
            # gensim only honours `seed` deterministically with a single
            # worker thread; multiple workers reintroduce ordering jitter.
            # (Across interpreter launches PYTHONHASHSEED must be fixed too.)
            workers=1
        )
        self._build_lookup_tables()

        print(f"‚úì Word2Vec trained: vocab={self.vocab_size}, dim={self.embed_dim}")
        return self

    def save(self, path=Config.WORD2VEC_PATH):
        """Save the trained Word2Vec model to ``path``; no-op if nothing was trained."""
        if self.model is not None:
            self.model.save(path)
            print(f"‚úì Word2Vec saved to {path}")

    def load(self, path=Config.WORD2VEC_PATH):
        """Load a saved Word2Vec model from ``path`` and rebuild the lookup tables.

        Returns:
            self, to allow chaining.
        """
        print(f"\nüß† Loading Word2Vec from {path}...")
        self.model = Word2Vec.load(path)
        self._build_lookup_tables()

        print(f"‚úì Word2Vec loaded: vocab={self.vocab_size}, dim={self.embed_dim}")
        return self

    def encode_sequence(self, tokens, max_length=Config.MAX_SEQ_LENGTH):
        """Convert tokens to a fixed-length list of vocabulary indices.

        Tokens beyond ``max_length`` are dropped, unknown tokens map to
        ``<UNK>`` and short sequences are right-padded with ``<PAD>``.
        """
        unk_index = self.word2idx["<UNK>"]
        pad_index = self.word2idx["<PAD>"]
        indices = [self.word2idx.get(token, unk_index) for token in tokens[:max_length]]
        indices.extend([pad_index] * (max_length - len(indices)))
        return indices

    def sentence_vector(self, tokens):
        """Return the mean embedding of the known tokens.

        Special tokens and out-of-vocabulary tokens are ignored. When no token
        is known, a zero vector of length ``embed_dim`` is returned.
        """
        vectors = []
        for token in tokens:
            if token in self.word2idx and token not in ("<PAD>", "<UNK>"):
                idx = self.word2idx[token]
                if idx < len(self.embedding_matrix):
                    vectors.append(self.embedding_matrix[idx])
        if len(vectors) == 0:
            return np.zeros(self.embed_dim, dtype=np.float32)
        return np.mean(vectors, axis=0)

In [None]:
# ============================================================================
# CNN MODEL ARCHITECTURE
# ============================================================================

class TextCNN(nn.Module):
    """Convolutional sentence classifier in the style of Kim (2014).

    An embedding layer feeds one 1-D convolution per kernel size in
    ``Config.KERNEL_SIZES``; each convolution output is ReLU-activated and
    max-pooled over the sequence axis, the pooled features are concatenated,
    passed through dropout and projected to ``num_classes`` logits.
    """

    def __init__(self, vocab_size, embedding_dim, num_classes, embedding_matrix=None):
        super().__init__()

        # Row 0 is the padding row.
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        if embedding_matrix is not None:
            # Replace the random initial weights with the supplied vectors.
            pretrained = torch.tensor(embedding_matrix, dtype=torch.float32)
            self.embedding.weight = nn.Parameter(pretrained)
            print("‚úì CNN initialized with Word2Vec embeddings!")

        # One convolution branch per n-gram width.
        branches = []
        for width in Config.KERNEL_SIZES:
            branches.append(
                nn.Conv1d(in_channels=embedding_dim, out_channels=Config.NUM_FILTERS, kernel_size=width)
            )
        self.convs = nn.ModuleList(branches)

        self.dropout = nn.Dropout(Config.DROPOUT)
        total_features = Config.NUM_FILTERS * len(Config.KERNEL_SIZES)
        self.fc = nn.Linear(total_features, num_classes)

    def forward(self, x):
        """Map a batch of index sequences (batch, seq_len) to logits (batch, num_classes)."""
        # (batch, seq_len, embed_dim) -> (batch, embed_dim, seq_len) for Conv1d
        channels_first = self.embedding(x).transpose(1, 2)

        pooled_features = []
        for branch in self.convs:
            activated = F.relu(branch(channels_first))          # (batch, num_filters, L)
            # Max over the whole remaining sequence axis, then drop that axis.
            strongest = F.max_pool1d(activated, activated.size(2)).squeeze(2)
            pooled_features.append(strongest)

        features = torch.cat(pooled_features, dim=1)
        return self.fc(self.dropout(features))

In [None]:
# ============================================================================
# DATASET
# ============================================================================

class IntentDataset(Dataset):
    """Pairs an indexable feature container with an indexable label container.

    Item ``i`` is the tuple ``(X[i], y[i])``; the length is ``len(X)``.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        n_samples = len(self.X)
        return n_samples

    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        return features, target

In [None]:
# ============================================================================
# TRAINING CNN
# ============================================================================

def train_model_pretty(model, X_train, y_train, X_val, y_val):
    """Train the CNN classifier and print a formatted progress table.

    Trains for ``Config.EPOCHS`` epochs with Adam and cross-entropy loss on
    mini-batches of ``Config.BATCH_SIZE``. After every epoch the full
    validation set is evaluated in a single forward pass, and the weights
    are written to ``Config.MODEL_PATH`` whenever validation accuracy improves.
    The best checkpoint is reloaded before returning.

    Args:
        model: The network to train; it is moved to ``config.DEVICE``.
        X_train, y_train: Training inputs / labels (tensors).
        X_val, y_val: Validation inputs / labels (tensors).

    Returns:
        The model with the best-validation weights loaded, in eval mode.
    """
    model = model.to(config.DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=Config.LEARNING_RATE)
    criterion = nn.CrossEntropyLoss()

    train_dataset = IntentDataset(X_train, y_train)
    train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True)

    # Start below any reachable accuracy so the first epoch always writes a
    # checkpoint. Starting at 0.0 with a strict ">" meant that a run whose
    # validation accuracy never exceeded 0 saved nothing, and the reload below
    # then failed or silently picked up a stale file from an earlier run.
    best_val_acc = float("-inf")
    checkpoint_saved = False

    print("\n" + "=" * 80)
    print("üèãÔ∏è  TRAINING LOOP (WITH PROPER BATCHING!)")
    print("=" * 80)
    print()
    print("Epoch | Train Acc | Train Loss | Val Acc | Val Loss")
    print("-" * 65)

    for epoch in range(Config.EPOCHS):
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        for batch_X, batch_y in train_loader:
            batch_X = batch_X.to(config.DEVICE)
            batch_y = batch_y.to(config.DEVICE)

            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_correct += (outputs.argmax(1) == batch_y).sum().item()
            train_total += len(batch_y)

        # max(..., 1) only guards the empty-training-set case.
        train_acc = train_correct / max(train_total, 1)
        avg_train_loss = train_loss / max(len(train_loader), 1)

        # Validation
        model.eval()
        with torch.no_grad():
            X_val_device = X_val.to(config.DEVICE)
            y_val_device = y_val.to(config.DEVICE)
            val_outputs = model(X_val_device)
            val_loss = criterion(val_outputs, y_val_device).item()
            val_acc = (val_outputs.argmax(1) == y_val_device).float().mean().item()

        # Print progress (every 5 epochs + last)
        if epoch % 5 == 0 or epoch == Config.EPOCHS - 1:
            print(f"{epoch:5d} | {train_acc:9.4f} | {avg_train_loss:10.4f} | {val_acc:7.4f} | {val_loss:8.4f}")

        # Save best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), Config.MODEL_PATH)
            checkpoint_saved = True

    print("\n" + "=" * 80)
    if checkpoint_saved:
        print(f"‚úì Best Validation Accuracy: {best_val_acc:.4f} ({best_val_acc*100:.2f}%)")
        print(f"‚úì Model saved to: {Config.MODEL_PATH}")
    else:
        # Only reachable when no epoch ran (Config.EPOCHS <= 0).
        print("No epochs were run; no checkpoint was written.")
    print("=" * 80)

    # Load best model (only a checkpoint written by THIS run)
    if checkpoint_saved:
        model.load_state_dict(torch.load(Config.MODEL_PATH, map_location=config.DEVICE))
    model.eval()

    # Final evaluation on train and val
    with torch.no_grad():
        final_train_pred = model(X_train.to(config.DEVICE)).argmax(1).cpu()
        final_train_acc = (final_train_pred == y_train).float().mean().item()

        final_val_pred = model(X_val.to(config.DEVICE)).argmax(1).cpu()
        final_val_acc = (final_val_pred == y_val).float().mean().item()

    print("\n   Training Accuracy:   {:.4f} ({:.2f}%)".format(final_train_acc, final_train_acc*100))
    print("   Validation Accuracy: {:.4f} ({:.2f}%)".format(final_val_acc, final_val_acc*100))
    print("\n‚úì Model ready for inference!")

    return model

In [None]:
# ============================================================================
# DEBUG PRINTING
# ============================================================================

def print_debug(query, model_intent, model_conf, retrieval_intent,
                retrieval_score, final_intent, decision):
    """Print a framed summary of how one query was classified.

    Args:
        query: The original user text.
        model_intent: Intent predicted by the CNN.
        model_conf: CNN confidence for ``model_intent`` (0..1).
        retrieval_intent: Intent of the most similar training sentence.
        retrieval_score: Similarity score of that sentence.
        final_intent: Intent that was actually used for the reply.
        decision: Label describing which source produced ``final_intent``.
    """
    # Must mirror the decision rule in ChatteaBot.get_reply, which accepts the
    # model when confidence is >= the threshold (not strictly greater).
    use_model = model_conf >= Config.CONFIDENCE_THRESHOLD

    print("\n" + "=" * 80)
    print(f"QUERY         : {query}")
    print(f"Model Predict : {model_intent:<20} Confidence: {model_conf:.4f} ({model_conf*100:6.2f}%)")
    print(f"Threshold     : {Config.CONFIDENCE_THRESHOLD} ‚Üí Use Model?: {'YES' if use_model else 'NO'}")
    print(f"Retrieval     : {retrieval_intent:<20} Score: {retrieval_score:.4f}")
    print(f"FINAL INTENT  : ‚Üí {final_intent} ‚Üê (Source: {decision})")
    print("=" * 80)

In [None]:
# ============================================================================
# CHATBOT CLASS
# ============================================================================

class ChatteaBot:
    """Intent chatbot combining a greeting rule, a CNN classifier and retrieval.

    A reply is chosen in this order:
      1. If the message contains a greeting word, the "greeting" response.
      2. The CNN intent, when its softmax confidence is at least
         ``Config.CONFIDENCE_THRESHOLD``.
      3. Otherwise the intent of the most similar training sentence
         (cosine similarity of averaged word vectors).
    """

    # Words that trigger the rule-based greeting (matched as whole words).
    GREETING_WORDS = frozenset(["hai", "halo", "hello", "hi", "hey", "pagi", "siang", "malam"])

    def __init__(self, model, embedder, label_encoder, responses,
                 df, sentence_vectors, vocab):
        """Store the fitted components.

        Args:
            model: Trained classifier returning logits for index sequences.
            embedder: Object providing ``encode_sequence``, ``sentence_vector``
                and ``embed_dim``.
            label_encoder: Object with ``classes_`` (index -> label), or a
                dict mapping label -> index.
            responses: Mapping intent -> response string or per-language dict.
            df: DataFrame with an 'intent' column, row-aligned with
                ``sentence_vectors``.
            sentence_vectors: Array of one vector per df row, or None.
            vocab: Known words used for typo correction.
        """
        self.model = model
        self.embedder = embedder
        self.le = label_encoder  # sklearn LabelEncoder instance
        self.responses = responses
        self.df = df.reset_index(drop=True)
        self.sentence_vectors = sentence_vectors.astype(np.float32) if sentence_vectors is not None else np.zeros((len(self.df), embedder.embed_dim))
        self.vocab = vocab

        # Intent mapping
        if hasattr(self.le, "classes_"):
            self.intent_map = {i: label for i, label in enumerate(self.le.classes_)}
        elif isinstance(self.le, dict):
            self.intent_map = {v: k for k, v in self.le.items()}
        else:
            self.intent_map = {}

        self.model.eval()

    @classmethod
    def _is_greeting(cls, text):
        """Return True when any whole word of ``text`` is a greeting word.

        Whole-word matching is deliberate: a substring test would treat
        "this", "shipping" or "while" as greetings because they contain "hi".
        """
        words = re.findall(r"\w+", str(text).lower())
        return any(word in cls.GREETING_WORDS for word in words)

    def _model_device(self):
        """Device the model's parameters live on (falls back to config.DEVICE)."""
        first_param = next(self.model.parameters(), None)
        return first_param.device if first_param is not None else config.DEVICE

    def _get_response(self, intent):
        """Return the response text for ``intent``.

        Unknown intents fall back to the "help" response, then to a fixed
        message. Per-language dicts prefer "en", then "id", then any value;
        an empty dict yields the fixed fallback message.
        """
        fallback = "I'm not sure how to help with that."
        response = self.responses.get(intent, self.responses.get("help", fallback))
        if isinstance(response, dict):
            for language in ("en", "id"):
                if language in response:
                    return response[language]
            # Any available language; the default avoids StopIteration on {}.
            return next(iter(response.values()), fallback)
        return response

    def get_reply(self, user_input, debug=False):
        """Return the bot's reply to ``user_input``.

        Args:
            user_input: Raw user message (converted with ``str()``).
            debug: When True, print the classification details via ``print_debug``.
        """
        text = str(user_input).strip()

        if text == "":
            return "Say something :)"

        # Rule-based greeting
        if self._is_greeting(text):
            if debug:
                print_debug(user_input, "greeting", 1.0, "greeting", 1.0, "greeting", "RULE-BASED")
            return self._get_response("greeting")

        # Fuzzy correction
        corrected = fuzzy_correct(text, self.vocab, Config.FUZZY_CUTOFF)
        tokens = tokenize(corrected)

        # Model prediction. The input is placed on the model's own device so a
        # model left on CPU still works when config.DEVICE is CUDA.
        sequence = self.embedder.encode_sequence(tokens, Config.MAX_SEQ_LENGTH)
        x = torch.LongTensor([sequence]).to(self._model_device())

        with torch.no_grad():
            logits = self.model(x)
            probs = F.softmax(logits, dim=1).cpu().numpy()[0]
        model_conf = float(probs.max())
        model_idx = int(np.argmax(probs))
        # Map to label string; unknown indices are reported as their number.
        model_intent = self.intent_map.get(model_idx, str(model_idx))

        # Retrieval fallback (sentence vectors from embedder average)
        user_vec = self.embedder.sentence_vector(tokens).reshape(1, -1)
        # An all-zero query vector means no token was known: every similarity
        # would be 0 and argmax would arbitrarily select the first dataset row.
        query_has_signal = bool(np.any(user_vec))

        if (not query_has_signal
                or self.sentence_vectors is None
                or len(self.sentence_vectors) == 0):
            retrieval_intent = model_intent
            retrieval_score = 0.0
        else:
            similarities = cosine_similarity(user_vec, self.sentence_vectors)[0]
            best_idx = int(np.argmax(similarities))
            retrieval_score = float(similarities[best_idx])
            retrieval_intent = str(self.df.iloc[best_idx]["intent"])

        # Decision
        if model_conf >= Config.CONFIDENCE_THRESHOLD:
            final_intent = model_intent
            decision = "MODEL"
        else:
            final_intent = retrieval_intent
            decision = "RETRIEVAL"

        if debug:
            print_debug(user_input, model_intent, model_conf, retrieval_intent,
                        retrieval_score, final_intent, decision)

        return self._get_response(final_intent)

In [None]:
# ============================================================================
# UTILS: Pretty evaluation & inference output
# ============================================================================

def pretty_inference_tests(bot, test_queries=None):
    """Run sample queries through ``bot.get_reply`` and print each exchange.

    Replies are converted to ``str`` when necessary and cut to 200 characters
    (with a trailing ``...`` when longer). An exception raised for one query is
    printed and the run continues with the next query.

    Args:
        bot: Object exposing ``get_reply(query, debug=False)``.
        test_queries: Queries to send; a built-in list of five is used when None.
    """
    queries = test_queries
    if queries is None:
        queries = [
            "hello",
            "what is chattea",
            "how to blast message",
            "create instance",
            "send bulk messages"
        ]

    frame = "=" * 80
    print("\n" + frame)
    print("üß™ TESTING INFERENCE")
    print(frame)
    print("\nRunning test queries:\n")

    for query in queries:
        print(f"üë§ User: {query}")
        try:
            reply = bot.get_reply(query, debug=False)
            # Non-string replies are shown via their str() form.
            shown = reply if isinstance(reply, str) else str(reply)
            ellipsis = '...' if len(shown) > 200 else ''
            print(f"ü§ñ Bot: {shown[:200]}{ellipsis}")
        except Exception as e:
            print("Error during inference:", e)
        print("-" * 80)

def pretty_evaluation(model, X_val, y_val, le, df, X_all=None):
    """Print validation accuracy overall and per intent.

    Args:
        model: Trained classifier returning logits.
        X_val: Validation inputs (tensor).
        y_val: Validation labels (tensor).
        le: Object whose ``classes_`` lists the intent names by label index.
        df: DataFrame whose 'label' column holds the integer label of every
            dataset row (used only for the optional full-dataset accuracy).
        X_all: Optional tensor with the encoded inputs of every row of ``df``.
            When given, accuracy over the full dataset is printed as well.
            For backward compatibility a module-level ``X`` is used when this
            is None; pass it explicitly instead of relying on that.
    """
    # Dropout must be disabled for evaluation; do not rely on the caller
    # having left the model in eval mode.
    model.eval()

    # Evaluate on the device the model actually lives on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else config.DEVICE

    with torch.no_grad():
        outputs = model(X_val.to(device))
        preds = outputs.argmax(1).cpu().numpy()
        labels = y_val.cpu().numpy()

    # Same computation as the per-intent accuracies below.
    val_acc = float((preds == labels).mean())
    print("\n" + "=" * 80)
    print("üìä MODEL EVALUATION")
    print("=" * 80)
    print(f"‚úì Validation Accuracy: {val_acc:.4f} ({val_acc*100:.2f}%)\n")
    print("üìã Per-Intent Performance:")

    intent_names = list(le.classes_)
    for i, intent_name in enumerate(intent_names):
        mask = labels == i
        count = int(mask.sum())
        if count == 0:
            continue
        intent_acc = (preds[mask] == labels[mask]).mean()
        print(f"   {intent_name:30s}: {intent_acc:.3f} ({count:2d} samples)")

    # Optional accuracy over the full dataset.
    if X_all is None:
        X_all = globals().get("X")  # legacy notebook-global fallback
    can_score_all = (
        isinstance(X_all, torch.Tensor)
        and "label" in df.columns
        and len(X_all) == len(df)
    )
    if can_score_all:
        try:
            with torch.no_grad():
                all_preds = model(X_all.to(device)).argmax(1).cpu().numpy()
            all_labels = df['label'].to_numpy(dtype=int)
            train_acc = (all_preds == all_labels).mean()
            print(f"\nAccuracy on FULL training set: {train_acc:.4f}")
        except Exception as exc:
            # Best-effort extra metric: report the problem instead of hiding it.
            print(f"\n(Skipped full-dataset accuracy: {exc})")

    print("\n" + "=" * 80)
    print("‚úÖ EVALUATION COMPLETE")
    print("=" * 80)

In [None]:
# ============================================================================
# MAIN PIPELINE
# ============================================================================

def main():
    """Run the full pipeline: load data, embed, train or load the CNN, test, evaluate.

    Steps:
      1. Load the dataset CSV and the responses JSON (both must exist).
      2. Build the typo-correction vocabulary and encode intent labels.
      3. Load the saved Word2Vec model, or train and save one.
      4. Encode every text to a fixed-length index sequence and make a
         stratified train/validation split.
      5. Load the saved CNN checkpoint, or train one (training also happens
         when the checkpoint fails to load).
      6. Build normalized sentence vectors for retrieval, create the bot,
         run the sample queries and print the evaluation.

    Raises:
        FileNotFoundError: If the dataset or responses file is missing.
        ValueError: If the dataset lacks the 'text' or 'intent' column.
    """
    print("\nüìÇ Loading data...")
    if not os.path.exists(Config.DATASET_PATH):
        raise FileNotFoundError(f"Dataset not found: {Config.DATASET_PATH}")

    df = pd.read_csv(Config.DATASET_PATH)
    if "text" not in df.columns or "intent" not in df.columns:
        raise ValueError("Dataset must have 'text' and 'intent' columns")

    # Rows with a missing text or intent would otherwise be turned into the
    # literal token / class "nan" by the str() conversions below.
    n_loaded = len(df)
    df = df.dropna(subset=["text", "intent"]).reset_index(drop=True)
    if len(df) < n_loaded:
        print(f"Dropped {n_loaded - len(df)} rows with missing text/intent")

    print(f"‚úì Loaded {len(df)} samples, {df['intent'].nunique()} intents")

    # Load responses
    if not os.path.exists(Config.RESPONSES_PATH):
        raise FileNotFoundError(f"Responses file not found: {Config.RESPONSES_PATH}")

    with open(Config.RESPONSES_PATH, "r", encoding="utf-8") as f:
        responses = json.load(f)

    # Build vocabulary (for fuzzy)
    print("\nüìö Building vocabulary...")
    vocab = build_vocabulary(df['text'].tolist())
    print(f"‚úì Vocabulary: {len(vocab)} words")

    # Label encoding
    print("\nüè∑Ô∏è  Encoding labels...")
    le = LabelEncoder()
    df['label'] = le.fit_transform(df['intent'].astype(str))
    num_classes = len(le.classes_)
    print(f"‚úì Classes: {num_classes}")

    # Tokenize
    print("\n‚úÇÔ∏è  Tokenizing...")
    df['tokens'] = df['text'].apply(lambda t: tokenize(str(t)))

    # Word2Vec
    embedder = Word2VecEmbedder()
    if os.path.exists(Config.WORD2VEC_PATH):
        embedder.load(Config.WORD2VEC_PATH)
    else:
        embedder.train(df['tokens'].tolist())
        embedder.save(Config.WORD2VEC_PATH)

    # Prepare sequences
    print("\nüìä Preparing sequences...")
    sequences = np.array([embedder.encode_sequence(tokens, Config.MAX_SEQ_LENGTH) for tokens in df['tokens']], dtype=np.int64)

    X = torch.tensor(sequences, dtype=torch.long)
    y = torch.tensor(df['label'].values, dtype=torch.long)

    # Train/val split
    train_idx, val_idx = train_test_split(
        range(len(df)),
        test_size=Config.TEST_SIZE,
        random_state=Config.RANDOM_SEED,
        stratify=df['label']
    )

    X_train = X[train_idx]
    y_train = y[train_idx]
    X_val = X[val_idx]
    y_val = y[val_idx]

    # Build model
    model = TextCNN(vocab_size=embedder.vocab_size, embedding_dim=Config.EMBEDDING_DIM, num_classes=num_classes, embedding_matrix=embedder.embedding_matrix)
    # Place the model on the target device up front. load_state_dict() copies
    # values into the existing parameters and does not move the module, so
    # without this the "load existing checkpoint" path left the model on CPU
    # while inference and evaluation send their inputs to config.DEVICE.
    model = model.to(config.DEVICE)

    # Train or load model
    if os.path.exists(Config.MODEL_PATH):
        print(f"\n‚úì Found existing model: {Config.MODEL_PATH}")
        try:
            model.load_state_dict(torch.load(Config.MODEL_PATH, map_location=config.DEVICE))
            model.eval()
            print("‚úì Model loaded!")
        except Exception as e:
            # E.g. the checkpoint was trained with a different vocabulary size.
            print("Failed to load model, will retrain:", e)
            model = train_model_pretty(model, X_train, y_train, X_val, y_val)
    else:
        print("\n‚ö†Ô∏è  No pre-trained model found. Training from scratch...")
        model = train_model_pretty(model, X_train, y_train, X_val, y_val)

    # Prepare sentence vectors for retrieval
    print("\nüìê Preparing sentence vectors for retrieval...")
    sent_vecs = np.stack([embedder.sentence_vector(tokens) for tokens in df['tokens']])
    norms = np.linalg.norm(sent_vecs, axis=1, keepdims=True)
    norms[norms == 0] = 1.0  # avoid dividing all-zero vectors by zero
    sent_vecs_normalized = sent_vecs / norms

    # Create bot
    print("\nü§ñ Initializing chatbot...")
    bot = ChatteaBot(model, embedder, le, responses, df, sent_vecs_normalized, vocab)
    print("‚úì Chatbot ready!")

    # Inference tests
    pretty_inference_tests(bot)

    # Evaluation
    pretty_evaluation(model, X_val, y_val, le, df)

if __name__ == "__main__":
    main()


üìÇ Loading data...
‚úì Loaded 2102 samples, 14 intents

üìö Building vocabulary...
‚úì Vocabulary: 1037 words

üè∑Ô∏è  Encoding labels...
‚úì Classes: 14

‚úÇÔ∏è  Tokenizing...

üß† Training Word2Vec...
‚úì Word2Vec trained: vocab=1043, dim=100
‚úì Word2Vec saved to word2vec.model

üìä Preparing sequences...
‚úì CNN initialized with Word2Vec embeddings!

‚ö†Ô∏è  No pre-trained model found. Training from scratch...

üèãÔ∏è  TRAINING LOOP (WITH PROPER BATCHING!)

Epoch | Train Acc | Train Loss | Val Acc | Val Loss
-----------------------------------------------------------------
    0 |    0.0756 |     2.5957 |  0.1164 |   2.5480
    5 |    0.9423 |     0.2226 |  0.9572 |   0.1690
   10 |    0.9958 |     0.0300 |  0.9810 |   0.0742
   15 |    0.9994 |     0.0072 |  0.9810 |   0.0602
   20 |    1.0000 |     0.0052 |  0.9857 |   0.0614
   25 |    0.9976 |     0.0066 |  0.9905 |   0.0568
   29 |    1.0000 |     0.0035 |  0.9881 |   0.0564

‚úì Best Validation Accuracy: 0.9905 (99.05

In [None]:
# ============================================================================
# FRESH START - DELETE EVERYTHING AND RETRAIN
# ============================================================================

import os

# 1. Delete saved model
# The checkpoint is written to Config.MODEL_PATH; deleting only the hard-coded
# "chattea.pth" left the real checkpoint in place, so the next run silently
# reloaded the old model. The legacy name is still removed if present.
for model_path in dict.fromkeys([Config.MODEL_PATH, "chattea.pth"]):
    if os.path.exists(model_path):
        os.remove(model_path)
        print("‚úì Deleted old model")

# 2. Clear GPU cache
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("‚úì Cleared GPU cache")

# 3. Delete saved word2vec.model
if os.path.exists(Config.WORD2VEC_PATH):
    os.remove(Config.WORD2VEC_PATH)
    print("Deleted Old Word2Vec Model")

‚úì Deleted old model
‚úì Cleared GPU cache
Deleted Old Word2Vec Model
