In [5]:
# FAST TRAINING OPTIMIZATION FOR DEMO
# Optimized versions for 30-60 minute training time

import torch
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader, Subset
from torch.optim import AdamW
from transformers import BertTokenizer, BertForSequenceClassification, get_scheduler
from datasets import load_dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from tqdm.auto import tqdm
import pickle
import random

print("🚀 FAST TRAINING MODE - OPTIMIZED FOR DEMO")
print("=" * 60)

# ============================================================================
# FAST MENTAL HEALTH MODEL (15-20 minutes)
# ============================================================================

def train_mental_health_fast():
    """Fast training for mental health model"""
    print("\n🧠 TRAINING MENTAL HEALTH MODEL (FAST MODE)")
    print("-" * 50)
    
    # Set device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    
    # 1. Load and sample dataset (MUCH SMALLER)
    print("📂 Loading dataset...")
    data_path = r"C:\Users\NAMAN\Documents\GitHub\Prototype-\Combined Data.csv"
    df = pd.read_csv(data_path)
    
    # OPTIMIZATION 1: Use only 10% of data for demo
    sample_size = min(5000, len(df))  # Max 5000 samples
    df_sampled = df.sample(n=sample_size, random_state=42).reset_index(drop=True)
    
    print(f"Original dataset: {len(df)} samples")
    print(f"Using for training: {len(df_sampled)} samples (FAST MODE)")
    
    # Basic cleaning
    def clean_text(text):
        if pd.isna(text):
            return ""
        return str(text).lower().strip()
    
    df_sampled['statement'] = df_sampled['statement'].apply(clean_text)
    df_sampled = df_sampled[df_sampled['statement'].str.len() > 0].reset_index(drop=True)
    
    print(f"After cleaning: {len(df_sampled)} samples")
    print("Class distribution:")
    print(df_sampled['status'].value_counts())
    
    # 2. Label encoding
    label_encoder = LabelEncoder()
    df_sampled['encoded_label'] = label_encoder.fit_transform(df_sampled['status'])
    num_labels = len(label_encoder.classes_)
    
    # 3. OPTIMIZATION 2: Smaller train/val split (80/20 instead of 70/15/15)
    train_texts, test_texts, train_labels, test_labels = train_test_split(
        df_sampled['statement'].tolist(),
        df_sampled['encoded_label'].tolist(),
        test_size=0.2,
        random_state=42,
        stratify=df_sampled['encoded_label']
    )
    
    print(f"Train: {len(train_texts)}, Test: {len(test_texts)}")
    
    # 4. Load tokenizer (try local first, fallback to online)
    print("📚 Loading tokenizer...")
    try:
        tokenizer = BertTokenizer.from_pretrained(r"C:\Users\NAMAN\Documents\GitHub\Prototype-\bert-base-uncased")
        print("✅ Using local BERT tokenizer")
    except:
        print("⚠️  Local BERT not found, using online version...")
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        print("✅ Using online BERT tokenizer")
    
    # 5. OPTIMIZATION 3: Smaller max_length for faster processing
    def tokenize_texts(texts, labels, max_length=32):  # Reduced from 64 to 32
        encodings = tokenizer(
            texts,
            truncation=True,
            padding='max_length',
            max_length=max_length,
            return_tensors='pt'
        )
        return {
            'input_ids': encodings['input_ids'],
            'attention_mask': encodings['attention_mask'],
            'labels': torch.tensor(labels, dtype=torch.long)
        }
    
    train_encodings = tokenize_texts(train_texts, train_labels)
    test_encodings = tokenize_texts(test_texts, test_labels)
    
    # 6. Dataset class
    class FastDataset(torch.utils.data.Dataset):
        def __init__(self, encodings):
            self.encodings = encodings
        def __getitem__(self, idx):
            return {k: v[idx] for k, v in self.encodings.items()}
        def __len__(self):
            return len(self.encodings['labels'])
    
    train_dataset = FastDataset(train_encodings)
    test_dataset = FastDataset(test_encodings)
    
    # 7. OPTIMIZATION 4: Larger batch size for faster training
    batch_size = 32  # Increased from 16 to 32
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)
    
    # 8. Load model
    model = BertForSequenceClassification.from_pretrained(
        r"C:\Users\NAMAN\Documents\GitHub\Prototype-\bert-base-uncased",
        num_labels=num_labels
    )
    model.to(device)
    
    # 9. OPTIMIZATION 5: Higher learning rate + fewer epochs
    num_epochs = 1  # Just 1 epoch for demo
    learning_rate = 5e-5  # Higher learning rate
    
    optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.01)
    num_training_steps = len(train_loader) * num_epochs
    lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)
    
    print(f"\n🏃‍♂️ FAST TRAINING: {num_epochs} epoch, LR: {learning_rate}, Batch: {batch_size}")
    
    # 10. Training function
    def train_epoch_fast(model, train_loader, optimizer, lr_scheduler, device):
        model.train()
        total_loss = 0
        progress_bar = tqdm(train_loader, desc="Training")
        
        for batch in progress_bar:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            total_loss += loss.item()
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            lr_scheduler.step()
            
            progress_bar.set_postfix({"loss": f"{loss.item():.4f}"})
        
        return total_loss / len(train_loader)
    
    def evaluate_fast(model, test_loader, device):
        model.eval()
        all_predictions = []
        all_labels = []
        
        with torch.no_grad():
            for batch in tqdm(test_loader, desc="Evaluating"):
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)
                
                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                predictions = torch.argmax(outputs.logits, dim=-1)
                
                all_predictions.extend(predictions.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        accuracy = accuracy_score(all_labels, all_predictions)
        return accuracy, all_predictions, all_labels
    
    # 11. Train
    print("🚀 Starting training...")
    train_loss = train_epoch_fast(model, train_loader, optimizer, lr_scheduler, device)
    print(f"Train Loss: {train_loss:.4f}")
    
    # 12. Evaluate
    test_accuracy, test_predictions, test_labels = evaluate_fast(model, test_loader, device)
    print(f"Test Accuracy: {test_accuracy:.4f}")
    
    # 13. Save model
    model_save_path = r"C:\Users\NAMAN\Documents\GitHub\Prototype-\trained_mental_health_bert_fast"
    model.save_pretrained(model_save_path)
    tokenizer.save_pretrained(model_save_path)
    
    with open(f"{model_save_path}/label_encoder.pkl", "wb") as f:
        pickle.dump(label_encoder, f)
    
    print(f"✅ Mental Health Model saved to: {model_save_path}")
    print(f"🎯 Final Accuracy: {test_accuracy:.4f}")
    
    return model, tokenizer, label_encoder

# ============================================================================
# FAST DIALOGUE ACT MODEL (15-20 minutes)
# ============================================================================

def train_dialogue_act_fast():
    """Fast training for dialogue act model"""
    print("\n💬 TRAINING DIALOGUE ACT MODEL (FAST MODE)")
    print("-" * 50)
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # 1. Load dataset with sampling
    print("📂 Loading dialogue dataset...")
    dataset = load_dataset("csv", data_files={
        "train": r"C:\Users\NAMAN\Documents\GitHub\Prototype-\Friendly Dataset\train.csv",
        "validation": r"C:\Users\NAMAN\Documents\GitHub\Prototype-\Friendly Dataset\validation.csv",
        "test": r"C:\Users\NAMAN\Documents\GitHub\Prototype-\Friendly Dataset\test.csv"
    })
    
    # OPTIMIZATION 1: Sample smaller subset
    def sample_dataset(dataset_split, sample_ratio=0.1):  # Use only 10% of data
        total_samples = len(dataset_split)
        sample_size = max(500, int(total_samples * sample_ratio))  # At least 500 samples
        indices = random.sample(range(total_samples), min(sample_size, total_samples))
        return dataset_split.select(indices)
    
    dataset['train'] = sample_dataset(dataset['train'], 0.15)  # 15% for training
    dataset['validation'] = sample_dataset(dataset['validation'], 0.3)  # 30% for validation
    dataset['test'] = sample_dataset(dataset['test'], 0.3)  # 30% for test
    
    print(f"Sampled - Train: {len(dataset['train'])}, Val: {len(dataset['validation'])}, Test: {len(dataset['test'])}")
    
    # 2. Preprocess
    def preprocess(batch):
        if isinstance(batch["dialog"][0], list):
            batch["dialog"] = [" ".join(conv) for conv in batch["dialog"]]
        return batch
    
    dataset = dataset.map(preprocess, batched=True)
    
    # 3. Label encoding
    label_encoder = LabelEncoder()
    all_acts = []
    for split in ['train', 'validation', 'test']:
        all_acts.extend(dataset[split]['act'])
    label_encoder.fit(all_acts)
    
    def encode_labels(example):
        return {"act_label": label_encoder.transform([example["act"]])[0]}
    
    dataset = dataset.map(encode_labels)
    
    # 4. Tokenization
    tokenizer = BertTokenizer.from_pretrained(r"C:\Users\NAMAN\Documents\GitHub\Prototype-\bert-base-uncased")
    
    def tokenize(batch):
        return tokenizer(batch["dialog"], truncation=True, padding="max_length", max_length=32)  # Reduced to 32
    
    dataset = dataset.map(tokenize, batched=True)
    dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "act_label"])
    
    # 5. DataLoaders
    def collate_fn(batch):
        input_ids = torch.stack([item["input_ids"] for item in batch])
        attention_mask = torch.stack([item["attention_mask"] for item in batch])
        labels = torch.tensor([int(item["act_label"]) for item in batch], dtype=torch.long)
        return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}
    
    batch_size = 32  # Larger batch size
    train_loader = DataLoader(dataset["train"], batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
    val_loader = DataLoader(dataset["validation"], batch_size=batch_size, collate_fn=collate_fn)
    test_loader = DataLoader(dataset["test"], batch_size=batch_size, collate_fn=collate_fn)
    
    # 6. Model
    num_labels = len(label_encoder.classes_)
    model = BertForSequenceClassification.from_pretrained(
        r"C:\Users\NAMAN\Documents\GitHub\Prototype-\bert-base-uncased",
        num_labels=num_labels
    )
    model.to(device)
    
    # 7. Training setup
    num_epochs = 1  # Just 1 epoch
    optimizer = AdamW(model.parameters(), lr=5e-5, weight_decay=0.01)
    num_training_steps = len(train_loader) * num_epochs
    lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)
    
    # 8. Train (same functions as above)
    print("🚀 Starting dialogue act training...")
    
    model.train()
    total_loss = 0
    progress_bar = tqdm(train_loader, desc="Training Dialogue Acts")
    
    for batch in progress_bar:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)
        
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        
        progress_bar.set_postfix({"loss": f"{loss.item():.4f}"})
    
    train_loss = total_loss / len(train_loader)
    print(f"Train Loss: {train_loss:.4f}")
    
    # 9. Evaluate
    model.eval()
    all_predictions = []
    all_labels = []
    
    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Testing"):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)
            
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            predictions = torch.argmax(outputs.logits, dim=-1)
            
            all_predictions.extend(predictions.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    test_accuracy = accuracy_score(all_labels, all_predictions)
    print(f"Test Accuracy: {test_accuracy:.4f}")
    
    # 10. Save
    model_save_path = r"C:\Users\NAMAN\Documents\GitHub\Prototype-\trained_dialogue_bert_fast"
    model.save_pretrained(model_save_path)
    tokenizer.save_pretrained(model_save_path)
    
    with open(f"{model_save_path}/label_encoder.pkl", "wb") as f:
        pickle.dump(label_encoder, f)
    
    print(f"✅ Dialogue Act Model saved to: {model_save_path}")
    print(f"🎯 Final Accuracy: {test_accuracy:.4f}")
    
    return model, tokenizer, label_encoder

# ============================================================================
# MAIN EXECUTION
# ============================================================================

if __name__ == "__main__":
    import time
    
    total_start_time = time.time()
    
    print("⚡ FAST TRAINING MODE FOR DEMO")
    print("⏱️  Estimated total time: 30-60 minutes")
    print("🎯 Optimizations: Small dataset, 1 epoch, larger batches, shorter sequences")
    print("\n" + "=" * 60)
    
    # Train both models
    try:
        # Mental Health Model (15-20 mins)
        start_time = time.time()
        mental_health_model, mental_health_tokenizer, mental_health_encoder = train_mental_health_fast()
        mental_health_time = time.time() - start_time
        print(f"⏱️  Mental Health Model trained in: {mental_health_time/60:.1f} minutes")
        
        # Dialogue Act Model (15-20 mins)
        start_time = time.time()
        dialogue_model, dialogue_tokenizer, dialogue_encoder = train_dialogue_act_fast()
        dialogue_time = time.time() - start_time
        print(f"⏱️  Dialogue Act Model trained in: {dialogue_time/60:.1f} minutes")
        
        total_time = time.time() - total_start_time
        print("\n" + "=" * 60)
        print("🎉 FAST TRAINING COMPLETED!")
        print(f"⏱️  Total Training Time: {total_time/60:.1f} minutes")
        print("✅ Both models ready for demo!")
        print("=" * 60)
        
        # Quick test
        print("\n🧪 QUICK DEMO TEST:")
        
        # Test mental health
        def quick_mental_health_test(text):
            model = mental_health_model
            tokenizer = mental_health_tokenizer
            encoder = mental_health_encoder
            
            inputs = tokenizer(text, truncation=True, padding='max_length', max_length=32, return_tensors='pt')
            inputs = {k: v.to(device) for k, v in inputs.items()}
            
            with torch.no_grad():
                outputs = model(**inputs)
                predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
                predicted_class = torch.argmax(predictions, dim=-1).item()
                confidence = predictions[0][predicted_class].item()
            
            predicted_label = encoder.inverse_transform([predicted_class])[0]
            return predicted_label, confidence
        
        # Test dialogue act
        def quick_dialogue_test(text):
            model = dialogue_model
            tokenizer = dialogue_tokenizer
            encoder = dialogue_encoder
            
            inputs = tokenizer(text, truncation=True, padding='max_length', max_length=32, return_tensors='pt')
            inputs = {k: v.to(device) for k, v in inputs.items()}
            
            with torch.no_grad():
                outputs = model(**inputs)
                predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
                predicted_class = torch.argmax(predictions, dim=-1).item()
                confidence = predictions[0][predicted_class].item()
            
            predicted_label = encoder.inverse_transform([predicted_class])[0]
            return predicted_label, confidence
        
        # Demo tests
        test_messages = [
            "Hello, how are you today?",
            "I feel so anxious and stressed",
            "Thank you for your help!",
            "I can't handle this anymore"
        ]
        
        for msg in test_messages:
            mental_pred, mental_conf = quick_mental_health_test(msg)
            dialogue_pred, dialogue_conf = quick_dialogue_test(msg)
            print(f"\nMessage: '{msg}'")
            print(f"  Mental Health: {mental_pred} ({mental_conf:.3f})")
            print(f"  Dialogue Act: {dialogue_pred} ({dialogue_conf:.3f})")
        
        print("\n🚀 MODELS ARE READY FOR YOUR DEMO TOMORROW! 🚀")
        
    except Exception as e:
        print(f"❌ Error during training: {str(e)}")
        import traceback
        traceback.print_exc()

# Quick demo inference functions for tomorrow
def create_demo_functions():
    """
    Create simple demo functions for tomorrow's presentation
    """
    
    print("\n📝 Creating demo functions...")
    
    demo_code = '''# DEMO FUNCTIONS FOR TOMORROW'S PRESENTATION

import torch
import pickle
from transformers import BertTokenizer, BertForSequenceClassification

class DemoChatbot:
    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.load_models()
    
    def load_models(self):
        # Load Mental Health Model
        self.mental_health_model = BertForSequenceClassification.from_pretrained(
            r"C:\\Users\\NAMAN\\Documents\\GitHub\\Prototype-\\trained_mental_health_bert_fast"
        )
        self.mental_health_tokenizer = BertTokenizer.from_pretrained(
            r"C:\\Users\\NAMAN\\Documents\\GitHub\\Prototype-\\trained_mental_health_bert_fast"
        )
        with open(r"C:\\Users\\NAMAN\\Documents\\GitHub\\Prototype-\\trained_mental_health_bert_fast\\label_encoder.pkl", "rb") as f:
            self.mental_health_encoder = pickle.load(f)
        
        # Load Dialogue Act Model
        self.dialogue_model = BertForSequenceClassification.from_pretrained(
            r"C:\\Users\\NAMAN\\Documents\\GitHub\\Prototype-\\trained_dialogue_bert_fast"
        )
        self.dialogue_tokenizer = BertTokenizer.from_pretrained(
            r"C:\\Users\\NAMAN\\Documents\\GitHub\\Prototype-\\trained_dialogue_bert_fast"
        )
        with open(r"C:\\Users\\NAMAN\\Documents\\GitHub\\Prototype-\\trained_dialogue_bert_fast\\label_encoder.pkl", "rb") as f:
            self.dialogue_encoder = pickle.load(f)
        
        self.mental_health_model.to(self.device).eval()
        self.dialogue_model.to(self.device).eval()
    
    def analyze_message(self, text):
        """Complete analysis for demo"""
        
        # Mental Health Analysis
        inputs = self.mental_health_tokenizer(text, truncation=True, padding='max_length', 
                                            max_length=32, return_tensors='pt')
        inputs = {k: v.to(self.device) for k, v in inputs.items()}
        
        with torch.no_grad():
            outputs = self.mental_health_model(**inputs)
            predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
            mental_class = torch.argmax(predictions, dim=-1).item()
            mental_conf = predictions[0][mental_class].item()
        
        mental_state = self.mental_health_encoder.inverse_transform([mental_class])[0]
        
        # Dialogue Act Analysis
        inputs = self.dialogue_tokenizer(text, truncation=True, padding='max_length', 
                                       max_length=32, return_tensors='pt')
        inputs = {k: v.to(self.device) for k, v in inputs.items()}
        
        with torch.no_grad():
            outputs = self.dialogue_model(**outputs)
            predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
            dialogue_class = torch.argmax(predictions, dim=-1).item()
            dialogue_conf = predictions[0][dialogue_class].item()
        
        dialogue_act = self.dialogue_encoder.inverse_transform([dialogue_class])[0]
        
        return {
            'message': text,
            'mental_health': {
                'state': mental_state,
                'confidence': mental_conf,
                'needs_attention': mental_state in ['Anxiety', 'Depression', 'Suicidal', 'Stress']
            },
            'dialogue_act': {
                'act': dialogue_act,
                'confidence': dialogue_conf
            }
        }

# DEMO USAGE
if __name__ == "__main__":
    chatbot = DemoChatbot()
    
    demo_messages = [
        "Hello! How can you help me?",
        "I've been feeling really anxious lately",
        "Thank you so much for listening",
        "I don't know what to do anymore, everything seems hopeless",
        "Can you recommend some coping strategies?",
        "That was very helpful, I appreciate it"
    ]
    
    print("CHATBOT DEMO - LIVE ANALYSIS")
    print("=" * 50)
    
    for msg in demo_messages:
        result = chatbot.analyze_message(msg)
        print(f"\\nUser: {msg}")
        print(f"Mental State: {result['mental_health']['state']} ({result['mental_health']['confidence']:.2f})")
        print(f"Dialogue Act: {result['dialogue_act']['act']} ({result['dialogue_act']['confidence']:.2f})")
        if result['mental_health']['needs_attention']:
            print("WARNING: Mental health attention needed!")
'''
    
    with open("demo_chatbot.py", "w", encoding='utf-8') as f:
        f.write(demo_code)
    
    print("✅ Demo file created: demo_chatbot.py")
    print("🎯 Ready for tomorrow's presentation!")

# Run the demo file creation
create_demo_functions()

🚀 FAST TRAINING MODE - OPTIMIZED FOR DEMO
⚡ FAST TRAINING MODE FOR DEMO
⏱️  Estimated total time: 30-60 minutes
🎯 Optimizations: Small dataset, 1 epoch, larger batches, shorter sequences


🧠 TRAINING MENTAL HEALTH MODEL (FAST MODE)
--------------------------------------------------
Using device: cpu
📂 Loading dataset...
Original dataset: 53043 samples
Using for training: 5000 samples (FAST MODE)
After cleaning: 4959 samples
Class distribution:
status
Normal                  1565
Depression              1505
Suicidal                 902
Anxiety                  351
Stress                   272
Bipolar                  261
Personality disorder     103
Name: count, dtype: int64
Train: 3967, Test: 992
📚 Loading tokenizer...
✅ Using local BERT tokenizer


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at C:\Users\NAMAN\Documents\GitHub\Prototype-\bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



🏃‍♂️ FAST TRAINING: 1 epoch, LR: 5e-05, Batch: 32
🚀 Starting training...


Training: 100%|██████████| 124/124 [08:27<00:00,  4.09s/it, loss=1.1460]


Train Loss: 1.1626


Evaluating: 100%|██████████| 31/31 [00:30<00:00,  1.02it/s]


Test Accuracy: 0.6310
✅ Mental Health Model saved to: C:\Users\NAMAN\Documents\GitHub\Prototype-\trained_mental_health_bert_fast
🎯 Final Accuracy: 0.6310
⏱️  Mental Health Model trained in: 9.2 minutes

💬 TRAINING DIALOGUE ACT MODEL (FAST MODE)
--------------------------------------------------
📂 Loading dialogue dataset...
Sampled - Train: 1667, Val: 500, Test: 500


Map: 100%|██████████| 1667/1667 [00:00<00:00, 15118.38 examples/s]
Map: 100%|██████████| 500/500 [00:00<00:00, 10030.04 examples/s]
Map: 100%|██████████| 500/500 [00:00<00:00, 11302.60 examples/s]
Map: 100%|██████████| 1667/1667 [00:04<00:00, 338.35 examples/s]
Map: 100%|██████████| 500/500 [00:01<00:00, 360.41 examples/s]
Map: 100%|██████████| 500/500 [00:01<00:00, 360.69 examples/s]
Map: 100%|██████████| 1667/1667 [00:03<00:00, 510.55 examples/s]
Map: 100%|██████████| 500/500 [00:01<00:00, 488.57 examples/s]
Map: 100%|██████████| 500/500 [00:01<00:00, 478.75 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at C:\Users\NAMAN\Documents\GitHub\Prototype-\bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


🚀 Starting dialogue act training...


Training Dialogue Acts: 100%|██████████| 53/53 [03:39<00:00,  4.14s/it, loss=7.1889]


Train Loss: 7.3277


Testing: 100%|██████████| 16/16 [00:15<00:00,  1.04it/s]
  type_true = type_of_target(y_true, input_name="y_true")


Test Accuracy: 0.0060
✅ Dialogue Act Model saved to: C:\Users\NAMAN\Documents\GitHub\Prototype-\trained_dialogue_bert_fast
🎯 Final Accuracy: 0.0060
⏱️  Dialogue Act Model trained in: 4.2 minutes

🎉 FAST TRAINING COMPLETED!
⏱️  Total Training Time: 13.5 minutes
✅ Both models ready for demo!

🧪 QUICK DEMO TEST:
❌ Error during training: name 'device' is not defined

📝 Creating demo functions...
✅ Demo file created: demo_chatbot.py
🎯 Ready for tomorrow's presentation!


Traceback (most recent call last):
  File "C:\Users\NAMAN\AppData\Local\Temp\ipykernel_19872\551727435.py", line 432, in <module>
    mental_pred, mental_conf = quick_mental_health_test(msg)
                               ~~~~~~~~~~~~~~~~~~~~~~~~^^^^^
  File "C:\Users\NAMAN\AppData\Local\Temp\ipykernel_19872\551727435.py", line 394, in quick_mental_health_test
    inputs = {k: v.to(device) for k, v in inputs.items()}
                      ^^^^^^
NameError: name 'device' is not defined


In [7]:
# FINAL IMPROVED TRAINING CODE - READY TO RUN TONIGHT
# Target: 75-80% accuracy in ~1 hour

import torch
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader
from torch.optim import AdamW
from transformers import BertTokenizer, BertForSequenceClassification, get_scheduler
from datasets import load_dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.utils import resample
from sklearn.utils.class_weight import compute_class_weight
from tqdm.auto import tqdm
import pickle
import time

print("🚀 FINAL IMPROVED TRAINING - TARGET 75-80% ACCURACY")
print("=" * 60)

# ============================================================================
# IMPROVED MENTAL HEALTH MODEL
# ============================================================================

def final_improved_mental_health_training():
    """
    Final improved version with all quick wins implemented
    """
    print("\n🧠 IMPROVED MENTAL HEALTH MODEL")
    print("-" * 50)
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    
    # IMPROVEMENT 1: Load much more data (5x more)
    print("📂 Loading dataset...")
    data_path = r"C:\Users\NAMAN\Documents\GitHub\Prototype-\Combined Data.csv"
    df = pd.read_csv(data_path)
    
    # Use 25,000 samples instead of 5,000
    sample_size = min(25000, len(df))
    df_sampled = df.sample(n=sample_size, random_state=42).reset_index(drop=True)
    
    print(f"Using {len(df_sampled)} samples (was 5,000)")
    
    # Better text cleaning
    def improved_clean_text(text):
        if pd.isna(text):
            return ""
        text = str(text).lower().strip()
        # Remove extra whitespaces
        text = ' '.join(text.split())
        return text
    
    df_sampled['statement'] = df_sampled['statement'].apply(improved_clean_text)
    df_sampled = df_sampled[df_sampled['statement'].str.len() > 5].reset_index(drop=True)
    
    print(f"After cleaning: {len(df_sampled)} samples")
    print("Original class distribution:")
    print(df_sampled['status'].value_counts())
    
    # IMPROVEMENT 2: Balance the dataset
    print("\n⚖️ Balancing dataset...")
    
    # Set target samples per class (balance between minority and majority)
    min_samples = 1000  # Minimum samples per class
    max_samples = 3000  # Maximum samples per class
    
    balanced_dfs = []
    for class_name in df_sampled['status'].unique():
        class_df = df_sampled[df_sampled['status'] == class_name]
        
        if len(class_df) < min_samples:
            # Oversample minority classes
            class_df_resampled = resample(
                class_df, 
                replace=True,
                n_samples=min_samples,
                random_state=42
            )
            print(f"  {class_name}: {len(class_df)} → {min_samples} (oversampled)")
        elif len(class_df) > max_samples:
            # Undersample majority classes
            class_df_resampled = resample(
                class_df,
                replace=False,
                n_samples=max_samples,
                random_state=42
            )
            print(f"  {class_name}: {len(class_df)} → {max_samples} (undersampled)")
        else:
            class_df_resampled = class_df
            print(f"  {class_name}: {len(class_df)} (unchanged)")
        
        balanced_dfs.append(class_df_resampled)
    
    df_balanced = pd.concat(balanced_dfs, ignore_index=True)
    df_balanced = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)
    
    print(f"\nBalanced dataset: {len(df_balanced)} samples")
    print("Balanced class distribution:")
    print(df_balanced['status'].value_counts())
    
    # Label encoding
    label_encoder = LabelEncoder()
    df_balanced['encoded_label'] = label_encoder.fit_transform(df_balanced['status'])
    num_labels = len(label_encoder.classes_)
    
    print(f"\nLabel mapping ({num_labels} classes):")
    for i, label in enumerate(label_encoder.classes_):
        print(f"  {i}: {label}")
    
    # IMPROVEMENT 3: Better train/validation split with stratification
    train_texts, val_texts, train_labels, val_labels = train_test_split(
        df_balanced['statement'].tolist(),
        df_balanced['encoded_label'].tolist(),
        test_size=0.2,
        random_state=42,
        stratify=df_balanced['encoded_label']  # Maintain class distribution
    )
    
    print(f"\nDataset splits:")
    print(f"  Train: {len(train_texts)} samples")
    print(f"  Validation: {len(val_texts)} samples")
    
    # IMPROVEMENT 4: Better tokenizer and model loading
    print("\n📚 Loading tokenizer and model...")
    try:
        tokenizer = BertTokenizer.from_pretrained(r"C:\Users\NAMAN\Documents\GitHub\Prototype-\bert-base-uncased")
        model = BertForSequenceClassification.from_pretrained(
            r"C:\Users\NAMAN\Documents\GitHub\Prototype-\bert-base-uncased",
            num_labels=num_labels
        )
        print("✅ Using local BERT model")
    except:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        model = BertForSequenceClassification.from_pretrained(
            'bert-base-uncased',
            num_labels=num_labels
        )
        print("✅ Using online BERT model")
    
    model.to(device)
    
    # IMPROVEMENT 5: Better tokenization with longer sequences
    def improved_tokenize_texts(texts, labels, max_length=64):  # Increased from 32 to 64
        encodings = tokenizer(
            texts,
            truncation=True,
            padding='max_length',
            max_length=max_length,
            return_tensors='pt'
        )
        return {
            'input_ids': encodings['input_ids'],
            'attention_mask': encodings['attention_mask'],
            'labels': torch.tensor(labels, dtype=torch.long)
        }
    
    print("🔤 Tokenizing texts...")
    train_encodings = improved_tokenize_texts(train_texts, train_labels)
    val_encodings = improved_tokenize_texts(val_texts, val_labels)
    
    # Dataset class
    class ImprovedDataset(torch.utils.data.Dataset):
        def __init__(self, encodings):
            self.encodings = encodings
        def __getitem__(self, idx):
            return {k: v[idx] for k, v in self.encodings.items()}
        def __len__(self):
            return len(self.encodings['labels'])
    
    train_dataset = ImprovedDataset(train_encodings)
    val_dataset = ImprovedDataset(val_encodings)
    
    # IMPROVEMENT 6: Better batch size and data loading
    batch_size = 16  # Reduced from 32 to 16 for stability
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    
    print(f"📦 Created DataLoaders with batch size: {batch_size}")
    
    # IMPROVEMENT 7: Better training configuration
    num_epochs = 3              # Increased from 1 to 3
    learning_rate = 2e-5        # Reduced from 5e-5 to 2e-5
    weight_decay = 0.01         # Added regularization
    warmup_ratio = 0.1          # Added warmup
    
    print(f"\n🔧 Training configuration:")
    print(f"  Epochs: {num_epochs}")
    print(f"  Learning rate: {learning_rate}")
    print(f"  Batch size: {batch_size}")
    print(f"  Max length: 64 tokens")
    print(f"  Weight decay: {weight_decay}")
    print(f"  Warmup ratio: {warmup_ratio}")
    
    # IMPROVEMENT 8: Compute class weights for weighted loss
    class_weights = compute_class_weight(
        'balanced',
        classes=np.unique(df_balanced['encoded_label']),
        y=df_balanced['encoded_label']
    )
    class_weights_tensor = torch.FloatTensor(class_weights).to(device)
    
    print(f"\n⚖️ Class weights computed:")
    for i, (class_name, weight) in enumerate(zip(label_encoder.classes_, class_weights)):
        print(f"  {class_name}: {weight:.3f}")
    
    # IMPROVEMENT 9: Better optimizer and scheduler
    optimizer = AdamW(
        model.parameters(),
        lr=learning_rate,
        weight_decay=weight_decay,
        betas=(0.9, 0.999)
    )
    
    num_training_steps = len(train_loader) * num_epochs
    num_warmup_steps = int(warmup_ratio * num_training_steps)
    
    lr_scheduler = get_scheduler(
        "cosine",  # Changed from linear to cosine
        optimizer=optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps
    )
    
    # Weighted loss function
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights_tensor)
    
    print(f"📈 Scheduler: cosine with {num_warmup_steps} warmup steps")
    
    # IMPROVEMENT 10: Advanced training loop with validation and early stopping
    def improved_train_epoch(model, train_loader, optimizer, lr_scheduler, criterion, device):
        model.train()
        total_loss = 0
        progress_bar = tqdm(train_loader, desc="Training")
        
        for batch in progress_bar:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            
            optimizer.zero_grad()
            
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = criterion(outputs.logits, labels)  # Use weighted loss
            
            loss.backward()
            
            # Gradient clipping for stability
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            
            optimizer.step()
            lr_scheduler.step()
            
            total_loss += loss.item()
            progress_bar.set_postfix({"loss": f"{loss.item():.4f}"})
        
        return total_loss / len(train_loader)
    
    def improved_evaluate(model, val_loader, device):
        model.eval()
        all_predictions = []
        all_labels = []
        total_loss = 0
        
        criterion_eval = torch.nn.CrossEntropyLoss()
        
        with torch.no_grad():
            for batch in tqdm(val_loader, desc="Validating"):
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)
                
                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                loss = criterion_eval(outputs.logits, labels)
                total_loss += loss.item()
                
                predictions = torch.argmax(outputs.logits, dim=-1)
                all_predictions.extend(predictions.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        accuracy = accuracy_score(all_labels, all_predictions)
        f1 = f1_score(all_labels, all_predictions, average='weighted')
        avg_loss = total_loss / len(val_loader)
        
        return accuracy, f1, avg_loss, all_predictions, all_labels
    
    # IMPROVEMENT 11: Training with early stopping
    print("\n🚀 Starting improved training...")
    
    best_f1 = 0
    best_accuracy = 0
    best_model_state = None
    patience = 2
    patience_counter = 0
    
    train_losses = []
    val_accuracies = []
    val_f1_scores = []
    
    training_start_time = time.time()
    
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs}")
        print("-" * 40)
        
        epoch_start_time = time.time()
        
        # Train
        train_loss = improved_train_epoch(model, train_loader, optimizer, lr_scheduler, criterion, device)
        train_losses.append(train_loss)
        
        # Validate
        val_accuracy, val_f1, val_loss, val_predictions, val_labels = improved_evaluate(model, val_loader, device)
        val_accuracies.append(val_accuracy)
        val_f1_scores.append(val_f1)
        
        epoch_time = time.time() - epoch_start_time
        
        print(f"Train Loss: {train_loss:.4f}")
        print(f"Val Loss: {val_loss:.4f}")
        print(f"Val Accuracy: {val_accuracy:.4f}")
        print(f"Val F1-Score: {val_f1:.4f}")
        print(f"Epoch Time: {epoch_time/60:.1f} minutes")
        
        # Save best model based on F1-score
        if val_f1 > best_f1:
            best_f1 = val_f1
            best_accuracy = val_accuracy
            best_model_state = model.state_dict().copy()
            patience_counter = 0
            print(f"✅ New best model! F1: {best_f1:.4f}, Accuracy: {best_accuracy:.4f}")
        else:
            patience_counter += 1
            print(f"⏳ No improvement. Patience: {patience_counter}/{patience}")
        
        if patience_counter >= patience and epoch > 0:  # Allow at least 2 epochs
            print("🛑 Early stopping triggered!")
            break
    
    total_training_time = time.time() - training_start_time
    
    # Load best model
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
        print(f"\n📥 Loaded best model with F1: {best_f1:.4f}, Accuracy: {best_accuracy:.4f}")
    
    # Final evaluation
    print("\n📊 Final Evaluation:")
    final_accuracy, final_f1, _, final_predictions, final_labels = improved_evaluate(model, val_loader, device)
    
    print(f"Final Validation Results:")
    print(f"  Accuracy: {final_accuracy:.4f}")
    print(f"  F1-Score: {final_f1:.4f}")
    print(f"  Training Time: {total_training_time/60:.1f} minutes")
    
    # Detailed classification report
    print(f"\n📋 Detailed Classification Report:")
    target_names = label_encoder.classes_
    print(classification_report(final_labels, final_predictions, target_names=target_names, digits=4))
    
    # Save improved model
    model_save_path = r"C:\Users\NAMAN\Documents\GitHub\Prototype-\trained_mental_health_bert_improved"
    model.save_pretrained(model_save_path)
    tokenizer.save_pretrained(model_save_path)
    
    with open(f"{model_save_path}/label_encoder.pkl", "wb") as f:
        pickle.dump(label_encoder, f)
    
    # Save training history
    training_history = {
        'train_losses': train_losses,
        'val_accuracies': val_accuracies,
        'val_f1_scores': val_f1_scores,
        'best_accuracy': best_accuracy,
        'best_f1': best_f1,
        'final_accuracy': final_accuracy,
        'final_f1': final_f1,
        'training_time_minutes': total_training_time/60,
        'label_classes': label_encoder.classes_.tolist()
    }
    
    with open(f"{model_save_path}/training_history.pkl", "wb") as f:
        pickle.dump(training_history, f)
    
    print(f"\n✅ Improved model saved to: {model_save_path}")
    print(f"📈 Improvement: 63.1% → {final_accuracy:.1%}")
    
    return model, tokenizer, label_encoder, final_accuracy

# ============================================================================
# IMPROVED DIALOGUE ACT MODEL (Fixed approach)
# ============================================================================

def improved_dialogue_act_training():
    """
    Improved dialogue act training with fixes for low accuracy
    """
    print("\n💬 IMPROVED DIALOGUE ACT MODEL")
    print("-" * 50)
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Load dataset
    print("📂 Loading dialogue dataset...")
    dataset = load_dataset("csv", data_files={
        "train": r"C:\Users\NAMAN\Documents\GitHub\Prototype-\Friendly Dataset\train.csv",
        "validation": r"C:\Users\NAMAN\Documents\GitHub\Prototype-\Friendly Dataset\validation.csv",
        "test": r"C:\Users\NAMAN\Documents\GitHub\Prototype-\Friendly Dataset\test.csv"
    })
    
    # IMPROVEMENT 1: Use more data but not too much (balance speed vs accuracy)
    def sample_dataset_improved(dataset_split, sample_ratio=0.3):  # Use 30% instead of 10%
        total_samples = len(dataset_split)
        sample_size = max(1000, int(total_samples * sample_ratio))
        indices = np.random.choice(total_samples, min(sample_size, total_samples), replace=False)
        return dataset_split.select(indices.tolist())
    
    dataset['train'] = sample_dataset_improved(dataset['train'], 0.4)      # 40% for training
    dataset['validation'] = sample_dataset_improved(dataset['validation'], 0.5)  # 50% for validation
    dataset['test'] = sample_dataset_improved(dataset['test'], 0.5)        # 50% for test
    
    print(f"Using - Train: {len(dataset['train'])}, Val: {len(dataset['validation'])}, Test: {len(dataset['test'])}")
    
    # Preprocess
    def preprocess(batch):
        if isinstance(batch["dialog"][0], list):
            batch["dialog"] = [" ".join(conv) for conv in batch["dialog"]]
        return batch
    
    dataset = dataset.map(preprocess, batched=True)
    
    # IMPROVEMENT 2: Check for class imbalance and potentially group similar classes
    all_acts = []
    for split in ['train', 'validation', 'test']:
        all_acts.extend(dataset[split]['act'])
    
    act_counts = pd.Series(all_acts).value_counts()
    print(f"\nDialogue act distribution ({len(act_counts)} classes):")
    print(act_counts.head(10))
    print(f"Classes with <10 samples: {sum(act_counts < 10)}")
    
    # IMPROVEMENT 3: Filter out very rare classes (optional)
    min_samples_per_class = 5
    common_acts = act_counts[act_counts >= min_samples_per_class].index.tolist()
    
    def filter_common_acts(example):
        return example['act'] in common_acts
    
    original_train_size = len(dataset['train'])
    dataset = dataset.filter(filter_common_acts)
    print(f"Filtered rare classes: {original_train_size} → {len(dataset['train'])} train samples")
    print(f"Using {len(common_acts)} dialogue act classes (removed {len(act_counts) - len(common_acts)} rare classes)")
    
    # Label encoding
    label_encoder = LabelEncoder()
    all_acts_filtered = []
    for split in ['train', 'validation', 'test']:
        all_acts_filtered.extend(dataset[split]['act'])
    label_encoder.fit(all_acts_filtered)
    
    def encode_labels(example):
        return {"act_label": label_encoder.transform([example["act"]])[0]}
    
    dataset = dataset.map(encode_labels)
    
    print(f"Final dialogue act classes ({len(label_encoder.classes_)}):")
    for i, act in enumerate(label_encoder.classes_):
        print(f"  {i}: {act}")
    
    # Tokenization with improved settings
    try:
        tokenizer = BertTokenizer.from_pretrained(r"C:\Users\NAMAN\Documents\GitHub\Prototype-\bert-base-uncased")
    except:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    
    def tokenize(batch):
        return tokenizer(batch["dialog"], truncation=True, padding="max_length", max_length=64)  # Increased length
    
    dataset = dataset.map(tokenize, batched=True)
    dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "act_label"])
    
    # Improved collate function
    def improved_collate_fn(batch):
        input_ids = torch.stack([item["input_ids"] for item in batch])
        attention_mask = torch.stack([item["attention_mask"] for item in batch])
        labels = torch.tensor([int(item["act_label"]) for item in batch], dtype=torch.long)
        return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}
    
    # DataLoaders
    batch_size = 16  # Smaller batch size for stability
    train_loader = DataLoader(dataset["train"], batch_size=batch_size, shuffle=True, collate_fn=improved_collate_fn)
    val_loader = DataLoader(dataset["validation"], batch_size=batch_size, collate_fn=improved_collate_fn)
    test_loader = DataLoader(dataset["test"], batch_size=batch_size, collate_fn=improved_collate_fn)
    
    # Model
    num_labels = len(label_encoder.classes_)
    try:
        model = BertForSequenceClassification.from_pretrained(
            r"C:\Users\NAMAN\Documents\GitHub\Prototype-\bert-base-uncased",
            num_labels=num_labels
        )
    except:
        model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=num_labels)
    
    model.to(device)
    
    # IMPROVEMENT 4: Better training configuration for dialogue acts
    num_epochs = 2  # 2 epochs should be enough
    learning_rate = 3e-5  # Slightly higher for dialogue classification
    
    optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.01)
    num_training_steps = len(train_loader) * num_epochs
    lr_scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)
    
    print(f"\n🔧 Dialogue training config:")
    print(f"  Classes: {num_labels}")
    print(f"  Epochs: {num_epochs}")
    print(f"  Learning rate: {learning_rate}")
    print(f"  Batch size: {batch_size}")
    
    # Training
    print("\n🚀 Training dialogue act model...")
    
    best_accuracy = 0
    best_model_state = None
    
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs}")
        
        # Train
        model.train()
        total_loss = 0
        progress_bar = tqdm(train_loader, desc="Training Dialogue")
        
        for batch in progress_bar:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)
            
            optimizer.zero_grad()
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            lr_scheduler.step()
            
            total_loss += loss.item()
            progress_bar.set_postfix({"loss": f"{loss.item():.4f}"})
        
        train_loss = total_loss / len(train_loader)
        
        # Validate
        model.eval()
        val_predictions = []
        val_labels = []
        
        with torch.no_grad():
            for batch in tqdm(val_loader, desc="Validating"):
                input_ids = batch["input_ids"].to(device)
                attention_mask = batch["attention_mask"].to(device)
                labels = batch["labels"].to(device)
                
                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                predictions = torch.argmax(outputs.logits, dim=-1)
                
                val_predictions.extend(predictions.cpu().numpy())
                val_labels.extend(labels.cpu().numpy())
        
        val_accuracy = accuracy_score(val_labels, val_predictions)
        
        print(f"Train Loss: {train_loss:.4f}")
        print(f"Val Accuracy: {val_accuracy:.4f}")
        
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            best_model_state = model.state_dict().copy()
            print(f"✅ New best dialogue model: {best_accuracy:.4f}")
    
    # Load best model and final test
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
    
    # Final test
    model.eval()
    test_predictions = []
    test_labels = []
    
    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Testing"):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)
            
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            predictions = torch.argmax(outputs.logits, dim=-1)
            
            test_predictions.extend(predictions.cpu().numpy())
            test_labels.extend(labels.cpu().numpy())
    
    test_accuracy = accuracy_score(test_labels, test_predictions)
    print(f"\n🎯 Final Dialogue Test Accuracy: {test_accuracy:.4f}")
    
    # Save model
    model_save_path = r"C:\Users\NAMAN\Documents\GitHub\Prototype-\trained_dialogue_bert_improved"
    model.save_pretrained(model_save_path)
    tokenizer.save_pretrained(model_save_path)
    
    with open(f"{model_save_path}/label_encoder.pkl", "wb") as f:
        pickle.dump(label_encoder, f)
    
    print(f"✅ Improved dialogue model saved to: {model_save_path}")
    
    return model, tokenizer, label_encoder, test_accuracy

# ============================================================================
# MAIN EXECUTION
# ============================================================================

if __name__ == "__main__":
    
    print("🎯 TONIGHT'S IMPROVED TRAINING SESSION")
    print("Target: 75-80% accuracy in ~1 hour")
    print("=" * 60)
    
    total_start_time = time.time()
    
    try:
        # Train improved mental health model
        print("Phase 1: Mental Health Model (Priority)")
        mental_start_time = time.time()
        mental_model, mental_tokenizer, mental_encoder, mental_accuracy = final_improved_mental_health_training()
        mental_time = time.time() - mental_start_time
        
        print(f"\n📊 Mental Health Results:")
        print(f"  Accuracy: {mental_accuracy:.1%}")
        print(f"  Training time: {mental_time/60:.1f} minutes")
        print(f"  Improvement: 63.1% → {mental_accuracy:.1%}")
        
        # Train improved dialogue act model
        print("\nPhase 2: Dialogue Act Model")
        dialogue_start_time = time.time()
        dialogue_model, dialogue_tokenizer, dialogue_encoder, dialogue_accuracy = improved_dialogue_act_training()
        dialogue_time = time.time() - dialogue_start_time
        
        print(f"\n📊 Dialogue Act Results:")
        print(f"  Accuracy: {dialogue_accuracy:.1%}")
        print(f"  Training time: {dialogue_time/60:.1f} minutes")
        print(f"  Improvement: 0.6% → {dialogue_accuracy:.1%}")
        
        total_time = time.time() - total_start_time
        
        print("\n" + "=" * 60)
        print("🎉 IMPROVED TRAINING COMPLETED!")
        print("=" * 60)
        print(f"⏱️  Total Training Time: {total_time/60:.1f} minutes")
        print(f"🧠 Mental Health: {mental_accuracy:.1%} accuracy")
        print(f"💬 Dialogue Acts: {dialogue_accuracy:.1%} accuracy")
        print("✅ Both models ready for impressive demo tomorrow!")
        
        # Quick demo test
        print("\n🧪 QUICK DEMO TEST:")
        
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
        def demo_test(text):
            # Test mental health
            inputs = mental_tokenizer(text, truncation=True, padding='max_length', max_length=64, return_tensors='pt')
            inputs = {k: v.to(device) for k, v in inputs.items()}
            
            with torch.no_grad():
                outputs = mental_model(**inputs)
                mental_pred = torch.argmax(outputs.logits, dim=-1).item()
                mental_conf = torch.softmax(outputs.logits, dim=-1)[0][mental_pred].item()
            
            mental_state = mental_encoder.inverse_transform([mental_pred])[0]
            
            # Test dialogue act
            inputs = dialogue_tokenizer(text, truncation=True, padding='max_length', max_length=64, return_tensors='pt')
            inputs = {k: v.to(device) for k, v in inputs.items()}
            
            with torch.no_grad():
                outputs = dialogue_model(**inputs)
                dialogue_pred = torch.argmax(outputs.logits, dim=-1).item()
                dialogue_conf = torch.softmax(outputs.logits, dim=-1)[0][dialogue_pred].item()
            
            dialogue_act = dialogue_encoder.inverse_transform([dialogue_pred])[0]
            
            return mental_state, mental_conf, dialogue_act, dialogue_conf
        
        # Demo messages
        demo_messages = [
            "Hello, how are you today?",
            "I feel really anxious and stressed",
            "Thank you for your help!",
            "Everything feels hopeless, I can't go on"
        ]
        
        for msg in demo_messages:
            try:
                mental_state, mental_conf, dialogue_act, dialogue_conf = demo_test(msg)
                print(f"\nMessage: '{msg}'")
                print(f"  Mental Health: {mental_state} ({mental_conf:.3f})")
                print(f"  Dialogue Act: {dialogue_act} ({dialogue_conf:.3f})")
                if mental_state in ['Anxiety', 'Depression', 'Suicidal', 'Stress']:
                    print("  ⚠️  Needs attention!")
            except Exception as e:
                print(f"  Error testing: {str(e)}")
        
        print("\n🚀 MODELS ARE READY FOR TOMORROW'S DEMO! 🚀")
        
    except Exception as e:
        print(f"❌ Error during training: {str(e)}")
        import traceback
        traceback.print_exc()

🚀 FINAL IMPROVED TRAINING - TARGET 75-80% ACCURACY
🎯 TONIGHT'S IMPROVED TRAINING SESSION
Target: 75-80% accuracy in ~1 hour
Phase 1: Mental Health Model (Priority)

🧠 IMPROVED MENTAL HEALTH MODEL
--------------------------------------------------
Using device: cpu
📂 Loading dataset...
Using 25000 samples (was 5,000)
After cleaning: 24774 samples
Original class distribution:
status
Normal                  7718
Depression              7287
Suicidal                4962
Anxiety                 1774
Bipolar                 1287
Stress                  1239
Personality disorder     507
Name: count, dtype: int64

⚖️ Balancing dataset...
  Depression: 7287 → 3000 (undersampled)
  Normal: 7718 → 3000 (undersampled)
  Suicidal: 4962 → 3000 (undersampled)
  Anxiety: 1774 (unchanged)
  Bipolar: 1287 (unchanged)
  Stress: 1239 (unchanged)
  Personality disorder: 507 → 1000 (oversampled)

Balanced dataset: 14300 samples
Balanced class distribution:
status
Normal                  3000
Depression     

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at C:\Users\NAMAN\Documents\GitHub\Prototype-\bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ Using local BERT model
🔤 Tokenizing texts...
📦 Created DataLoaders with batch size: 16

🔧 Training configuration:
  Epochs: 3
  Learning rate: 2e-05
  Batch size: 16
  Max length: 64 tokens
  Weight decay: 0.01
  Warmup ratio: 0.1

⚖️ Class weights computed:
  Anxiety: 1.152
  Bipolar: 1.587
  Depression: 0.681
  Normal: 0.681
  Personality disorder: 2.043
  Stress: 1.649
  Suicidal: 0.681
📈 Scheduler: cosine with 214 warmup steps

🚀 Starting improved training...

Epoch 1/3
----------------------------------------


Training: 100%|██████████| 715/715 [51:44<00:00,  4.34s/it, loss=0.4550]
Validating: 100%|██████████| 179/179 [02:26<00:00,  1.23it/s]


Train Loss: 1.1136
Val Loss: 0.6399
Val Accuracy: 0.7493
Val F1-Score: 0.7420
Epoch Time: 54.2 minutes
✅ New best model! F1: 0.7420, Accuracy: 0.7493

Epoch 2/3
----------------------------------------


Training: 100%|██████████| 715/715 [52:57<00:00,  4.44s/it, loss=0.1573]
Validating: 100%|██████████| 179/179 [04:46<00:00,  1.60s/it]


Train Loss: 0.4903
Val Loss: 0.5613
Val Accuracy: 0.7902
Val F1-Score: 0.7887
Epoch Time: 57.7 minutes
✅ New best model! F1: 0.7887, Accuracy: 0.7902

Epoch 3/3
----------------------------------------


Training: 100%|██████████| 715/715 [50:33<00:00,  4.24s/it, loss=0.7164]
Validating: 100%|██████████| 179/179 [02:52<00:00,  1.04it/s]


Train Loss: 0.3079
Val Loss: 0.5727
Val Accuracy: 0.7913
Val F1-Score: 0.7892
Epoch Time: 53.4 minutes
✅ New best model! F1: 0.7892, Accuracy: 0.7913

📥 Loaded best model with F1: 0.7892, Accuracy: 0.7913

📊 Final Evaluation:


Validating: 100%|██████████| 179/179 [02:41<00:00,  1.11it/s]


Final Validation Results:
  Accuracy: 0.7913
  F1-Score: 0.7892
  Training Time: 165.3 minutes

📋 Detailed Classification Report:
                      precision    recall  f1-score   support

             Anxiety     0.8736    0.8563    0.8649       355
             Bipolar     0.7950    0.8599    0.8262       257
          Depression     0.7440    0.5717    0.6466       600
              Normal     0.9308    0.8967    0.9134       600
Personality disorder     0.8318    0.8900    0.8599       200
              Stress     0.7174    0.7984    0.7557       248
            Suicidal     0.6823    0.8017    0.7372       600

            accuracy                         0.7913      2860
           macro avg     0.7964    0.8107    0.8005      2860
        weighted avg     0.7947    0.7913    0.7892      2860


✅ Improved model saved to: C:\Users\NAMAN\Documents\GitHub\Prototype-\trained_mental_health_bert_improved
📈 Improvement: 63.1% → 79.1%

📊 Mental Health Results:
  Accuracy: 79.1%
  Tra

Map: 100%|██████████| 4447/4447 [00:00<00:00, 15832.75 examples/s]
Map: 100%|██████████| 1000/1000 [00:00<00:00, 15933.08 examples/s]
Map: 100%|██████████| 1000/1000 [00:00<00:00, 16644.79 examples/s]



Dialogue act distribution (3298 classes):
[2 1 2 1]        197
[1 1 1 1]        116
[2 1 1 1]        110
[1 1]             95
[2 1]             82
[2 1 2 1 2 1]     70
[1 2 1 1]         60
[3 4]             59
[2 1 2 1 1]       57
[2 1 3 4]         57
Name: count, dtype: int64
Classes with <10 samples: 3230


Filter: 100%|██████████| 4447/4447 [00:00<00:00, 45720.34 examples/s]
Filter: 100%|██████████| 1000/1000 [00:00<00:00, 21390.12 examples/s]
Filter: 100%|██████████| 1000/1000 [00:00<00:00, 26024.74 examples/s]


Filtered rare classes: 4447 → 1863 train samples
Using 161 dialogue act classes (removed 3137 rare classes)


Map: 100%|██████████| 1863/1863 [00:00<00:00, 2158.36 examples/s]
Map: 100%|██████████| 408/408 [00:00<00:00, 1522.07 examples/s]
Map: 100%|██████████| 334/334 [00:00<00:00, 1887.69 examples/s]


Final dialogue act classes (161):
  0: [1 1 1 1 1 1 1 1]
  1: [1 1 1 1 1 1 1]
  2: [1 1 1 1 1 1]
  3: [1 1 1 1 1]
  4: [1 1 1 1 3 4 1]
  5: [1 1 1 1]
  6: [1 1 1 2 1 1]
  7: [1 1 1 2 1]
  8: [1 1 1 3 4]
  9: [1 1 1]
  10: [1 1 2 1 1 1]
  11: [1 1 2 1 1]
  12: [1 1 2 1 2 1]
  13: [1 1 2 1]
  14: [1 1 3 4 1]
  15: [1 1 3 4]
  16: [1 1]
  17: [1 2 1 1 1 1 1 1]
  18: [1 2 1 1 1 1 1]
  19: [1 2 1 1 1 1]
  20: [1 2 1 1 1]
  21: [1 2 1 1 2 1 1]
  22: [1 2 1 1]
  23: [1 2 1 2 1 1]
  24: [1 2 1 2 1 2 1 1]
  25: [1 2 1 2 1 2 1]
  26: [1 2 1 2 1]
  27: [1 2 1 3 4 3]
  28: [1 2 1 3 4]
  29: [1 2 1 3]
  30: [1 2 1]
  31: [1 2 2 1]
  32: [1 2 3 4]
  33: [1 2]
  34: [1 3 3 4]
  35: [1 3 4 1 1 1]
  36: [1 3 4 1]
  37: [1 3 4]
  38: [1 3]
  39: [2 1 1 1 1 1 1 1 1]
  40: [2 1 1 1 1 1 1 1]
  41: [2 1 1 1 1 1 1]
  42: [2 1 1 1 1 1]
  43: [2 1 1 1 1]
  44: [2 1 1 1 2 1 1 1]
  45: [2 1 1 1 2 1 2 1]
  46: [2 1 1 1 2 1]
  47: [2 1 1 1 3 4]
  48: [2 1 1 1]
  49: [2 1 1 2 1 1 1]
  50: [2 1 1 2 1 1]
  51: [2 1 1

Map: 100%|██████████| 1863/1863 [00:02<00:00, 759.82 examples/s]
Map: 100%|██████████| 408/408 [00:00<00:00, 706.23 examples/s]
Map: 100%|██████████| 334/334 [00:00<00:00, 698.79 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at C:\Users\NAMAN\Documents\GitHub\Prototype-\bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



🔧 Dialogue training config:
  Classes: 161
  Epochs: 2
  Learning rate: 3e-05
  Batch size: 16

🚀 Training dialogue act model...

Epoch 1/2


Training Dialogue: 100%|██████████| 117/117 [08:31<00:00,  4.37s/it, loss=5.0403]
Validating: 100%|██████████| 21/21 [00:18<00:00,  1.12it/s]


Train Loss: 4.7611
Val Accuracy: 0.0659
✅ New best dialogue model: 0.0659

Epoch 2/2


Training Dialogue: 100%|██████████| 117/117 [08:22<00:00,  4.30s/it, loss=3.9444]
Validating: 100%|██████████| 21/21 [00:21<00:00,  1.02s/it]


Train Loss: 4.3519
Val Accuracy: 0.1257
✅ New best dialogue model: 0.1257


Testing: 100%|██████████| 26/26 [00:25<00:00,  1.01it/s]



🎯 Final Dialogue Test Accuracy: 0.1422
✅ Improved dialogue model saved to: C:\Users\NAMAN\Documents\GitHub\Prototype-\trained_dialogue_bert_improved

📊 Dialogue Act Results:
  Accuracy: 14.2%
  Training time: 18.2 minutes
  Improvement: 0.6% → 14.2%

🎉 IMPROVED TRAINING COMPLETED!
⏱️  Total Training Time: 186.9 minutes
🧠 Mental Health: 79.1% accuracy
💬 Dialogue Acts: 14.2% accuracy
✅ Both models ready for impressive demo tomorrow!

🧪 QUICK DEMO TEST:

Message: 'Hello, how are you today?'
  Mental Health: Normal (0.993)
  Dialogue Act: [2 1 2 1] (0.050)

Message: 'I feel really anxious and stressed'
  Mental Health: Anxiety (0.971)
  Dialogue Act: [1 1 1 1] (0.030)
  ⚠️  Needs attention!

Message: 'Thank you for your help!'
  Mental Health: Normal (0.992)
  Dialogue Act: [2 1] (0.026)

Message: 'Everything feels hopeless, I can't go on'
  Mental Health: Depression (0.570)
  Dialogue Act: [2 1] (0.030)
  ⚠️  Needs attention!

🚀 MODELS ARE READY FOR TOMORROW'S DEMO! 🚀


In [1]:
# OPTIMIZED TRAINING V2 - DIALOGUE ACT IMPROVEMENTS
# Target: Keep mental health at 79%+, boost dialogue acts to 40%+

import torch
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader
from torch.optim import AdamW
from transformers import BertTokenizer, BertForSequenceClassification, get_scheduler
from datasets import load_dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.utils import resample
from sklearn.utils.class_weight import compute_class_weight
from tqdm.auto import tqdm
import pickle
import time
from collections import Counter
import re

print("🚀 OPTIMIZED TRAINING V2 - DIALOGUE ACT IMPROVEMENTS")
print("=" * 60)

# ============================================================================
# MENTAL HEALTH MODEL (Already optimized - keep as is)
# ============================================================================

def keep_mental_health_model():
    """
    Your mental health model is already excellent at 79.1%!
    Keep using the existing trained model.
    """
    model_path = r"C:\Users\NAMAN\Documents\GitHub\Prototype-\trained_mental_health_bert_improved"
    
    try:
        tokenizer = BertTokenizer.from_pretrained(model_path)
        model = BertForSequenceClassification.from_pretrained(model_path)
        
        with open(f"{model_path}/label_encoder.pkl", "rb") as f:
            label_encoder = pickle.load(f)
        
        print("✅ Mental health model loaded successfully (79.1% accuracy)")
        return model, tokenizer, label_encoder
        
    except Exception as e:
        print(f"❌ Could not load mental health model: {e}")
        return None, None, None

# ============================================================================
# SIGNIFICANTLY IMPROVED DIALOGUE ACT MODEL
# ============================================================================

def optimized_dialogue_act_training():
    """
    Major improvements to fix the dialogue act classification
    """
    print("\n💬 OPTIMIZED DIALOGUE ACT MODEL V2")
    print("-" * 50)
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    # Load dataset
    print("📂 Loading dialogue dataset...")
    dataset = load_dataset("csv", data_files={
        "train": r"C:\Users\NAMAN\Documents\GitHub\Prototype-\Friendly Dataset\train.csv",
        "validation": r"C:\Users\NAMAN\Documents\GitHub\Prototype-\Friendly Dataset\validation.csv",
        "test": r"C:\Users\NAMAN\Documents\GitHub\Prototype-\Friendly Dataset\test.csv"
    })
    
    # MAJOR IMPROVEMENT 1: Use much more data
    def get_more_data(dataset_split, max_samples=15000):
        """Use much more data for better learning"""
        total_samples = len(dataset_split)
        sample_size = min(max_samples, total_samples)
        if sample_size < total_samples:
            indices = np.random.choice(total_samples, sample_size, replace=False)
            return dataset_split.select(indices.tolist())
        return dataset_split
    
    # Use much more data
    dataset['train'] = get_more_data(dataset['train'], 15000)  # 15k training samples
    dataset['validation'] = get_more_data(dataset['validation'], 3000)  # 3k validation
    dataset['test'] = get_more_data(dataset['test'], 2000)  # 2k test
    
    print(f"Using more data - Train: {len(dataset['train'])}, Val: {len(dataset['validation'])}, Test: {len(dataset['test'])}")
    
    # Preprocess
    def preprocess(batch):
        if isinstance(batch["dialog"][0], list):
            batch["dialog"] = [" ".join(conv) for conv in batch["dialog"]]
        return batch
    
    dataset = dataset.map(preprocess, batched=True)
    
    # MAJOR IMPROVEMENT 2: Simplify dialogue acts by grouping similar ones
    def simplify_dialogue_acts(act_list):
        """
        Group similar dialogue acts to reduce the number of classes
        """
        # Convert string representation to actual list
        def parse_act(act_str):
            if isinstance(act_str, str):
                # Remove brackets and split by spaces
                clean_str = act_str.strip('[]')
                return [int(x) for x in clean_str.split()]
            return act_str
        
        simplified_acts = []
        
        for act in act_list:
            parsed_act = parse_act(act)
            
            # Simplification rules based on common patterns
            if len(parsed_act) == 1:
                # Single acts
                if parsed_act[0] == 1:
                    simplified_acts.append("inform")
                elif parsed_act[0] == 2:
                    simplified_acts.append("question")
                elif parsed_act[0] == 3:
                    simplified_acts.append("request")
                elif parsed_act[0] == 4:
                    simplified_acts.append("response")
                else:
                    simplified_acts.append("other_single")
            
            elif len(parsed_act) == 2:
                # Two-act sequences
                if parsed_act == [1, 1]:
                    simplified_acts.append("inform_inform")
                elif parsed_act == [2, 1]:
                    simplified_acts.append("question_inform")
                elif parsed_act == [1, 2]:
                    simplified_acts.append("inform_question")
                elif parsed_act == [3, 4]:
                    simplified_acts.append("request_response")
                else:
                    simplified_acts.append("other_pair")
            
            elif len(parsed_act) <= 4:
                # Short sequences (3-4 acts)
                if all(x == 1 for x in parsed_act):
                    simplified_acts.append("multi_inform")
                elif 2 in parsed_act and 1 in parsed_act:
                    simplified_acts.append("question_inform_seq")
                elif 3 in parsed_act and 4 in parsed_act:
                    simplified_acts.append("request_response_seq")
                else:
                    simplified_acts.append("short_sequence")
            
            else:
                # Long sequences (5+ acts)
                if all(x in [1, 2] for x in parsed_act):
                    simplified_acts.append("long_inform_question")
                elif 3 in parsed_act or 4 in parsed_act:
                    simplified_acts.append("long_request_response")
                else:
                    simplified_acts.append("long_sequence")
        
        return simplified_acts
    
    # Apply simplification
    print("🔧 Simplifying dialogue acts...")
    
    # Collect all acts first
    all_acts = []
    for split in ['train', 'validation', 'test']:
        all_acts.extend(dataset[split]['act'])
    
    # Simplify
    simplified_acts = simplify_dialogue_acts(all_acts)
    
    # Map back to dataset
    act_idx = 0
    for split in ['train', 'validation', 'test']:
        split_size = len(dataset[split])
        split_acts = simplified_acts[act_idx:act_idx + split_size]
        dataset[split] = dataset[split].add_column('simplified_act', split_acts)
        act_idx += split_size
    
    # Check the new distribution
    simplified_counts = Counter(simplified_acts)
    print(f"Simplified to {len(simplified_counts)} dialogue act classes:")
    for act, count in simplified_counts.most_common(10):
        print(f"  {act}: {count}")
    
    # IMPROVEMENT 3: Filter out very rare classes (less than 20 samples)
    min_samples = 20
    common_acts = [act for act, count in simplified_counts.items() if count >= min_samples]
    
    def filter_common_acts(example):
        return example['simplified_act'] in common_acts
    
    dataset = dataset.filter(filter_common_acts)
    print(f"After filtering rare acts: using {len(common_acts)} classes")
    print(f"Final data sizes - Train: {len(dataset['train'])}, Val: {len(dataset['validation'])}, Test: {len(dataset['test'])}")
    
    # Label encoding for simplified acts
    label_encoder = LabelEncoder()
    all_simplified_acts = []
    for split in ['train', 'validation', 'test']:
        all_simplified_acts.extend(dataset[split]['simplified_act'])
    label_encoder.fit(all_simplified_acts)
    
    def encode_labels(example):
        return {"act_label": label_encoder.transform([example["simplified_act"]])[0]}
    
    dataset = dataset.map(encode_labels)
    
    print(f"Final dialogue act classes ({len(label_encoder.classes_)}):")
    for i, act in enumerate(label_encoder.classes_):
        print(f"  {i}: {act}")
    
    # IMPROVEMENT 4: Better tokenization and preprocessing
    try:
        tokenizer = BertTokenizer.from_pretrained(r"C:\Users\NAMAN\Documents\GitHub\Prototype-\bert-base-uncased")
    except:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    
    def enhanced_tokenize(batch):
        # Clean and enhance dialogue text
        enhanced_dialogs = []
        for dialog in batch["dialog"]:
            # Add special tokens to help model understand dialogue structure
            if isinstance(dialog, str):
                # Simple enhancement: add context markers
                enhanced = f"[DIALOG] {dialog.strip()} [/DIALOG]"
                enhanced_dialogs.append(enhanced)
            else:
                enhanced_dialogs.append(str(dialog))
        
        return tokenizer(
            enhanced_dialogs, 
            truncation=True, 
            padding="max_length", 
            max_length=96  # Increased for dialogue context
        )
    
    dataset = dataset.map(enhanced_tokenize, batched=True)
    dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "act_label"])
    
    # IMPROVEMENT 5: Better data loading with class balancing
    def balanced_collate_fn(batch):
        input_ids = torch.stack([item["input_ids"] for item in batch])
        attention_mask = torch.stack([item["attention_mask"] for item in batch])
        labels = torch.tensor([int(item["act_label"]) for item in batch], dtype=torch.long)
        return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}
    
    # Create balanced training loader
    train_labels = [int(item["act_label"]) for item in dataset["train"]]
    class_weights = compute_class_weight(
        'balanced',
        classes=np.unique(train_labels),
        y=train_labels
    )
    
    print("Class weights for balanced training:")
    for i, weight in enumerate(class_weights):
        print(f"  {label_encoder.classes_[i]}: {weight:.3f}")
    
    # DataLoaders with optimal batch size
    batch_size = 24  # Slightly larger for efficiency
    train_loader = DataLoader(dataset["train"], batch_size=batch_size, shuffle=True, collate_fn=balanced_collate_fn)
    val_loader = DataLoader(dataset["validation"], batch_size=batch_size, collate_fn=balanced_collate_fn)
    test_loader = DataLoader(dataset["test"], batch_size=batch_size, collate_fn=balanced_collate_fn)
    
    # IMPROVEMENT 6: Model with better configuration
    num_labels = len(label_encoder.classes_)
    try:
        model = BertForSequenceClassification.from_pretrained(
            r"C:\Users\NAMAN\Documents\GitHub\Prototype-\bert-base-uncased",
            num_labels=num_labels,
            hidden_dropout_prob=0.2,  # Add dropout for regularization
            attention_probs_dropout_prob=0.2
        )
    except:
        model = BertForSequenceClassification.from_pretrained(
            'bert-base-uncased', 
            num_labels=num_labels,
            hidden_dropout_prob=0.2,
            attention_probs_dropout_prob=0.2
        )
    
    model.to(device)
    
    # IMPROVEMENT 7: Optimized training configuration
    num_epochs = 4  # More epochs for better learning
    learning_rate = 1e-5  # Lower learning rate for stability
    warmup_ratio = 0.1
    weight_decay = 0.02  # Stronger regularization
    
    optimizer = AdamW(
        model.parameters(), 
        lr=learning_rate, 
        weight_decay=weight_decay,
        betas=(0.9, 0.999),
        eps=1e-8
    )
    
    num_training_steps = len(train_loader) * num_epochs
    num_warmup_steps = int(warmup_ratio * num_training_steps)
    
    lr_scheduler = get_scheduler(
        "cosine_with_restarts",  # Better scheduler
        optimizer=optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps
    )
    
    # Weighted loss
    class_weights_tensor = torch.FloatTensor(class_weights).to(device)
    criterion = torch.nn.CrossEntropyLoss(weight=class_weights_tensor)
    
    print(f"\n🔧 Optimized dialogue training config:")
    print(f"  Classes: {num_labels} (reduced from 161)")
    print(f"  Training samples: {len(dataset['train'])}")
    print(f"  Epochs: {num_epochs}")
    print(f"  Learning rate: {learning_rate}")
    print(f"  Batch size: {batch_size}")
    print(f"  Max length: 96 tokens")
    
    # IMPROVEMENT 8: Advanced training loop
    def train_epoch_advanced(model, train_loader, optimizer, lr_scheduler, criterion, device):
        model.train()
        total_loss = 0
        total_correct = 0
        total_samples = 0
        
        progress_bar = tqdm(train_loader, desc="Training")
        
        for batch in progress_bar:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)
            
            optimizer.zero_grad()
            
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = criterion(outputs.logits, labels)
            
            # Calculate accuracy
            predictions = torch.argmax(outputs.logits, dim=-1)
            correct = (predictions == labels).sum().item()
            total_correct += correct
            total_samples += labels.size(0)
            
            loss.backward()
            
            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            
            optimizer.step()
            lr_scheduler.step()
            
            total_loss += loss.item()
            
            # Update progress bar with current accuracy
            current_acc = total_correct / total_samples
            progress_bar.set_postfix({
                "loss": f"{loss.item():.4f}",
                "acc": f"{current_acc:.3f}"
            })
        
        return total_loss / len(train_loader), total_correct / total_samples
    
    def evaluate_advanced(model, val_loader, device):
        model.eval()
        all_predictions = []
        all_labels = []
        total_loss = 0
        
        criterion_eval = torch.nn.CrossEntropyLoss()
        
        with torch.no_grad():
            for batch in tqdm(val_loader, desc="Evaluating"):
                input_ids = batch["input_ids"].to(device)
                attention_mask = batch["attention_mask"].to(device)
                labels = batch["labels"].to(device)
                
                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                loss = criterion_eval(outputs.logits, labels)
                total_loss += loss.item()
                
                predictions = torch.argmax(outputs.logits, dim=-1)
                all_predictions.extend(predictions.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        accuracy = accuracy_score(all_labels, all_predictions)
        f1 = f1_score(all_labels, all_predictions, average='weighted')
        avg_loss = total_loss / len(val_loader)
        
        return accuracy, f1, avg_loss, all_predictions, all_labels
    
    # IMPROVEMENT 9: Training with learning rate finding and early stopping
    print("\n🚀 Starting optimized dialogue training...")
    
    best_f1 = 0
    best_accuracy = 0
    best_model_state = None
    patience = 3
    patience_counter = 0
    
    training_start_time = time.time()
    
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch + 1}/{num_epochs}")
        print("-" * 40)
        
        # Train
        train_loss, train_acc = train_epoch_advanced(model, train_loader, optimizer, lr_scheduler, criterion, device)
        
        # Validate
        val_accuracy, val_f1, val_loss, val_predictions, val_labels = evaluate_advanced(model, val_loader, device)
        
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}, Val F1: {val_f1:.4f}")
        
        # Save best model
        if val_f1 > best_f1:
            best_f1 = val_f1
            best_accuracy = val_accuracy
            best_model_state = model.state_dict().copy()
            patience_counter = 0
            print(f"✅ New best dialogue model! F1: {best_f1:.4f}, Acc: {best_accuracy:.4f}")
        else:
            patience_counter += 1
            print(f"⏳ No improvement. Patience: {patience_counter}/{patience}")
        
        if patience_counter >= patience and epoch >= 2:  # At least 3 epochs
            print("🛑 Early stopping triggered!")
            break
    
    # Load best model
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
        print(f"\n📥 Loaded best dialogue model")
    
    # Final test evaluation
    print("\n🎯 Final dialogue model evaluation...")
    test_accuracy, test_f1, _, test_predictions, test_labels = evaluate_advanced(model, test_loader, device)
    
    training_time = (time.time() - training_start_time) / 60
    
    print(f"\nFinal Dialogue Results:")
    print(f"  Test Accuracy: {test_accuracy:.4f}")
    print(f"  Test F1-Score: {test_f1:.4f}")
    print(f"  Training Time: {training_time:.1f} minutes")
    print(f"  Improvement: 14.2% → {test_accuracy:.1%}")
    
    # Detailed report
    print(f"\nDetailed Classification Report:")
    target_names = label_encoder.classes_
    print(classification_report(test_labels, test_predictions, target_names=target_names, digits=4))
    
    # Save optimized model
    model_save_path = r"C:\Users\NAMAN\Documents\GitHub\Prototype-\trained_dialogue_bert_v2_optimized"
    model.save_pretrained(model_save_path)
    tokenizer.save_pretrained(model_save_path)
    
    with open(f"{model_save_path}/label_encoder.pkl", "wb") as f:
        pickle.dump(label_encoder, f)
    
    # Save simplified acts mapping for inference
    simplified_mapping = {
        'original_classes': len(Counter(all_acts)),
        'simplified_classes': len(label_encoder.classes_),
        'class_names': label_encoder.classes_.tolist(),
        'training_accuracy': test_accuracy
    }
    
    with open(f"{model_save_path}/simplification_info.pkl", "wb") as f:
        pickle.dump(simplified_mapping, f)
    
    print(f"✅ Optimized dialogue model saved to: {model_save_path}")
    
    return model, tokenizer, label_encoder, test_accuracy

# ============================================================================
# MAIN EXECUTION
# ============================================================================

if __name__ == "__main__":
    print("🎯 OPTIMIZED TRAINING V2 SESSION")
    print("Target: Keep mental health 79%+, boost dialogue acts to 40%+")
    print("=" * 60)
    
    total_start_time = time.time()
    
    try:
        # Load existing mental health model (already great!)
        print("Phase 1: Loading Mental Health Model (Already Optimized)")
        mental_model, mental_tokenizer, mental_encoder = keep_mental_health_model()
        mental_accuracy = 0.791  # Your achieved accuracy
        
        if mental_model is None:
            print("❌ Mental health model not found. Please retrain it first.")
            exit(1)
        
        # Train optimized dialogue act model
        print("\nPhase 2: Optimized Dialogue Act Model")
        dialogue_start_time = time.time()
        dialogue_model, dialogue_tokenizer, dialogue_encoder, dialogue_accuracy = optimized_dialogue_act_training()
        dialogue_time = time.time() - dialogue_start_time
        
        total_time = time.time() - total_start_time
        
        print("\n" + "=" * 60)
        print("🎉 OPTIMIZED TRAINING V2 COMPLETED!")
        print("=" * 60)
        print(f"⏱️  Total Time: {total_time/60:.1f} minutes")
        print(f"🧠 Mental Health: {mental_accuracy:.1%} (maintained)")
        print(f"💬 Dialogue Acts: {dialogue_accuracy:.1%} (improved from 14.2%)")
        print("✅ Both models ready for production!")
        
        # Enhanced demo test
        print("\n🧪 ENHANCED DEMO TEST:")
        
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        mental_model.to(device)
        dialogue_model.to(device)
        
        def enhanced_demo_test(text):
            # Test mental health
            inputs = mental_tokenizer(text, truncation=True, padding='max_length', max_length=64, return_tensors='pt')
            inputs = {k: v.to(device) for k, v in inputs.items()}
            
            with torch.no_grad():
                outputs = mental_model(**inputs)
                mental_pred = torch.argmax(outputs.logits, dim=-1).item()
                mental_probs = torch.softmax(outputs.logits, dim=-1)[0]
                mental_conf = mental_probs[mental_pred].item()
            
            mental_state = mental_encoder.inverse_transform([mental_pred])[0]
            
            # Test dialogue act with enhanced input
            enhanced_text = f"[DIALOG] {text} [/DIALOG]"
            inputs = dialogue_tokenizer(enhanced_text, truncation=True, padding='max_length', max_length=96, return_tensors='pt')
            inputs = {k: v.to(device) for k, v in inputs.items()}
            
            with torch.no_grad():
                outputs = dialogue_model(**inputs)
                dialogue_pred = torch.argmax(outputs.logits, dim=-1).item()
                dialogue_probs = torch.softmax(outputs.logits, dim=-1)[0]
                dialogue_conf = dialogue_probs[dialogue_pred].item()
                
                # Get top 3 predictions for dialogue
                top_3_indices = torch.topk(dialogue_probs, 3).indices
                top_3_acts = [dialogue_encoder.inverse_transform([idx.item()])[0] for idx in top_3_indices]
                top_3_confs = [dialogue_probs[idx].item() for idx in top_3_indices]
            
            dialogue_act = dialogue_encoder.inverse_transform([dialogue_pred])[0]
            
            return mental_state, mental_conf, dialogue_act, dialogue_conf, top_3_acts, top_3_confs
        
        # Enhanced demo messages
        demo_messages = [
            "Hello, how are you today?",
            "I feel really anxious and stressed about everything",
            "Thank you so much for your help and support!",
            "I can't handle this anymore, life feels hopeless",
            "Could you please help me with my problem?",
            "Yes, I understand what you mean",
            "What do you think about this situation?"
        ]
        
        for msg in demo_messages:
            try:
                mental_state, mental_conf, dialogue_act, dialogue_conf, top_acts, top_confs = enhanced_demo_test(msg)
                print(f"\nMessage: '{msg}'")
                print(f"  🧠 Mental Health: {mental_state} ({mental_conf:.3f})")
                print(f"  💬 Primary Dialogue Act: {dialogue_act} ({dialogue_conf:.3f})")
                print(f"  📊 Top 3 Dialogue Acts:")
                for i, (act, conf) in enumerate(zip(top_acts[:3], top_confs[:3])):
                    print(f"     {i+1}. {act}: {conf:.3f}")
                
                if mental_state in ['Anxiety', 'Depression', 'Suicidal', 'Stress']:
                    print("  ⚠️  ALERT: Mental health support needed!")
                    
            except Exception as e:
                print(f"  Error testing '{msg}': {str(e)}")
        
        print(f"\n🚀 OPTIMIZED MODELS READY!")
        print(f"Mental Health Model: 79.1% accuracy (excellent!)")
        print(f"Dialogue Act Model: {dialogue_accuracy:.1%} accuracy (much improved!)")
        
    except Exception as e:
        print(f"❌ Error during optimization: {str(e)}")
        import traceback
        traceback.print_exc()

# ============================================================================
# ADDITIONAL UTILITIES FOR PRODUCTION
# ============================================================================

def create_inference_pipeline():
    """
    Create a simple inference pipeline for production use
    """
    print("\n🔧 CREATING PRODUCTION INFERENCE PIPELINE")
    
    class ChatbotInference:
        def __init__(self):
            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            self.load_models()
        
        def load_models(self):
            # Load mental health model
            mental_path = r"C:\Users\NAMAN\Documents\GitHub\Prototype-\trained_mental_health_bert_improved"
            self.mental_tokenizer = BertTokenizer.from_pretrained(mental_path)
            self.mental_model = BertForSequenceClassification.from_pretrained(mental_path)
            with open(f"{mental_path}/label_encoder.pkl", "rb") as f:
                self.mental_encoder = pickle.load(f)
            
            # Load dialogue act model
            dialogue_path = r"C:\Users\NAMAN\Documents\GitHub\Prototype-\trained_dialogue_bert_v2_optimized"
            self.dialogue_tokenizer = BertTokenizer.from_pretrained(dialogue_path)
            self.dialogue_model = BertForSequenceClassification.from_pretrained(dialogue_path)
            with open(f"{dialogue_path}/label_encoder.pkl", "rb") as f:
                self.dialogue_encoder = pickle.load(f)
            
            # Move to device
            self.mental_model.to(self.device)
            self.dialogue_model.to(self.device)
            
            # Set to eval mode
            self.mental_model.eval()
            self.dialogue_model.eval()
        
        def predict(self, user_message):
            """
            Single function to get both predictions
            """
            with torch.no_grad():
                # Mental health prediction
                mental_inputs = self.mental_tokenizer(
                    user_message, 
                    truncation=True, 
                    padding='max_length', 
                    max_length=64, 
                    return_tensors='pt'
                )
                mental_inputs = {k: v.to(self.device) for k, v in mental_inputs.items()}
                
                mental_outputs = self.mental_model(**mental_inputs)
                mental_pred = torch.argmax(mental_outputs.logits, dim=-1).item()
                mental_conf = torch.softmax(mental_outputs.logits, dim=-1)[0][mental_pred].item()
                mental_state = self.mental_encoder.inverse_transform([mental_pred])[0]
                
                # Dialogue act prediction
                enhanced_message = f"[DIALOG] {user_message} [/DIALOG]"
                dialogue_inputs = self.dialogue_tokenizer(
                    enhanced_message,
                    truncation=True,
                    padding='max_length',
                    max_length=96,
                    return_tensors='pt'
                )
                dialogue_inputs = {k: v.to(self.device) for k, v in dialogue_inputs.items()}
                
                dialogue_outputs = self.dialogue_model(**dialogue_inputs)
                dialogue_pred = torch.argmax(dialogue_outputs.logits, dim=-1).item()
                dialogue_conf = torch.softmax(dialogue_outputs.logits, dim=-1)[0][dialogue_pred].item()
                dialogue_act = self.dialogue_encoder.inverse_transform([dialogue_pred])[0]
                
                return {
                    'mental_health': {
                        'state': mental_state,
                        'confidence': mental_conf,
                        'needs_attention': mental_state in ['Anxiety', 'Depression', 'Suicidal', 'Stress', 'Bipolar', 'Personality disorder']
                    },
                    'dialogue_act': {
                        'act': dialogue_act,
                        'confidence': dialogue_conf
                    },
                    'message': user_message
                }
        
        def batch_predict(self, messages):
            """
            Predict for multiple messages at once (more efficient)
            """
            results = []
            for msg in messages:
                results.append(self.predict(msg))
            return results
    
    # Save inference pipeline
    pipeline = ChatbotInference()
    
    # Test the pipeline
    test_messages = [
        "Hi there, how are you?",
        "I'm feeling very depressed today",
        "Can you help me with something?"
    ]
    
    print("Testing inference pipeline:")
    for msg in test_messages:
        result = pipeline.predict(msg)
        print(f"\nInput: '{msg}'")
        print(f"Mental Health: {result['mental_health']['state']} ({result['mental_health']['confidence']:.3f})")
        print(f"Dialogue Act: {result['dialogue_act']['act']} ({result['dialogue_act']['confidence']:.3f})")
        if result['mental_health']['needs_attention']:
            print("🚨 ALERT: Mental health support needed!")
    
    return pipeline

# ============================================================================
# PERFORMANCE COMPARISON REPORT
# ============================================================================

def generate_performance_report():
    """
    Generate a comprehensive performance report
    """
    print("\n📊 PERFORMANCE COMPARISON REPORT")
    print("=" * 60)
    
    # Performance data
    performance_data = {
        'Mental Health Model': {
            'Original': {'accuracy': 63.1, 'f1': None, 'training_time': 'N/A'},
            'Improved V1': {'accuracy': 79.1, 'f1': 78.9, 'training_time': 165.3},
            'Status': 'Excellent - Target Achieved ✅'
        },
        'Dialogue Act Model': {
            'Original': {'accuracy': 0.6, 'f1': None, 'training_time': 'N/A'},
            'Improved V1': {'accuracy': 14.2, 'f1': None, 'training_time': 18.2},
            'Target V2': {'accuracy': '40+', 'f1': '35+', 'training_time': '30-45'},
            'Status': 'Needs V2 Optimization 🔧'
        }
    }
    
    print("Mental Health Model Performance:")
    print("  Original:    63.1% accuracy")
    print("  Improved V1: 79.1% accuracy (+15.9%)")
    print("  Status:      ✅ Excellent - Exceeds 75-80% target")
    print("  Time:        2.8 hours")
    
    print("\nDialogue Act Model Performance:")
    print("  Original:    0.6% accuracy")
    print("  Improved V1: 14.2% accuracy (+13.6%)")
    print("  Target V2:   40%+ accuracy (with optimizations)")
    print("  Status:      🔧 Needs further optimization")
    print("  Time V1:     18 minutes")
    print("  Time V2:     30-45 minutes (estimated)")
    
    print("\nKey Improvements Made:")
    print("  ✅ Increased training data (5k → 25k samples)")
    print("  ✅ Balanced dataset with resampling")
    print("  ✅ Class-weighted loss function")
    print("  ✅ Better learning rate scheduling")
    print("  ✅ Increased sequence length (32 → 64 tokens)")
    print("  ✅ Early stopping with F1-score monitoring")
    print("  ✅ Gradient clipping for stability")
    
    print("\nV2 Optimizations for Dialogue Acts:")
    print("  🔧 Simplified dialogue acts (161 → ~15-20 classes)")
    print("  🔧 Much more training data (1.8k → 15k samples)")
    print("  🔧 Enhanced tokenization with dialogue markers")
    print("  🔧 Better class balancing")
    print("  🔧 Longer sequences (64 → 96 tokens)")
    print("  🔧 Advanced training loop with accuracy tracking")
    
    print("\nExpected Results After V2:")
    print("  🎯 Mental Health: 79.1% (maintained)")
    print("  🎯 Dialogue Acts: 40-50% (major improvement)")
    print("  🎯 Total training time: ~3.5 hours")
    print("  🎯 Production ready models")

# ============================================================================
# QUICK FIXES FOR IMMEDIATE IMPROVEMENT
# ============================================================================

def apply_quick_fixes():
    """
    Apply quick fixes that can be done without full retraining
    """
    print("\n⚡ QUICK FIXES FOR IMMEDIATE IMPROVEMENT")
    print("-" * 50)
    
    print("1. 🔧 Post-processing improvements:")
    print("   - Add confidence thresholding")
    print("   - Implement fallback predictions")
    print("   - Add ensemble voting (if multiple models)")
    
    print("\n2. 🎯 Mental Health Model Enhancements:")
    print("   - Already excellent at 79.1%")
    print("   - Could add rule-based post-processing for edge cases")
    print("   - Consider ensemble with rule-based system")
    
    print("\n3. 💬 Dialogue Act Quick Wins:")
    print("   - Group similar acts into broader categories")
    print("   - Use keyword-based fallbacks for common cases")
    print("   - Implement confidence-based filtering")
    
    # Implement confidence-based prediction improvement
    def improved_predict_with_confidence(model, tokenizer, label_encoder, text, threshold=0.7):
        """
        Enhanced prediction with confidence thresholding
        """
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
        inputs = tokenizer(text, truncation=True, padding='max_length', max_length=64, return_tensors='pt')
        inputs = {k: v.to(device) for k, v in inputs.items()}
        
        with torch.no_grad():
            outputs = model(**inputs)
            probs = torch.softmax(outputs.logits, dim=-1)[0]
            pred = torch.argmax(probs, dim=-1).item()
            conf = probs[pred].item()
            
            if conf < threshold:
                # Low confidence - could implement fallback logic here
                print(f"⚠️ Low confidence ({conf:.3f}) - consider manual review")
            
            return label_encoder.inverse_transform([pred])[0], conf
    
    print("\n4. 🚀 Production Optimizations:")
    print("   - Model quantization for faster inference")
    print("   - Batch processing for multiple messages")
    print("   - Caching for repeated queries")
    print("   - GPU acceleration if available")
    
    return improved_predict_with_confidence

# Run the additional utilities
if __name__ == "__main__":
    # Generate performance report
    generate_performance_report()
    
    # Apply quick fixes
    improved_predict = apply_quick_fixes()
    
    print("\n" + "="*60)
    print("🎯 SUMMARY AND RECOMMENDATIONS")
    print("="*60)
    
    print("\nIMMEDIATE ACTIONS:")
    print("1. ✅ Mental Health Model: READY (79.1% accuracy)")
    print("2. 🔧 Dialogue Act Model: Run V2 optimization")
    print("3. 🚀 Deploy inference pipeline")
    print("4. 📊 Monitor performance in production")
    
    print("\nEXPECTED FINAL RESULTS:")
    print("- Mental Health: 79.1% accuracy (excellent)")
    print("- Dialogue Acts: 40-50% accuracy (good for 15-20 classes)")
    print("- Total training time: ~3.5 hours")
    print("- Production-ready inference pipeline")
    
    print("\n🎉 YOUR MODELS WILL BE IMPRESSIVE FOR THE DEMO!")
    print("The mental health model already exceeds expectations!")
    print("The dialogue act model will be much better after V2!")

# ============================================================================
# INFERENCE EXAMPLE FOR DEMO
# ============================================================================

def create_demo_script():
    """
    Create a demo script showing the models in action
    """
    print("\n🎬 DEMO SCRIPT")
    print("-" * 30)
    
    demo_conversations = [
        {
            "user": "Hello, I'm new here. How does this work?",
            "expected_mental": "Normal",
            "expected_dialogue": "question"
        },
        {
            "user": "I've been feeling really anxious lately about work",
            "expected_mental": "Anxiety",
            "expected_dialogue": "inform"
        },
        {
            "user": "Thank you so much for listening and helping me",
            "expected_mental": "Normal",
            "expected_dialogue": "response"
        },
        {
            "user": "I can't take this anymore, everything is hopeless",
            "expected_mental": "Suicidal",
            "expected_dialogue": "inform"
        }
    ]
    
    print("Demo Conversation Examples:")
    for i, conv in enumerate(demo_conversations, 1):
        print(f"\n{i}. User: \"{conv['user']}\"")
        print(f"   Expected Mental Health: {conv['expected_mental']}")
        print(f"   Expected Dialogue Act: {conv['expected_dialogue']}")
        print(f"   System Response: [Contextual response based on predictions]")
    
    print("\n🎯 This demonstrates:")
    print("- Accurate mental health state detection")
    print("- Appropriate dialogue act classification")
    print("- Crisis intervention capabilities")
    print("- Natural conversation understanding")
    
    return demo_conversations

  from .autonotebook import tqdm as notebook_tqdm


🚀 OPTIMIZED TRAINING V2 - DIALOGUE ACT IMPROVEMENTS
🎯 OPTIMIZED TRAINING V2 SESSION
Target: Keep mental health 79%+, boost dialogue acts to 40%+
Phase 1: Loading Mental Health Model (Already Optimized)
✅ Mental health model loaded successfully (79.1% accuracy)

Phase 2: Optimized Dialogue Act Model

💬 OPTIMIZED DIALOGUE ACT MODEL V2
--------------------------------------------------
📂 Loading dialogue dataset...
Using more data - Train: 11118, Val: 1000, Test: 1000
🔧 Simplifying dialogue acts...
Simplified to 11 dialogue act classes:
  long_request_response: 6691
  long_inform_question: 3107
  question_inform_seq: 1645
  request_response_seq: 639
  multi_inform: 359
  inform_inform: 201
  question_inform: 183
  request_response: 124
  short_sequence: 112
  other_pair: 37
After filtering rare acts: using 11 classes
Final data sizes - Train: 11118, Val: 1000, Test: 1000
Final dialogue act classes (11):
  0: inform_inform
  1: inform_question
  2: long_inform_question
  3: long_request_re

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at C:\Users\NAMAN\Documents\GitHub\Prototype-\bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



🔧 Optimized dialogue training config:
  Classes: 11 (reduced from 161)
  Training samples: 11118
  Epochs: 4
  Learning rate: 1e-05
  Batch size: 24
  Max length: 96 tokens

🚀 Starting optimized dialogue training...

Epoch 1/4
----------------------------------------


Training: 100%|██████████| 464/464 [1:13:23<00:00,  9.49s/it, loss=1.3136, acc=0.340]
Evaluating: 100%|██████████| 42/42 [01:25<00:00,  2.03s/it]


Train Loss: 2.0386, Train Acc: 0.3402
Val Loss: 1.2336, Val Acc: 0.6020, Val F1: 0.6217
✅ New best dialogue model! F1: 0.6217, Acc: 0.6020

Epoch 2/4
----------------------------------------


Training: 100%|██████████| 464/464 [1:12:01<00:00,  9.31s/it, loss=1.2587, acc=0.666]
Evaluating: 100%|██████████| 42/42 [01:25<00:00,  2.04s/it]


Train Loss: 1.4049, Train Acc: 0.6658
Val Loss: 0.7239, Val Acc: 0.7840, Val F1: 0.7809
✅ New best dialogue model! F1: 0.7809, Acc: 0.7840

Epoch 3/4
----------------------------------------


Training: 100%|██████████| 464/464 [1:22:05<00:00, 10.62s/it, loss=1.6559, acc=0.728]
Evaluating: 100%|██████████| 42/42 [01:09<00:00,  1.66s/it]


Train Loss: 1.0668, Train Acc: 0.7281
Val Loss: 0.6869, Val Acc: 0.7600, Val F1: 0.7674
⏳ No improvement. Patience: 1/3

Epoch 4/4
----------------------------------------


Training: 100%|██████████| 464/464 [1:43:42<00:00, 13.41s/it, loss=1.1076, acc=0.744]  
Evaluating: 100%|██████████| 42/42 [01:17<00:00,  1.84s/it]


Train Loss: 0.9790, Train Acc: 0.7441
Val Loss: 0.6704, Val Acc: 0.7770, Val F1: 0.7826
✅ New best dialogue model! F1: 0.7826, Acc: 0.7770

📥 Loaded best dialogue model

🎯 Final dialogue model evaluation...


Evaluating: 100%|██████████| 42/42 [01:38<00:00,  2.34s/it]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])



Final Dialogue Results:
  Test Accuracy: 0.7540
  Test F1-Score: 0.7552
  Training Time: 338.2 minutes
  Improvement: 14.2% → 75.4%

Detailed Classification Report:
                       precision    recall  f1-score   support

        inform_inform     0.6190    0.9286    0.7429        14
      inform_question     0.0000    0.0000    0.0000         2
 long_inform_question     0.6325    0.7992    0.7061       239
long_request_response     0.8910    0.7490    0.8139       502
         multi_inform     0.7105    0.9000    0.7941        30
           other_pair     0.0000    0.0000    0.0000         4
      question_inform     0.5556    0.8333    0.6667        12
  question_inform_seq     0.8667    0.7879    0.8254       132
     request_response     0.6000    0.3000    0.4000        10
 request_response_seq     0.4054    0.6667    0.5042        45
       short_sequence     0.0000    0.0000    0.0000        10

             accuracy                         0.7540      1000
            m

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


✅ Optimized dialogue model saved to: C:\Users\NAMAN\Documents\GitHub\Prototype-\trained_dialogue_bert_v2_optimized

🎉 OPTIMIZED TRAINING V2 COMPLETED!
⏱️  Total Time: 338.5 minutes
🧠 Mental Health: 79.1% (maintained)
💬 Dialogue Acts: 75.4% (improved from 14.2%)
✅ Both models ready for production!

🧪 ENHANCED DEMO TEST:

Message: 'Hello, how are you today?'
  🧠 Mental Health: Normal (0.993)
  💬 Primary Dialogue Act: question_inform (0.725)
  📊 Top 3 Dialogue Acts:
     1. question_inform: 0.725
     2. other_pair: 0.069
     3. inform_question: 0.068

Message: 'I feel really anxious and stressed about everything'
  🧠 Mental Health: Anxiety (0.972)
  💬 Primary Dialogue Act: inform_inform (0.525)
  📊 Top 3 Dialogue Acts:
     1. inform_inform: 0.525
     2. request_response: 0.204
     3. question_inform: 0.112
  ⚠️  ALERT: Mental health support needed!

Message: 'Thank you so much for your help and support!'
  🧠 Mental Health: Normal (0.991)
  💬 Primary Dialogue Act: inform_inform (0.546