# Train & Evaluate: Separate Models

This notebook trains and evaluates classifiers on each model's features independently, one model at a time.

**Steps:**
1. Load features from Drive (saved by 02_feature_extraction_separate.ipynb)
2. Train multiple classifiers (LogisticRegression, LinearSVC, RandomForest, XGBoost, LightGBM)
3. Evaluate on Dev set
4. Save predictions and probabilities to Drive
5. Print results tables and create plots

**Output:** Predictions, probabilities, and results saved to Drive for each model/classifier/task combination.


In [None]:
# Setup (run previous notebooks first)
import sys
from pathlib import Path
import numpy as np

# Repository checkout and Drive data folder used by every cell below.
BASE_PATH = Path('/content/semeval-context-tree-modular')
DATA_PATH = Path('/content/drive/MyDrive/semeval_data')

# The repo root must be on sys.path before the `src.*` imports can resolve.
sys.path.insert(0, str(BASE_PATH))

from src.storage.manager import StorageManager
from src.models.trainer import train_and_evaluate
from src.models.classifiers import get_classifier_dict

# The same repo root is passed as both base_path and github_path.
storage = StorageManager(base_path=str(BASE_PATH), data_path=str(DATA_PATH), github_path=str(BASE_PATH))

# Load the train and dev splits (in that order); later cells read labels from them.
train_ds, dev_ds = (storage.load_split(split_name) for split_name in ('train', 'dev'))

print("✅ Setup complete!")


In [None]:
# Model and task configurations
# MODELS names the feature sets to load; TASKS names the label sets to predict.
MODELS = [
    'bert',
    'roberta',
    'deberta',
    'xlnet',
]
TASKS = [
    'clarity',
    'evasion',
]

# Label mappings (class names handed to the trainer as `label_list`)
CLARITY_LABELS = [
    'Clear Reply',
    'Ambiguous',
    'Clear Non-Reply',
]
EVASION_LABELS = [
    'Direct Answer',
    'Partial Answer',
    'Implicit Answer',
    'Uncertainty',
    'Refusal',
    'Clarification',
    'Question',
    'Topic Shift',
    'Other',
]

# Get classifiers (seeded with a fixed random_state)
classifiers = get_classifier_dict(random_state=42)
print("✅ Classifiers: " + str(list(classifiers.keys())))


In [None]:
# Train and evaluate for each model and task
#
# For every (model, task) pair this cell:
#   1. loads the saved train/dev feature matrices through `storage`,
#   2. runs `train_and_evaluate` with every classifier in `classifiers`,
#   3. saves dev predictions (and probabilities, when not None) per classifier,
#   4. saves a metrics summary under the experiment id "<model>_<task>_separate".
# All per-pair results are also collected in `all_results[model][task]`.

# Labels depend only on the task, not on the feature model, so they are built
# once per task here instead of being rebuilt for every (model, task) pair.
task_labels = {}
for task in TASKS:
    if task == 'clarity':
        label_list = CLARITY_LABELS
        label_key = 'clarity_label'
    else:  # evasion
        label_list = EVASION_LABELS
        label_key = 'evasion_label'

    # Row-by-row access is kept because only integer indexing of the splits is
    # known to work here. NOTE(review): if these are Hugging Face datasets,
    # column access (train_ds[label_key]) would be faster -- confirm.
    task_labels[task] = (
        label_list,
        np.array([train_ds[i][label_key] for i in range(len(train_ds))]),
        np.array([dev_ds[i][label_key] for i in range(len(dev_ds))]),
    )

# Loop-invariant: every run writes its plots under the same directory.
plots_dir = str(DATA_PATH / 'plots')

all_results = {}

for model in MODELS:
    print(f"\n{'='*80}")
    print(f"MODEL: {model.upper()}")
    print(f"{'='*80}")
    
    all_results[model] = {}
    
    for task in TASKS:
        print(f"\n{'='*60}")
        print(f"TASK: {task.upper()}")
        print(f"{'='*60}")
        
        # Get label list and the precomputed label arrays for this task
        label_list, y_train, y_dev = task_labels[task]
        
        # Load features
        print("Loading features...")
        X_train = storage.load_features(model, task, 'train')
        X_dev = storage.load_features(model, task, 'dev')
        
        # Fail fast when saved features and the current splits disagree on the
        # number of rows (e.g. features extracted from a different version of
        # the split); otherwise the mismatch only surfaces inside the trainer.
        for split_name, features, labels in (('train', X_train, y_train),
                                             ('dev', X_dev, y_dev)):
            if features.shape[0] != len(labels):
                raise ValueError(
                    f"Feature/label mismatch for model='{model}', task='{task}', "
                    f"split='{split_name}': {features.shape[0]} feature rows vs "
                    f"{len(labels)} labels. Re-run feature extraction for this split."
                )
        
        print(f"  Train: {X_train.shape[0]} samples, {X_train.shape[1]} features")
        print(f"  Dev: {X_dev.shape[0]} samples, {X_dev.shape[1]} features")
        
        # Train and evaluate
        results = train_and_evaluate(
            X_train, y_train, X_dev, y_dev,
            label_list=label_list,
            task_name=f"{model}_{task}",
            classifiers=classifiers,
            random_state=42,
            print_report=True,
            print_table=True,
            create_plots=True,
            save_plots_dir=plots_dir
        )
        
        # Save predictions and probabilities
        for classifier_name, result in results.items():
            # Save predictions (hard labels)
            storage.save_predictions(
                result['dev_pred'],
                model, classifier_name, task, 'dev'
            )
            
            # Save probabilities (if available); a None entry is skipped
            if result['dev_proba'] is not None:
                storage.save_probabilities(
                    result['dev_proba'],
                    model, classifier_name, task, 'dev'
                )
        
        all_results[model][task] = results
        
        # Save results summary (metrics plus split sizes for each classifier)
        experiment_id = f"{model}_{task}_separate"
        storage.save_results({
            'model': model,
            'task': task,
            'results': {
                name: {
                    'metrics': res['metrics'],
                    'n_train': len(y_train),
                    'n_dev': len(y_dev)
                }
                for name, res in results.items()
            }
        }, experiment_id)

print(f"\n{'='*80}")
print("✅ Training and evaluation complete for all models!")
print(f"{'='*80}")
