In [1]:
# Member 0149 - Complete Baseline Model Training and Evaluation
#
# Setup cell: makes the project's `src` directory importable, pulls in the
# analysis stack, and imports the project-local model/evaluation classes.
import sys
import os
# Resolved against the kernel's current working directory, so the notebook
# must be started from a folder whose sibling is `src/`.
# NOTE(review): re-running this cell appends the same entry again.
sys.path.append(os.path.join(os.getcwd(), '../src'))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
# Silences every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# Import our custom modules
from models import baseline_models_0149  # Module object; not referenced by name in the visible cells
from evaluation import model_evaluator_0149  # Module object; not referenced by name in the visible cells

# Classes actually used by the cells below
from models.baseline_models_0149 import BaselineModels
from evaluation.model_evaluator_0149 import ModelEvaluator

# Notebook banner: title, project id, then a 60-character dashed rule.
banner_lines = (
    "=== MEMBER 0149: BASELINE MODELS TRAINING & EVALUATION ===",
    "ITBIN-2211-0149 - Fake News Detection Project",
    "-" * 60,
)
print(*banner_lines, sep="\n")

=== MEMBER 0149: BASELINE MODELS TRAINING & EVALUATION ===
ITBIN-2211-0149 - Fake News Detection Project
------------------------------------------------------------


In [2]:
# Step 1: Load Data
# Reads the three pre-processed CSV splits into `train_df`, `test_df` and
# `valid_df`, which the data-preparation cell consumes.
print("STEP 1: Loading Data...")
try:
    train_df = pd.read_csv('../data/processed/train_processed.csv')
    test_df = pd.read_csv('../data/processed/test_processed.csv')
    valid_df = pd.read_csv('../data/processed/valid_processed.csv')
except FileNotFoundError as e:
    print(f"Error loading data: {e}")
    print("Please ensure you have run the data loading step first!")
    # Re-raise rather than calling exit(1): exit() is a script/REPL helper and
    # does not make a notebook cell fail with a traceback, so "Run All" would
    # not stop cleanly here. A raised exception halts execution at this cell.
    raise
else:
    # Only report shapes once all three files have loaded.
    print(f"✓ Training data: {train_df.shape}")
    print(f"✓ Test data: {test_df.shape}")
    print(f"✓ Validation data: {valid_df.shape}")

STEP 1: Loading Data...
✓ Training data: (10240, 14)
✓ Test data: (1267, 14)
✓ Validation data: (1284, 14)


In [3]:
# Step 2: Initialize Models
# Build the trainer and the evaluator that the remaining cells share.
RANDOM_STATE = 42  # seed forwarded to BaselineModels

print("STEP 2: Initializing Baseline Models...")
baseline_models = BaselineModels(random_state=RANDOM_STATE)
evaluator = ModelEvaluator()

STEP 2: Initializing Baseline Models...


In [4]:
# Step 3: Prepare Data
# Hand all three frames to the trainer and unpack the resulting
# train/test features and labels.
# NOTE(review): the recorded output shows 11524 training samples, which equals
# train (10240) + validation (1284) rows, so prepare_data appears to fold the
# validation split into the training set -- confirm in BaselineModels.
print("STEP 3: Preparing Data...")
prepared_splits = baseline_models.prepare_data(train_df, test_df, valid_df)
X_train, X_test, y_train, y_test = prepared_splits

n_train, n_test = len(X_train), len(X_test)
print(f"✓ Training samples: {n_train}")
print(f"✓ Test samples: {n_test}")

STEP 3: Preparing Data...
Preprocessing text...
✓ Training samples: 11524
✓ Test samples: 1267


In [None]:
# Step 4: Train Models
# Fits every baseline model and stores the result in `models`, the mapping
# iterated (via .items()) by the evaluation cells.
# NOTE(review): the recorded output for this cell stops during the Random
# Forest grid search and the cell has no execution count, so the saved run
# looks incomplete.
print("STEP 4: Training Baseline Models...")
print("This may take several minutes...")

try:
    models = baseline_models.train_all_models(X_train, y_train)
except Exception as e:
    print(f"Error during training: {e}")
    # Re-raise rather than calling exit(1): the traceback is what tells us
    # which model/step failed, and a raised exception is what stops "Run All"
    # before later cells hit an undefined `models`.
    raise
else:
    print("✓ All models trained successfully!")

STEP 4: Training Baseline Models...
This may take several minutes...
=== TRAINING BASELINE MODELS ===
\n1. Training TF-IDF + Logistic Regression...
Fitting 5 folds for each of 18 candidates, totalling 90 fits
Best parameters: {'classifier__C': 1, 'tfidf__max_features': 10000, 'tfidf__ngram_range': (1, 2)}
Best cross-validation score: 0.2383
\n2. Training TF-IDF + Random Forest...
Fitting 5 folds for each of 24 candidates, totalling 120 fits


In [None]:
# Step 5: Evaluate Models
# Section banner only; the evaluation itself runs in the cells that follow.
print("STEP 5: Evaluating Models...")

In [None]:
# Test set evaluation: score each trained model on the held-out split,
# letting the shared evaluator record the outcome under the model's name.
for name, fitted_model in models.items():
    evaluator.evaluate_model(fitted_model, X_test, y_test, name)

In [None]:
# Cross-validation evaluation: run k-fold CV for every trained model on the
# training data via the shared evaluator.
CV_FOLDS = 5  # number of folds passed as `cv`

print("STEP 6: Cross-Validation Analysis...")
for name, fitted_model in models.items():
    evaluator.cross_validate_model(fitted_model, X_train, y_train, name, cv=CV_FOLDS)

In [None]:
# Step 7: Generate Visualizations
# Section banner only; the plots are produced in the next cell.
print("STEP 7: Creating Visualizations...")

In [None]:
# Plot confusion matrices
# Called without arguments, so the evaluator must draw from state it already
# holds -- presumably the results recorded by the evaluate_model() calls above
# (TODO confirm in ModelEvaluator).
evaluator.plot_confusion_matrices()

# Plot performance comparison
# Same evaluator instance, again with no arguments.
evaluator.plot_performance_comparison()

In [None]:
# Step 8: Create Reports
print("STEP 8: Generating Reports...")

# Model comparison
# `comparison_df` is read by the final summary cell, which checks it for None
# and indexes the 'Model', 'Accuracy' and 'F1-Score' columns.
comparison_df = evaluator.compare_models()

# Detailed report
# `detailed_report` is not referenced again in the visible cells.
detailed_report = evaluator.create_detailed_report()


In [None]:
# Step 9: Save Models
# No path is passed, so the destination is decided inside BaselineModels; the
# closing summary reports it as 'models/baseline/' (verify against the class).
print("STEP 9: Saving Models...")
baseline_models.save_models()

In [None]:
# Step 10: Final Performance Summary
# Reports the model with the highest test accuracy from `comparison_df`.
print("STEP 10: Final Performance Summary")
print("=" * 60)

# Guard against both a missing and an empty comparison table: idxmax() raises
# ValueError on an empty column, which would abort the summary.
if comparison_df is not None and not comparison_df.empty:
    # The row is selected by accuracy alone, so the F1 value printed below is
    # the F1 of the top-accuracy model, not necessarily the highest F1 overall.
    best_model = comparison_df.loc[comparison_df['Accuracy'].idxmax()]
    print(f"🥇 Best Model: {best_model['Model']}")
    print(f"📊 Best Accuracy: {best_model['Accuracy']:.4f}")
    print(f"🎯 Best F1-Score: {best_model['F1-Score']:.4f}")
else:
    print("No model comparison results available - skipping best-model summary.")

print("BASELINE MODEL TRAINING COMPLETE!")
print("All results saved to 'results/' directory")
print("Models saved to 'models/baseline/' directory")