In [7]:
# Cell 1: Setup and Data Loading
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score, StratifiedKFold
import lightgbm as lgb
import optuna
import joblib
import warnings
warnings.filterwarnings('ignore')

# Paths
TRAIN_PATH = "../data/train_processed_comprehensive.csv"
TEST_PATH = "../data/test_processed_comprehensive.csv"
MODEL_PATH = "../models/academic_risk_model.joblib"
os.makedirs("../models", exist_ok=True)

print("🚀 MULTICLASS CLASSIFICATION SETUP")
print("=" * 50)
print("✅ Libraries imported (including Optuna)")
print("✅ Paths configured")
print("✅ Model directory created")
print("🎯 Target: Multiclass Classification (Grade Categories)")


🚀 MULTICLASS CLASSIFICATION SETUP
✅ Libraries imported (including Optuna)
✅ Paths configured
✅ Model directory created
🎯 Target: Multiclass Classification (Grade Categories)


In [8]:
# Cell 2: Load Data and Convert to Multiclass Classification
print("📊 LOADING DATA AND CONVERTING TO MULTICLASS CLASSIFICATION")
print("=" * 60)

# Load the comprehensive processed datasets
train_df = pd.read_csv(TRAIN_PATH)
test_df = pd.read_csv(TEST_PATH)

print(f"✅ Training data: {train_df.shape[0]:,} samples, {train_df.shape[1]} features")
print(f"✅ Test data: {test_df.shape[0]:,} samples, {test_df.shape[1]} features")

# Convert GPA to Grade Categories for Multiclass Classification
def gpa_to_grade_category(gpa):
    """Convert GPA to grade categories for multiclass classification"""
    if gpa >= 3.7:
        return 'A'  # A, A-
    elif gpa >= 3.0:
        return 'B'  # B+, B, B-
    elif gpa >= 2.0:
        return 'C'  # C+, C, C-
    elif gpa >= 1.0:
        return 'D'  # D+, D
    else:
        return 'F'  # F

# Apply grade category conversion
train_df['grade_category'] = train_df['gpa'].apply(gpa_to_grade_category)
test_df['grade_category'] = test_df['gpa'].apply(gpa_to_grade_category)

# Prepare features and target
id_cols = ["student_id", "course_id"]
target_col = "grade_category"
feature_cols = [c for c in train_df.columns if c not in id_cols + [target_col, 'gpa']]

# Ensure we only have numerical features
print(f"🔍 Checking feature types...")
print(f"   Total columns: {len(train_df.columns)}")
print(f"   Feature columns: {len(feature_cols)}")
print(f"   ID columns: {len(id_cols)}")
print(f"   Target column: {target_col}")

# Check for any non-numerical features
non_numerical = train_df[feature_cols].select_dtypes(exclude=[np.number]).columns.tolist()
if non_numerical:
    print(f"⚠️ Found non-numerical features: {non_numerical}")
    print("   Removing non-numerical features...")
    feature_cols = [c for c in feature_cols if c not in non_numerical]
    print(f"   Updated feature columns: {len(feature_cols)}")

X_train = train_df[feature_cols]
y_train = train_df[target_col]
X_test = test_df[feature_cols]
y_test = test_df[target_col]

print(f"\n✅ Final feature matrix shape: {X_train.shape}")
print(f"✅ Target vector shape: {y_train.shape}")
print(f"✅ Target classes: {sorted(y_train.unique())}")

# Store original target values for display
y_train_original = y_train.copy()
y_test_original = y_test.copy()

# Encode target labels to numerical values for some algorithms
print(f"\n🔧 Encoding target labels...")
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

print(f"   Original classes: {sorted(y_train.unique())}")
print(f"   Encoded classes: {sorted(np.unique(y_train_encoded))}")
print(f"   Class mapping: {dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))}")

# Use encoded targets for training
y_train = y_train_encoded
y_test = y_test_encoded

print(f"\n📊 Dataset Overview:")
print(f"   Features: {len(feature_cols)}")
print(f"   Target: Grade Categories (A, B, C, D, F)")
print(f"   Training samples: {len(X_train):,}")
print(f"   Test samples: {len(X_test):,}")

print(f"\n📊 Target Distribution (Training):")
grade_dist = pd.Series(y_train_original).value_counts()
for grade, count in grade_dist.items():
    print(f"   Grade {grade}: {count:,} samples ({count/len(y_train_original)*100:.1f}%)")

print(f"\n📊 Target Distribution (Test):")
test_grade_dist = pd.Series(y_test_original).value_counts()
for grade, count in test_grade_dist.items():
    print(f"   Grade {grade}: {count:,} samples ({count/len(y_test_original)*100:.1f}%)")

# Check for any missing values
if X_train.isna().sum().sum() > 0 or np.isnan(y_train).sum() > 0:
    print(f"\n⚠️ Missing values detected!")
    print(f"   X_train missing: {X_train.isna().sum().sum()}")
    print(f"   y_train missing: {np.isnan(y_train).sum()}")
else:
    print(f"\n✅ No missing values in training data")

# Check class balance
print(f"\n📊 Class Balance Analysis:")
print(f"   Number of classes: {len(grade_dist)}")
print(f"   Most frequent class: {grade_dist.idxmax()} ({grade_dist.max()} samples)")
print(f"   Least frequent class: {grade_dist.idxmin()} ({grade_dist.min()} samples)")
print(f"   Class imbalance ratio: {grade_dist.max() / grade_dist.min():.2f}:1")


📊 LOADING DATA AND CONVERTING TO MULTICLASS CLASSIFICATION
✅ Training data: 3,281 samples, 143 features
✅ Test data: 821 samples, 143 features
🔍 Checking feature types...
   Total columns: 144
   Feature columns: 140
   ID columns: 2
   Target column: grade_category
⚠️ Found non-numerical features: ['grade']
   Removing non-numerical features...
   Updated feature columns: 139

✅ Final feature matrix shape: (3281, 139)
✅ Target vector shape: (3281,)
✅ Target classes: ['A', 'B', 'C', 'D', 'F']

🔧 Encoding target labels...
   Original classes: ['A', 'B', 'C', 'D', 'F']
   Encoded classes: [np.int64(0), np.int64(1), np.int64(2), np.int64(3), np.int64(4)]
   Class mapping: {'A': np.int64(0), 'B': np.int64(1), 'C': np.int64(2), 'D': np.int64(3), 'F': np.int64(4)}

📊 Dataset Overview:
   Features: 139
   Target: Grade Categories (A, B, C, D, F)
   Training samples: 3,281
   Test samples: 821

📊 Target Distribution (Training):
   Grade A: 1,011 samples (30.8%)
   Grade B: 987 samples (30.1%)


In [9]:
# Cell 3: Baseline Classification Models
print("🎯 TRAINING BASELINE CLASSIFICATION MODELS")
print("=" * 50)

# 1. Logistic Regression (Baseline)
print("📊 Training Logistic Regression...")
lr_model = LogisticRegression(
    multi_class='ovr',  # One-vs-Rest for multiclass
    max_iter=1000,
    random_state=42,
    n_jobs=-1
)
lr_model.fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)

lr_accuracy = accuracy_score(y_test, y_pred_lr)
lr_f1 = f1_score(y_test, y_pred_lr, average='weighted')
lr_precision = precision_score(y_test, y_pred_lr, average='weighted')
lr_recall = recall_score(y_test, y_pred_lr, average='weighted')

print(f"✅ Logistic Regression Results:")
print(f"   Accuracy: {lr_accuracy:.4f}")
print(f"   F1-Score: {lr_f1:.4f}")
print(f"   Precision: {lr_precision:.4f}")
print(f"   Recall: {lr_recall:.4f}")

# 2. Decision Tree
print("\n📊 Training Decision Tree...")
dt_model = DecisionTreeClassifier(
    max_depth=10,
    min_samples_split=20,
    min_samples_leaf=10,
    random_state=42
)
dt_model.fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)

dt_accuracy = accuracy_score(y_test, y_pred_dt)
dt_f1 = f1_score(y_test, y_pred_dt, average='weighted')
dt_precision = precision_score(y_test, y_pred_dt, average='weighted')
dt_recall = recall_score(y_test, y_pred_dt, average='weighted')

print(f"✅ Decision Tree Results:")
print(f"   Accuracy: {dt_accuracy:.4f}")
print(f"   F1-Score: {dt_f1:.4f}")
print(f"   Precision: {dt_precision:.4f}")
print(f"   Recall: {dt_recall:.4f}")

# 3. Random Forest
print("\n📊 Training Random Forest...")
rf_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    min_samples_split=20,
    min_samples_leaf=10,
    random_state=42,
    n_jobs=-1
)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

rf_accuracy = accuracy_score(y_test, y_pred_rf)
rf_f1 = f1_score(y_test, y_pred_rf, average='weighted')
rf_precision = precision_score(y_test, y_pred_rf, average='weighted')
rf_recall = recall_score(y_test, y_pred_rf, average='weighted')

print(f"✅ Random Forest Results:")
print(f"   Accuracy: {rf_accuracy:.4f}")
print(f"   F1-Score: {rf_f1:.4f}")
print(f"   Precision: {rf_precision:.4f}")
print(f"   Recall: {rf_recall:.4f}")

# Compare baselines
print(f"\n📊 Baseline Model Comparison:")
print(f"   Logistic Regression F1: {lr_f1:.4f}")
print(f"   Decision Tree F1: {dt_f1:.4f}")
print(f"   Random Forest F1: {rf_f1:.4f}")

best_baseline = max([(lr_f1, "Logistic Regression"), (dt_f1, "Decision Tree"), (rf_f1, "Random Forest")])
print(f"\n🏆 Best Baseline: {best_baseline[1]} (F1 = {best_baseline[0]:.4f})")


🎯 TRAINING BASELINE CLASSIFICATION MODELS
📊 Training Logistic Regression...
✅ Logistic Regression Results:
   Accuracy: 0.3386
   F1-Score: 0.3246
   Precision: 0.3262
   Recall: 0.3386

📊 Training Decision Tree...
✅ Decision Tree Results:
   Accuracy: 0.3045
   F1-Score: 0.2956
   Precision: 0.2924
   Recall: 0.3045

📊 Training Random Forest...
✅ Random Forest Results:
   Accuracy: 0.3520
   F1-Score: 0.3275
   Precision: 0.3183
   Recall: 0.3520

📊 Baseline Model Comparison:
   Logistic Regression F1: 0.3246
   Decision Tree F1: 0.2956
   Random Forest F1: 0.3275

🏆 Best Baseline: Random Forest (F1 = 0.3275)


In [10]:
# Cell 4: LightGBM with Optuna Hyperparameter Tuning
print("🚀 LIGHTGBM WITH OPTUNA HYPERPARAMETER TUNING")
print("=" * 60)

# Define objective function for Optuna
def objective(trial):
    """Objective function for Optuna optimization"""
    params = {
        'objective': 'multiclass',
        'metric': 'multi_logloss',
        'num_class': len(np.unique(y_train)),
        'boosting_type': 'gbdt',
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'num_leaves': trial.suggest_int('num_leaves', 10, 100),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 1.0),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 100),
        'random_state': 42,
        'verbose': -1,
        'n_jobs': -1
    }
    
    # Use cross-validation for robust evaluation
    model = lgb.LGBMClassifier(**params)
    
    # 3-fold stratified CV
    cv_scores = cross_val_score(
        model, X_train, y_train, 
        cv=StratifiedKFold(n_splits=3, shuffle=True, random_state=42),
        scoring='f1_weighted',
        n_jobs=1  # Avoid nested parallelism
    )
    
    return cv_scores.mean()

# Create Optuna study
print("🔍 Creating Optuna study...")
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
    pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10)
)

# Run optimization
n_trials = 50  # Good balance for this dataset size
print(f"🚀 Running {n_trials} optimization trials...")
study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

print(f"\n✅ Optimization Complete!")
print(f"   Best F1 Score: {study.best_value:.4f}")
print(f"   Best Parameters:")
for key, value in study.best_params.items():
    print(f"     {key}: {value}")

# Train final model with best parameters
print(f"\n📊 Training Final LightGBM with Best Parameters...")
best_lgb_model = lgb.LGBMClassifier(**study.best_params, random_state=42, verbose=-1)
best_lgb_model.fit(X_train, y_train)
y_pred_lgb = best_lgb_model.predict(X_test)

# Evaluate final model
lgb_accuracy = accuracy_score(y_test, y_pred_lgb)
lgb_f1 = f1_score(y_test, y_pred_lgb, average='weighted')
lgb_precision = precision_score(y_test, y_pred_lgb, average='weighted')
lgb_recall = recall_score(y_test, y_pred_lgb, average='weighted')

print(f"✅ Optimized LightGBM Results:")
print(f"   Accuracy: {lgb_accuracy:.4f}")
print(f"   F1-Score: {lgb_f1:.4f}")
print(f"   Precision: {lgb_precision:.4f}")
print(f"   Recall: {lgb_recall:.4f}")

# Feature importance
print(f"\n📊 Top 15 Most Important Features:")
feature_importance = pd.DataFrame({
    'feature': feature_cols,
    'importance': best_lgb_model.feature_importances_
}).sort_values('importance', ascending=False)

print(feature_importance.head(15))

# Show embedding features importance
embedding_features = [col for col in feature_cols if 'emb_' in col]
embedding_importance = feature_importance[feature_importance['feature'].isin(embedding_features)]
print(f"\n📊 Embedding Features Importance (Top 10):")
print(embedding_importance.head(10))

# Show academic features importance
academic_features = [col for col in feature_cols if 'emb_' not in col and 'community' not in col]
academic_importance = feature_importance[feature_importance['feature'].isin(academic_features)]
print(f"\n📊 Academic Features Importance:")
print(academic_importance)


[I 2025-09-28 02:58:48,453] A new study created in memory with name: no-name-db2d43d7-51e6-4386-a490-77ebdba39df7


🚀 LIGHTGBM WITH OPTUNA HYPERPARAMETER TUNING
🔍 Creating Optuna study...
🚀 Running 50 optimization trials...


Best trial: 0. Best value: 0.309626:   2%|▏         | 1/50 [00:06<05:15,  6.44s/it]

[I 2025-09-28 02:58:54,890] Trial 0 finished with value: 0.3096259965694667 and parameters: {'n_estimators': 250, 'learning_rate': 0.2536999076681771, 'max_depth': 10, 'num_leaves': 64, 'subsample': 0.6624074561769746, 'colsample_bytree': 0.662397808134481, 'reg_alpha': 0.05808361216819946, 'reg_lambda': 0.8661761457749352, 'min_child_samples': 64}. Best is trial 0 with value: 0.3096259965694667.


Best trial: 0. Best value: 0.309626:   4%|▍         | 2/50 [00:18<07:51,  9.83s/it]

[I 2025-09-28 02:59:07,088] Trial 1 finished with value: 0.3073591516661131 and parameters: {'n_estimators': 383, 'learning_rate': 0.010725209743171997, 'max_depth': 12, 'num_leaves': 85, 'subsample': 0.6849356442713105, 'colsample_bytree': 0.6727299868828402, 'reg_alpha': 0.18340450985343382, 'reg_lambda': 0.3042422429595377, 'min_child_samples': 57}. Best is trial 0 with value: 0.3096259965694667.


Best trial: 0. Best value: 0.309626:   6%|▌         | 3/50 [00:26<07:00,  8.95s/it]

[I 2025-09-28 02:59:14,992] Trial 2 finished with value: 0.29739861801828615 and parameters: {'n_estimators': 273, 'learning_rate': 0.02692655251486473, 'max_depth': 9, 'num_leaves': 22, 'subsample': 0.7168578594140872, 'colsample_bytree': 0.7465447373174766, 'reg_alpha': 0.45606998421703593, 'reg_lambda': 0.7851759613930136, 'min_child_samples': 28}. Best is trial 0 with value: 0.3096259965694667.


Best trial: 0. Best value: 0.309626:   8%|▊         | 4/50 [00:29<04:55,  6.41s/it]

[I 2025-09-28 02:59:17,522] Trial 3 finished with value: 0.3006822282458555 and parameters: {'n_estimators': 306, 'learning_rate': 0.07500118950416987, 'max_depth': 3, 'num_leaves': 65, 'subsample': 0.6682096494749166, 'colsample_bytree': 0.6260206371941118, 'reg_alpha': 0.9488855372533332, 'reg_lambda': 0.9656320330745594, 'min_child_samples': 83}. Best is trial 0 with value: 0.3096259965694667.


Best trial: 0. Best value: 0.309626:  10%|█         | 5/50 [00:38<05:40,  7.56s/it]

[I 2025-09-28 02:59:27,108] Trial 4 finished with value: 0.3090865242100041 and parameters: {'n_estimators': 222, 'learning_rate': 0.013940346079873234, 'max_depth': 9, 'num_leaves': 50, 'subsample': 0.6488152939379115, 'colsample_bytree': 0.798070764044508, 'reg_alpha': 0.034388521115218396, 'reg_lambda': 0.9093204020787821, 'min_child_samples': 33}. Best is trial 0 with value: 0.3096259965694667.


Best trial: 0. Best value: 0.309626:  12%|█▏        | 6/50 [00:46<05:44,  7.82s/it]

[I 2025-09-28 02:59:35,439] Trial 5 finished with value: 0.2993314587790193 and parameters: {'n_estimators': 365, 'learning_rate': 0.028869220380495747, 'max_depth': 8, 'num_leaves': 59, 'subsample': 0.6739417822102108, 'colsample_bytree': 0.9878338511058234, 'reg_alpha': 0.7751328233611146, 'reg_lambda': 0.9394989415641891, 'min_child_samples': 91}. Best is trial 0 with value: 0.3096259965694667.


Best trial: 0. Best value: 0.309626:  14%|█▍        | 7/50 [00:49<04:25,  6.18s/it]

[I 2025-09-28 02:59:38,245] Trial 6 finished with value: 0.30657240848590894 and parameters: {'n_estimators': 339, 'learning_rate': 0.22999586428143734, 'max_depth': 3, 'num_leaves': 27, 'subsample': 0.6180909155642152, 'colsample_bytree': 0.7301321323053057, 'reg_alpha': 0.388677289689482, 'reg_lambda': 0.2713490317738959, 'min_child_samples': 85}. Best is trial 0 with value: 0.3096259965694667.


Best trial: 0. Best value: 0.309626:  16%|█▌        | 8/50 [00:56<04:25,  6.32s/it]

[I 2025-09-28 02:59:44,873] Trial 7 finished with value: 0.3062309886504992 and parameters: {'n_estimators': 243, 'learning_rate': 0.026000059117302653, 'max_depth': 8, 'num_leaves': 22, 'subsample': 0.9208787923016158, 'colsample_bytree': 0.6298202574719083, 'reg_alpha': 0.9868869366005173, 'reg_lambda': 0.7722447692966574, 'min_child_samples': 28}. Best is trial 0 with value: 0.3096259965694667.


Best trial: 0. Best value: 0.309626:  18%|█▊        | 9/50 [00:58<03:25,  5.00s/it]

[I 2025-09-28 02:59:46,979] Trial 8 finished with value: 0.30480672522546626 and parameters: {'n_estimators': 102, 'learning_rate': 0.16015312171361207, 'max_depth': 10, 'num_leaves': 76, 'subsample': 0.9085081386743783, 'colsample_bytree': 0.6296178606936361, 'reg_alpha': 0.3584657285442726, 'reg_lambda': 0.11586905952512971, 'min_child_samples': 88}. Best is trial 0 with value: 0.3096259965694667.


Best trial: 0. Best value: 0.309626:  20%|██        | 10/50 [01:01<02:58,  4.47s/it]

[I 2025-09-28 02:59:50,240] Trial 9 finished with value: 0.30906886792406896 and parameters: {'n_estimators': 349, 'learning_rate': 0.030816017044468066, 'max_depth': 3, 'num_leaves': 38, 'subsample': 0.7300733288106988, 'colsample_bytree': 0.8918424713352257, 'reg_alpha': 0.6375574713552131, 'reg_lambda': 0.8872127425763265, 'min_child_samples': 52}. Best is trial 0 with value: 0.3096259965694667.


Best trial: 0. Best value: 0.309626:  22%|██▏       | 11/50 [01:12<04:03,  6.23s/it]

[I 2025-09-28 03:00:00,477] Trial 10 finished with value: 0.30035751458975174 and parameters: {'n_estimators': 478, 'learning_rate': 0.10251868762308501, 'max_depth': 6, 'num_leaves': 94, 'subsample': 0.8085360047450806, 'colsample_bytree': 0.8760988294276582, 'reg_alpha': 0.015144237102756877, 'reg_lambda': 0.6233765186294654, 'min_child_samples': 61}. Best is trial 0 with value: 0.3096259965694667.


Best trial: 0. Best value: 0.309626:  24%|██▍       | 12/50 [01:22<04:49,  7.62s/it]

[I 2025-09-28 03:00:11,266] Trial 11 finished with value: 0.30647569989204365 and parameters: {'n_estimators': 187, 'learning_rate': 0.010555894926322258, 'max_depth': 11, 'num_leaves': 45, 'subsample': 0.7898925643930463, 'colsample_bytree': 0.8226010179005339, 'reg_alpha': 0.011732157206912234, 'reg_lambda': 0.5732909776512292, 'min_child_samples': 11}. Best is trial 0 with value: 0.3096259965694667.


Best trial: 12. Best value: 0.312247:  26%|██▌       | 13/50 [01:28<04:21,  7.08s/it]

[I 2025-09-28 03:00:17,101] Trial 12 finished with value: 0.3122474266264243 and parameters: {'n_estimators': 189, 'learning_rate': 0.055808692324215654, 'max_depth': 7, 'num_leaves': 47, 'subsample': 0.6109470798815742, 'colsample_bytree': 0.7413584553307843, 'reg_alpha': 0.2082802661088238, 'reg_lambda': 0.7299436732862953, 'min_child_samples': 41}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  28%|██▊       | 14/50 [01:31<03:33,  5.92s/it]

[I 2025-09-28 03:00:20,333] Trial 13 finished with value: 0.3068131914788604 and parameters: {'n_estimators': 150, 'learning_rate': 0.2820997924290723, 'max_depth': 6, 'num_leaves': 65, 'subsample': 0.6010360207662125, 'colsample_bytree': 0.7056228746417736, 'reg_alpha': 0.2196098336030803, 'reg_lambda': 0.7114870148644771, 'min_child_samples': 69}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  30%|███       | 15/50 [01:34<02:53,  4.96s/it]

[I 2025-09-28 03:00:23,081] Trial 14 finished with value: 0.3030826461053277 and parameters: {'n_estimators': 163, 'learning_rate': 0.05496385473733053, 'max_depth': 6, 'num_leaves': 11, 'subsample': 0.7864727961201929, 'colsample_bytree': 0.7766589954827275, 'reg_alpha': 0.20221545090113216, 'reg_lambda': 0.39998069938371494, 'min_child_samples': 42}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  32%|███▏      | 16/50 [01:38<02:33,  4.51s/it]

[I 2025-09-28 03:00:26,534] Trial 15 finished with value: 0.2966118531137734 and parameters: {'n_estimators': 205, 'learning_rate': 0.13166430498914292, 'max_depth': 5, 'num_leaves': 77, 'subsample': 0.8561711433906498, 'colsample_bytree': 0.684972690542045, 'reg_alpha': 0.28563058573467925, 'reg_lambda': 0.503553535027425, 'min_child_samples': 71}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  34%|███▍      | 17/50 [01:42<02:23,  4.35s/it]

[I 2025-09-28 03:00:30,514] Trial 16 finished with value: 0.30794286751976485 and parameters: {'n_estimators': 101, 'learning_rate': 0.05776716725148537, 'max_depth': 10, 'num_leaves': 45, 'subsample': 0.731194128587385, 'colsample_bytree': 0.8378953523354642, 'reg_alpha': 0.5940978966045696, 'reg_lambda': 0.665317673929278, 'min_child_samples': 47}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  36%|███▌      | 18/50 [01:46<02:23,  4.48s/it]

[I 2025-09-28 03:00:35,301] Trial 17 finished with value: 0.2979841675809748 and parameters: {'n_estimators': 260, 'learning_rate': 0.18066334351041807, 'max_depth': 12, 'num_leaves': 35, 'subsample': 0.602481017250404, 'colsample_bytree': 0.6682570583344382, 'reg_alpha': 0.1276665102082612, 'reg_lambda': 0.8221449765610106, 'min_child_samples': 100}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  38%|███▊      | 19/50 [01:59<03:38,  7.06s/it]

[I 2025-09-28 03:00:48,377] Trial 18 finished with value: 0.30390512136445963 and parameters: {'n_estimators': 450, 'learning_rate': 0.09691933397433788, 'max_depth': 7, 'num_leaves': 61, 'subsample': 0.6386640354069466, 'colsample_bytree': 0.73964158693172, 'reg_alpha': 0.11072341297753602, 'reg_lambda': 0.48284864646139225, 'min_child_samples': 14}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  40%|████      | 20/50 [02:11<04:10,  8.35s/it]

[I 2025-09-28 03:00:59,742] Trial 19 finished with value: 0.31179968707618183 and parameters: {'n_estimators': 296, 'learning_rate': 0.04302553031342478, 'max_depth': 10, 'num_leaves': 100, 'subsample': 0.704724304243577, 'colsample_bytree': 0.6028550510494292, 'reg_alpha': 0.3464445264801279, 'reg_lambda': 0.9978407794240471, 'min_child_samples': 38}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  42%|████▏     | 21/50 [02:19<04:02,  8.37s/it]

[I 2025-09-28 03:01:08,137] Trial 20 finished with value: 0.2954123198977115 and parameters: {'n_estimators': 417, 'learning_rate': 0.03724758407777502, 'max_depth': 5, 'num_leaves': 95, 'subsample': 0.971241147293149, 'colsample_bytree': 0.9579418118176919, 'reg_alpha': 0.5102871570417014, 'reg_lambda': 0.011620961627553272, 'min_child_samples': 38}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  44%|████▍     | 22/50 [02:35<05:00, 10.74s/it]

[I 2025-09-28 03:01:24,409] Trial 21 finished with value: 0.3035287481392433 and parameters: {'n_estimators': 295, 'learning_rate': 0.01844806551932516, 'max_depth': 10, 'num_leaves': 73, 'subsample': 0.6942053969211587, 'colsample_bytree': 0.6233175974124288, 'reg_alpha': 0.32266284894222735, 'reg_lambda': 0.8521285531827102, 'min_child_samples': 22}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  46%|████▌     | 23/50 [02:44<04:32, 10.11s/it]

[I 2025-09-28 03:01:33,036] Trial 22 finished with value: 0.29845806115652734 and parameters: {'n_estimators': 313, 'learning_rate': 0.03786946567907278, 'max_depth': 9, 'num_leaves': 100, 'subsample': 0.7658313612926888, 'colsample_bytree': 0.6622117239974942, 'reg_alpha': 0.2698216289672999, 'reg_lambda': 0.9974698284809731, 'min_child_samples': 63}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  48%|████▊     | 24/50 [02:52<04:09,  9.60s/it]

[I 2025-09-28 03:01:41,446] Trial 23 finished with value: 0.30218370303969694 and parameters: {'n_estimators': 235, 'learning_rate': 0.0722675139571516, 'max_depth': 11, 'num_leaves': 83, 'subsample': 0.6435507928070006, 'colsample_bytree': 0.7062115493275856, 'reg_alpha': 0.1199389344130884, 'reg_lambda': 0.729424439040219, 'min_child_samples': 46}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  50%|█████     | 25/50 [02:56<03:12,  7.70s/it]

[I 2025-09-28 03:01:44,706] Trial 24 finished with value: 0.3050597686550402 and parameters: {'n_estimators': 157, 'learning_rate': 0.041150936002421076, 'max_depth': 7, 'num_leaves': 53, 'subsample': 0.7510913897332896, 'colsample_bytree': 0.6126486621390768, 'reg_alpha': 0.4058796710792996, 'reg_lambda': 0.8433437293330825, 'min_child_samples': 74}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  52%|█████▏    | 26/50 [03:05<03:16,  8.21s/it]

[I 2025-09-28 03:01:54,105] Trial 25 finished with value: 0.3091180986262552 and parameters: {'n_estimators': 278, 'learning_rate': 0.019252362836923026, 'max_depth': 11, 'num_leaves': 35, 'subsample': 0.6969578558361657, 'colsample_bytree': 0.6012924677097552, 'reg_alpha': 0.09739517002761997, 'reg_lambda': 0.9807821489745049, 'min_child_samples': 50}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  54%|█████▍    | 27/50 [03:12<03:01,  7.88s/it]

[I 2025-09-28 03:02:01,211] Trial 26 finished with value: 0.30932895560662577 and parameters: {'n_estimators': 202, 'learning_rate': 0.07788578037838091, 'max_depth': 8, 'num_leaves': 68, 'subsample': 0.6365421054673541, 'colsample_bytree': 0.6552828383780902, 'reg_alpha': 0.22385222076348227, 'reg_lambda': 0.6917125190300659, 'min_child_samples': 37}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  56%|█████▌    | 28/50 [03:28<03:46, 10.32s/it]

[I 2025-09-28 03:02:17,226] Trial 27 finished with value: 0.3097761942056945 and parameters: {'n_estimators': 324, 'learning_rate': 0.04808168512897296, 'max_depth': 9, 'num_leaves': 85, 'subsample': 0.8432899406365262, 'colsample_bytree': 0.7610653810021236, 'reg_alpha': 0.5001657985189769, 'reg_lambda': 0.8970593166535472, 'min_child_samples': 21}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  58%|█████▊    | 29/50 [03:46<04:20, 12.43s/it]

[I 2025-09-28 03:02:34,569] Trial 28 finished with value: 0.30354196154478225 and parameters: {'n_estimators': 385, 'learning_rate': 0.04790225423379599, 'max_depth': 9, 'num_leaves': 87, 'subsample': 0.8526333921184073, 'colsample_bytree': 0.7777605241655242, 'reg_alpha': 0.5212967199807143, 'reg_lambda': 0.7691365091205239, 'min_child_samples': 17}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  60%|██████    | 30/50 [03:57<04:02, 12.10s/it]

[I 2025-09-28 03:02:45,911] Trial 29 finished with value: 0.3074841691173226 and parameters: {'n_estimators': 330, 'learning_rate': 0.06106320771972141, 'max_depth': 7, 'num_leaves': 91, 'subsample': 0.8168079219235658, 'colsample_bytree': 0.859210419207521, 'reg_alpha': 0.667985108272492, 'reg_lambda': 0.9147783724727612, 'min_child_samples': 24}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  62%|██████▏   | 31/50 [04:03<03:12, 10.16s/it]

[I 2025-09-28 03:02:51,528] Trial 30 finished with value: 0.29896785985479224 and parameters: {'n_estimators': 136, 'learning_rate': 0.045431725662237375, 'max_depth': 8, 'num_leaves': 100, 'subsample': 0.8420163220109168, 'colsample_bytree': 0.928105658304223, 'reg_alpha': 0.7287733564458421, 'reg_lambda': 0.5854437140381376, 'min_child_samples': 33}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  64%|██████▍   | 32/50 [04:10<02:48,  9.33s/it]

[I 2025-09-28 03:02:58,947] Trial 31 finished with value: 0.3063414817336862 and parameters: {'n_estimators': 259, 'learning_rate': 0.10234226244430565, 'max_depth': 12, 'num_leaves': 84, 'subsample': 0.6689718860925683, 'colsample_bytree': 0.7010563725549498, 'reg_alpha': 0.44853221119513076, 'reg_lambda': 0.852696466338004, 'min_child_samples': 56}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  66%|██████▌   | 33/50 [04:21<02:45,  9.73s/it]

[I 2025-09-28 03:03:09,609] Trial 32 finished with value: 0.30801098917843367 and parameters: {'n_estimators': 286, 'learning_rate': 0.021793190071613823, 'max_depth': 10, 'num_leaves': 81, 'subsample': 0.8913735708174502, 'colsample_bytree': 0.7645930912587906, 'reg_alpha': 0.1668367861364233, 'reg_lambda': 0.7959478041823813, 'min_child_samples': 43}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 12. Best value: 0.312247:  68%|██████▊   | 34/50 [04:29<02:27,  9.22s/it]

[I 2025-09-28 03:03:17,634] Trial 33 finished with value: 0.30977059551060515 and parameters: {'n_estimators': 311, 'learning_rate': 0.13044351632394663, 'max_depth': 9, 'num_leaves': 90, 'subsample': 0.7155605794824682, 'colsample_bytree': 0.7281654271859975, 'reg_alpha': 0.302427739889209, 'reg_lambda': 0.9995599699441596, 'min_child_samples': 20}. Best is trial 12 with value: 0.3122474266264243.


Best trial: 34. Best value: 0.313802:  70%|███████   | 35/50 [04:37<02:15,  9.05s/it]

[I 2025-09-28 03:03:26,285] Trial 34 finished with value: 0.31380206837823194 and parameters: {'n_estimators': 395, 'learning_rate': 0.12086552797249225, 'max_depth': 9, 'num_leaves': 92, 'subsample': 0.7650723155381586, 'colsample_bytree': 0.800569795502006, 'reg_alpha': 0.3230337031045488, 'reg_lambda': 0.948240665286624, 'min_child_samples': 20}. Best is trial 34 with value: 0.31380206837823194.


Best trial: 34. Best value: 0.313802:  72%|███████▏  | 36/50 [04:49<02:19,  9.98s/it]

[I 2025-09-28 03:03:38,443] Trial 35 finished with value: 0.3090347774838455 and parameters: {'n_estimators': 406, 'learning_rate': 0.0690071596799701, 'max_depth': 9, 'num_leaves': 95, 'subsample': 0.7607284879416079, 'colsample_bytree': 0.8059603158780498, 'reg_alpha': 0.4525988308191177, 'reg_lambda': 0.920841445712783, 'min_child_samples': 30}. Best is trial 34 with value: 0.31380206837823194.


Best trial: 34. Best value: 0.313802:  74%|███████▍  | 37/50 [05:05<02:31, 11.64s/it]

[I 2025-09-28 03:03:53,950] Trial 36 finished with value: 0.30521384027262094 and parameters: {'n_estimators': 368, 'learning_rate': 0.014564114265794946, 'max_depth': 10, 'num_leaves': 71, 'subsample': 0.8256816271223836, 'colsample_bytree': 0.759980421172767, 'reg_alpha': 0.35414585451901837, 'reg_lambda': 0.9319265985536321, 'min_child_samples': 36}. Best is trial 34 with value: 0.31380206837823194.


Best trial: 34. Best value: 0.313802:  76%|███████▌  | 38/50 [05:24<02:47, 13.94s/it]

[I 2025-09-28 03:04:13,246] Trial 37 finished with value: 0.2976327204512904 and parameters: {'n_estimators': 436, 'learning_rate': 0.032571695627358045, 'max_depth': 8, 'num_leaves': 100, 'subsample': 0.8740679659140748, 'colsample_bytree': 0.7948687308479514, 'reg_alpha': 0.5562983897269425, 'reg_lambda': 0.8800829237280188, 'min_child_samples': 26}. Best is trial 34 with value: 0.31380206837823194.


Best trial: 34. Best value: 0.313802:  78%|███████▊  | 39/50 [05:37<02:29, 13.59s/it]

[I 2025-09-28 03:04:26,019] Trial 38 finished with value: 0.3091243379088866 and parameters: {'n_estimators': 495, 'learning_rate': 0.084083248728327, 'max_depth': 9, 'num_leaves': 79, 'subsample': 0.7839032420618607, 'colsample_bytree': 0.8277100228352468, 'reg_alpha': 0.40447841880978913, 'reg_lambda': 0.7506952025471694, 'min_child_samples': 10}. Best is trial 34 with value: 0.31380206837823194.


Best trial: 34. Best value: 0.313802:  80%|████████  | 40/50 [05:43<01:53, 11.38s/it]

[I 2025-09-28 03:04:32,242] Trial 39 finished with value: 0.3026963890168756 and parameters: {'n_estimators': 394, 'learning_rate': 0.2161512504601446, 'max_depth': 11, 'num_leaves': 90, 'subsample': 0.7442509040750934, 'colsample_bytree': 0.9109841766749615, 'reg_alpha': 0.24084666021367784, 'reg_lambda': 0.9429588204984916, 'min_child_samples': 19}. Best is trial 34 with value: 0.31380206837823194.


Best trial: 34. Best value: 0.313802:  82%|████████▏ | 41/50 [05:57<01:48, 12.00s/it]

[I 2025-09-28 03:04:45,707] Trial 40 finished with value: 0.30460381676240056 and parameters: {'n_estimators': 358, 'learning_rate': 0.048852968156234695, 'max_depth': 7, 'num_leaves': 58, 'subsample': 0.9511571060687568, 'colsample_bytree': 0.8014744753964063, 'reg_alpha': 0.34331901261693754, 'reg_lambda': 0.8096685255002536, 'min_child_samples': 31}. Best is trial 34 with value: 0.31380206837823194.


Best trial: 34. Best value: 0.313802:  84%|████████▍ | 42/50 [06:05<01:27, 10.97s/it]

[I 2025-09-28 03:04:54,269] Trial 41 finished with value: 0.313128190372579 and parameters: {'n_estimators': 327, 'learning_rate': 0.13716590720094468, 'max_depth': 9, 'num_leaves': 91, 'subsample': 0.7107628137312193, 'colsample_bytree': 0.7286866997646168, 'reg_alpha': 0.2746986575873201, 'reg_lambda': 0.9887014339787646, 'min_child_samples': 19}. Best is trial 34 with value: 0.31380206837823194.


Best trial: 34. Best value: 0.313802:  86%|████████▌ | 43/50 [06:13<01:09,  9.95s/it]

[I 2025-09-28 03:05:01,849] Trial 42 finished with value: 0.3055023818691802 and parameters: {'n_estimators': 327, 'learning_rate': 0.1287478629300098, 'max_depth': 9, 'num_leaves': 86, 'subsample': 0.69718234643494, 'colsample_bytree': 0.7294622363849571, 'reg_alpha': 0.44492487028106875, 'reg_lambda': 0.9548314539231105, 'min_child_samples': 24}. Best is trial 34 with value: 0.31380206837823194.


Best trial: 34. Best value: 0.313802:  88%|████████▊ | 44/50 [06:21<00:57,  9.54s/it]

[I 2025-09-28 03:05:10,412] Trial 43 finished with value: 0.3083067793346112 and parameters: {'n_estimators': 346, 'learning_rate': 0.1764568425750286, 'max_depth': 8, 'num_leaves': 93, 'subsample': 0.7210519518117873, 'colsample_bytree': 0.7543830645917659, 'reg_alpha': 0.16592547046835798, 'reg_lambda': 0.882729271994338, 'min_child_samples': 16}. Best is trial 34 with value: 0.31380206837823194.


Best trial: 34. Best value: 0.313802:  90%|█████████ | 45/50 [06:34<00:51, 10.37s/it]

[I 2025-09-28 03:05:22,736] Trial 44 finished with value: 0.3058542313814711 and parameters: {'n_estimators': 426, 'learning_rate': 0.06368422932521703, 'max_depth': 10, 'num_leaves': 48, 'subsample': 0.7717776926825116, 'colsample_bytree': 0.7865244859229475, 'reg_alpha': 0.8602388794499966, 'reg_lambda': 0.9618399010447776, 'min_child_samples': 28}. Best is trial 34 with value: 0.31380206837823194.


Best trial: 34. Best value: 0.313802:  92%|█████████▏| 46/50 [06:45<00:42, 10.59s/it]

[I 2025-09-28 03:05:33,824] Trial 45 finished with value: 0.304518375244512 and parameters: {'n_estimators': 370, 'learning_rate': 0.09172719612762996, 'max_depth': 8, 'num_leaves': 96, 'subsample': 0.6794391684778894, 'colsample_bytree': 0.8472688642873126, 'reg_alpha': 0.26050675796654793, 'reg_lambda': 0.27529230217095163, 'min_child_samples': 12}. Best is trial 34 with value: 0.31380206837823194.


Best trial: 34. Best value: 0.313802:  94%|█████████▍| 47/50 [06:58<00:33, 11.29s/it]

[I 2025-09-28 03:05:46,755] Trial 46 finished with value: 0.3072811578741547 and parameters: {'n_estimators': 320, 'learning_rate': 0.025552864913986822, 'max_depth': 11, 'num_leaves': 87, 'subsample': 0.7072029805772403, 'colsample_bytree': 0.6445662056275994, 'reg_alpha': 0.3786887719510646, 'reg_lambda': 0.9005719613984264, 'min_child_samples': 41}. Best is trial 34 with value: 0.31380206837823194.


Best trial: 34. Best value: 0.313802:  96%|█████████▌| 48/50 [07:05<00:20, 10.17s/it]

[I 2025-09-28 03:05:54,302] Trial 47 finished with value: 0.29734717309398534 and parameters: {'n_estimators': 298, 'learning_rate': 0.1229934688326258, 'max_depth': 9, 'num_leaves': 28, 'subsample': 0.6552492803630611, 'colsample_bytree': 0.7182816667372344, 'reg_alpha': 0.3117067575611571, 'reg_lambda': 0.6478892042133175, 'min_child_samples': 34}. Best is trial 34 with value: 0.31380206837823194.


Best trial: 34. Best value: 0.313802:  98%|█████████▊| 49/50 [07:19<00:11, 11.09s/it]

[I 2025-09-28 03:06:07,559] Trial 48 finished with value: 0.30615546221142514 and parameters: {'n_estimators': 228, 'learning_rate': 0.1136306009836444, 'max_depth': 10, 'num_leaves': 75, 'subsample': 0.8015467078259829, 'colsample_bytree': 0.6832092718580957, 'reg_alpha': 0.056423385332759535, 'reg_lambda': 0.9941341769178521, 'min_child_samples': 20}. Best is trial 34 with value: 0.31380206837823194.


Best trial: 34. Best value: 0.313802: 100%|██████████| 50/50 [07:24<00:00,  8.89s/it]


[I 2025-09-28 03:06:12,957] Trial 49 finished with value: 0.30481197974175267 and parameters: {'n_estimators': 468, 'learning_rate': 0.15888123264714096, 'max_depth': 6, 'num_leaves': 41, 'subsample': 0.7346314912783337, 'colsample_bytree': 0.8167933822916276, 'reg_alpha': 0.55710998676073, 'reg_lambda': 0.78919611985621, 'min_child_samples': 51}. Best is trial 34 with value: 0.31380206837823194.

✅ Optimization Complete!
   Best F1 Score: 0.3138
   Best Parameters:
     n_estimators: 395
     learning_rate: 0.12086552797249225
     max_depth: 9
     num_leaves: 92
     subsample: 0.7650723155381586
     colsample_bytree: 0.800569795502006
     reg_alpha: 0.3230337031045488
     reg_lambda: 0.948240665286624
     min_child_samples: 20

📊 Training Final LightGBM with Best Parameters...
✅ Optimized LightGBM Results:
   Accuracy: 0.3313
   F1-Score: 0.3207
   Precision: 0.3159
   Recall: 0.3313

📊 Top 15 Most Important Features:
                         feature  importance
2   student_ove

In [11]:
# Cell 5: Model Comparison and Selection
print("🏆 MODEL COMPARISON AND SELECTION")
print("=" * 50)

# Compare all models
models_comparison = pd.DataFrame({
    'Model': ['Logistic Regression', 'Decision Tree', 'Random Forest', 'LightGBM (Optuna)'],
    'Accuracy': [lr_accuracy, dt_accuracy, rf_accuracy, lgb_accuracy],
    'F1-Score': [lr_f1, dt_f1, rf_f1, lgb_f1],
    'Precision': [lr_precision, dt_precision, rf_precision, lgb_precision],
    'Recall': [lr_recall, dt_recall, rf_recall, lgb_recall]
})

print("📊 Model Performance Comparison:")
print(models_comparison.round(4))

# Find best model based on F1-Score
best_model_idx = models_comparison['F1-Score'].idxmax()
best_model_name = models_comparison.loc[best_model_idx, 'Model']
best_f1 = models_comparison.loc[best_model_idx, 'F1-Score']

print(f"\n🏆 Best Model: {best_model_name}")
print(f"   F1-Score: {best_f1:.4f}")
print(f"   Accuracy: {models_comparison.loc[best_model_idx, 'Accuracy']:.4f}")

# Select the best model for saving
if best_model_name == 'Logistic Regression':
    final_model = lr_model
    model_type = 'logistic_regression'
elif best_model_name == 'Decision Tree':
    final_model = dt_model
    model_type = 'decision_tree'
elif best_model_name == 'Random Forest':
    final_model = rf_model
    model_type = 'random_forest'
else:  # LightGBM
    final_model = best_lgb_model
    model_type = 'lightgbm_optimized'

print(f"\n💾 Saving {best_model_name} as the final model...")

# Save the best model
model_package = {
    'model': final_model,
    'model_type': model_type,
    'feature_names': feature_cols,
    'target_name': 'grade_category',
    'class_names': sorted(y_train.unique().tolist()),
    'performance': {
        'accuracy': models_comparison.loc[best_model_idx, 'Accuracy'],
        'f1_score': models_comparison.loc[best_model_idx, 'F1-Score'],
        'precision': models_comparison.loc[best_model_idx, 'Precision'],
        'recall': models_comparison.loc[best_model_idx, 'Recall']
    },
    'training_samples': len(X_train),
    'test_samples': len(X_test),
    'total_features': len(feature_cols),
    'optuna_params': study.best_params if model_type == 'lightgbm_optimized' else None
}

joblib.dump(model_package, MODEL_PATH)
print(f"✅ Model saved to: {MODEL_PATH}")

# Detailed classification report for best model
print(f"\n📊 Detailed Classification Report for {best_model_name}:")
print(classification_report(y_test, y_pred_lgb, target_names=label_encoder.classes_))

# Confusion matrix
print(f"\n📊 Confusion Matrix:")
cm = confusion_matrix(y_test, y_pred_lgb)
print(cm)

# Final summary
print(f"\n🎯 TRAINING COMPLETE!")
print(f"📊 Final Summary:")
print(f"   Best Model: {best_model_name}")
print(f"   F1-Score: {best_f1:.4f}")
print(f"   Training Samples: {len(X_train):,}")
print(f"   Test Samples: {len(X_test):,}")
print(f"   Features: {len(feature_cols)}")
print(f"   Target: Grade Categories (A, B, C, D, F)")
print(f"   Datasets Used: train_processed_comprehensive.csv, test_processed_comprehensive.csv")
print(f"\n🚀 Model ready for deployment!")


🏆 MODEL COMPARISON AND SELECTION
📊 Model Performance Comparison:
                 Model  Accuracy  F1-Score  Precision  Recall
0  Logistic Regression    0.3386    0.3246     0.3262  0.3386
1        Decision Tree    0.3045    0.2956     0.2924  0.3045
2        Random Forest    0.3520    0.3275     0.3183  0.3520
3    LightGBM (Optuna)    0.3313    0.3207     0.3159  0.3313

🏆 Best Model: Random Forest
   F1-Score: 0.3275
   Accuracy: 0.3520

💾 Saving Random Forest as the final model...


AttributeError: 'numpy.ndarray' object has no attribute 'unique'