In [None]:
# model_development_log.ipynb

# -------------------------------
# 1. Setup & Install Libraries
# -------------------------------
!pip install scikit-learn xgboost lightgbm catboost -q

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# -------------------------------
# 2. Load Processed Data
# -------------------------------
# Read the preprocessed training table from disk.
train_df = pd.read_csv('data/processed_train.csv')

# Separate the label column ('target') from the feature columns.
X = train_df.drop(columns=['target'])
y = train_df['target']

# Hold out 20% of the rows for validation. The split is stratified on the
# label so that both partitions keep the class proportions of the full
# dataset; a purely random split can skew the class balance of the
# validation set and distort the per-class metrics reported for each model.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# -------------------------------
# 3. Model Development Log - Logistic Regression
# -------------------------------
print("Training Logistic Regression...")
# fit() hands back the fitted estimator, so construction and training chain.
lr_model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_val)

# Score the held-out predictions: overall accuracy first, then the
# confusion matrix followed by the per-class report.
lr_accuracy = accuracy_score(y_val, y_pred_lr)
print(f"Logistic Regression Accuracy: {lr_accuracy:.4f}")
for report_fn in (confusion_matrix, classification_report):
    print(report_fn(y_val, y_pred_lr))

# -------------------------------
# 4. Model Development Log - Random Forest
# -------------------------------
print("Training Random Forest Classifier...")
# 100 trees with a fixed seed; fit() returns the fitted estimator itself.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_val)

# Score the held-out predictions: overall accuracy first, then the
# confusion matrix followed by the per-class report.
rf_accuracy = accuracy_score(y_val, y_pred_rf)
print(f"Random Forest Accuracy: {rf_accuracy:.4f}")
for report_fn in (confusion_matrix, classification_report):
    print(report_fn(y_val, y_pred_rf))

# -------------------------------
# 5. Model Development Log - XGBoost
# -------------------------------
print("Training XGBoost Classifier...")
# 100 boosting rounds with a fixed seed; fit() returns the fitted estimator.
xgb_model = XGBClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_val)

# Score the held-out predictions: overall accuracy first, then the
# confusion matrix followed by the per-class report.
xgb_accuracy = accuracy_score(y_val, y_pred_xgb)
print(f"XGBoost Accuracy: {xgb_accuracy:.4f}")
for report_fn in (confusion_matrix, classification_report):
    print(report_fn(y_val, y_pred_xgb))

# -------------------------------
# 6. Model Development Log - LightGBM
# -------------------------------
print("Training LightGBM Classifier...")
# 100 boosting rounds with a fixed seed; fit() returns the fitted estimator.
lgbm_model = LGBMClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_lgbm = lgbm_model.predict(X_val)

# Score the held-out predictions: overall accuracy first, then the
# confusion matrix followed by the per-class report.
lgbm_accuracy = accuracy_score(y_val, y_pred_lgbm)
print(f"LightGBM Accuracy: {lgbm_accuracy:.4f}")
for report_fn in (confusion_matrix, classification_report):
    print(report_fn(y_val, y_pred_lgbm))

# -------------------------------
# 7. Model Development Log - CatBoost
# -------------------------------
print("Training CatBoost Classifier...")
# Hyperparameters are spelled out one per line; verbose=0 is passed so the
# training run does not print per-iteration progress.
catboost_model = CatBoostClassifier(
    iterations=100,
    learning_rate=0.1,
    depth=6,
    random_state=42,
    verbose=0,
).fit(X_train, y_train)
y_pred_catboost = catboost_model.predict(X_val)

# Score the held-out predictions: overall accuracy first, then the
# confusion matrix followed by the per-class report.
catboost_accuracy = accuracy_score(y_val, y_pred_catboost)
print(f"CatBoost Accuracy: {catboost_accuracy:.4f}")
for report_fn in (confusion_matrix, classification_report):
    print(report_fn(y_val, y_pred_catboost))

# -------------------------------
# 8. Summary of Results
# -------------------------------
# Collect each model's validation accuracy into one table for comparison.
results = pd.DataFrame({
    'Model': ['Logistic Regression', 'Random Forest', 'XGBoost', 'LightGBM', 'CatBoost'],
    'Accuracy': [lr_accuracy, rf_accuracy, xgb_accuracy, lgbm_accuracy, catboost_accuracy]
})

print("\nModel Comparison Results:")
print(results)

# Visualize Model Comparison
plt.figure(figsize=(10, 5))
# seaborn 0.13 deprecated passing `palette` without `hue` (removal is planned
# for 0.14), so the x variable is also assigned to `hue`. dodge=False keeps
# every bar centred on its own tick on releases that would otherwise offset
# the bars per hue level.
ax = sns.barplot(
    x='Model',
    y='Accuracy',
    hue='Model',
    data=results,
    palette='viridis',
    dodge=False,
)
# Some seaborn releases draw a legend for the hue levels; here it would only
# repeat the x tick labels, so drop it whenever one was created.
hue_legend = ax.get_legend()
if hue_legend is not None:
    hue_legend.remove()
plt.title('Model Comparison: Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Model')
plt.xticks(rotation=45)
plt.show()

