In [None]:
import pandas as pd
import lightgbm as lgb
import json
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, average_precision_score

# Load data
# Pre-computed feature tables for the train and validation splits.
train = pd.read_csv('data/processed/train_features.csv')
val = pd.read_csv('data/processed/val_features.csv')

# The list of model input columns is stored next to the tables as JSON.
features = json.loads(Path('data/processed/feature_columns.json').read_text())

# Split each table into the feature matrix and the 'Class' target column.
X_train, X_val = train[features], val[features]
y_train, y_val = train['Class'], val['Class']

# LightGBM with class weights
# Weight the positive class by the negative/positive ratio of the training
# labels so that the two classes contribute comparably during training.
n_pos = int((y_train == 1).sum())
n_neg = int((y_train == 0).sum())
if n_pos == 0:
    # With NumPy integers the division below would not raise; it would yield
    # inf/nan plus a RuntimeWarning and hand LightGBM a meaningless weight.
    raise ValueError(
        "y_train contains no positive samples (Class == 1); "
        "cannot compute scale_pos_weight"
    )
scale_pos_weight = n_neg / n_pos

params = {
    'objective': 'binary',
    'metric': 'auc',
    'scale_pos_weight': scale_pos_weight,
    'verbosity': -1,
    'random_state': 42
}
train_data = lgb.Dataset(X_train, label=y_train)
# reference=train_data ties the validation Dataset to the training Dataset.
val_data = lgb.Dataset(X_val, label=y_val, reference=train_data)

# The validation set is passed for evaluation only: no early-stopping
# callback is configured, so the requested 100 boosting rounds are used.
model = lgb.train(params, train_data, valid_sets=[val_data], num_boost_round=100)

# Predict
# Continuous scores for the validation set, plus hard labels obtained by
# thresholding those scores at 0.5.
y_pred = model.predict(X_val)
y_pred_bin = (y_pred > 0.5).astype(int)

# Metrics
# Label-based scores use the thresholded predictions; ranking-based scores
# (ROC AUC, average precision) use the raw scores. Key order is preserved
# so the JSON output keeps the same layout.
metrics = {
    **{
        label: scorer(y_val, y_pred_bin)
        for label, scorer in (
            ('precision', precision_score),
            ('recall', recall_score),
            ('f1', f1_score),
        )
    },
    **{
        label: scorer(y_val, y_pred)
        for label, scorer in (
            ('roc_auc', roc_auc_score),
            ('pr_auc', average_precision_score),
        )
    },
}

# Persist the validation metrics as pretty-printed JSON.
Path('data/lgbm_metrics.json').write_text(json.dumps(metrics, indent=2))

# Feature importance
# Draw the importance chart (capped at 20 features) and work with the
# figure that owns the returned Axes rather than pyplot's implicit
# "current figure".
importance_ax = lgb.plot_importance(model, max_num_features=20)
importance_ax.figure.savefig('data/feature_importance.png')
# Release the figure so it does not stay open in pyplot's registry.
plt.close(importance_ax.figure)

# Save model
# NOTE(review): Booster.save_model writes LightGBM's own text model format,
# not a pickle, despite the '.pkl' suffix. Reload it with
# lgb.Booster(model_file=...). The filename is kept unchanged so existing
# consumers of this path keep working.
model_path = Path('data/models/lgbm_model.pkl')
# Nothing earlier in this script creates data/models/, so make sure the
# target directory exists before writing.
model_path.parent.mkdir(parents=True, exist_ok=True)
model.save_model(str(model_path))