# Gradient Boosting Model Training
Training XGBoost and LightGBM models for energy prediction

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import xgboost as xgb
import lightgbm as lgb
import matplotlib.pyplot as plt
import sys
sys.path.insert(0, '../src')

## 1. Load and Prepare Data

In [None]:
from data_preprocessing import EnergyDataPreprocessor

# Read the raw building-energy CSV (path is relative to the notebook's
# working directory).
df = pd.read_csv('../data/building_energy_data.csv')

# Run the project's preprocessing step on the raw frame; everything
# downstream in this notebook works on `df_processed`.
preprocessor = EnergyDataPreprocessor()
df_processed = preprocessor.preprocess(df)

# Quick sanity report: dimensions, total count of null cells across all
# columns, and the first few rows.
print(f'Processed data shape: {df_processed.shape}')
print(f'Missing values: {df_processed.isna().sum().sum()}')
print(df_processed.head())

## 2. Train XGBoost Model

In [None]:
from model_trainer import EnergyPredictionModel, ModelConfig

# Target is the 'energy_consumption' column; every other column of the
# processed frame is used as a feature.
y = df_processed['energy_consumption']
X = df_processed.drop(columns='energy_consumption')

# 80/20 hold-out split with a fixed seed so the partition is reproducible.
# NOTE(review): the training cells below call train(X, y) on the full data,
# so these four splits are only used for the size printout here --
# presumably train() performs its own internal split; confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print(f'Training set size: {X_train.shape}')
print(f'Test set size: {X_test.shape}')
print(f'Feature count: {X.shape[1]}')

In [None]:
# Build an XGBoost-configured model and fit it on the full feature matrix
# and target; train() returns the metrics that are reported below.
config = ModelConfig(model_type='xgboost')
model_xgb = EnergyPredictionModel(config)
metrics_xgb = model_xgb.train(X, y)

# Report each returned metric to four decimal places.
print('XGBoost Model Metrics:')
for name, score in metrics_xgb.items():
    print(f'{name}: {score:.4f}')

## 3. Train LightGBM Model

In [None]:
# Build a LightGBM-configured model and fit it on the same X, y that the
# XGBoost model was trained on; train() returns the metrics reported below.
config_lgb = ModelConfig(model_type='lightgbm')
model_lgb = EnergyPredictionModel(config_lgb)
metrics_lgb = model_lgb.train(X, y)

# Report each returned metric to four decimal places.
print('LightGBM Model Metrics:')
for name, score in metrics_lgb.items():
    print(f'{name}: {score:.4f}')

## 4. Model Comparison

In [None]:
# Side-by-side metrics table: one column per model, one row per metric
# name. (`pd` is already imported in the notebook's first cell, so it is
# not re-imported here.)
comparison = pd.DataFrame({
    'XGBoost': metrics_xgb,
    'LightGBM': metrics_lgb
})
print(comparison)

# Plot comparison: grouped bars, one group per metric. DataFrame.plot
# returns the Axes it drew on, so the labels are set on that Axes
# explicitly instead of relying on pyplot's "current axes" state.
ax = comparison.plot(kind='bar', figsize=(10, 5))
ax.set_title('Model Performance Comparison')
ax.set_ylabel('Score')
ax.tick_params(axis='x', labelrotation=45)
plt.show()

## 5. Save Models

In [None]:
# Persist each trained model under its own directory, XGBoost first and
# then LightGBM.
for trained_model, target_dir in (
    (model_xgb, '../models/xgboost/'),
    (model_lgb, '../models/lightgbm/'),
):
    trained_model.save(target_dir)

print('Models saved successfully!')

## 6. Feature Importance

In [None]:
# Feature importance of the fitted XGBoost estimator, paired with the
# feature column names and ranked from most to least important.
importance = model_xgb.model.feature_importances_
feature_importance = pd.DataFrame({
    'feature': X.columns,
    'importance': importance
}).sort_values('importance', ascending=False)

print('Top 10 Important Features:')
print(feature_importance.head(10))

# Plot the 15 highest-ranked features as horizontal bars.
# The size is passed straight to DataFrame.plot: without an `ax=` argument
# pandas opens its own new figure, so a preceding plt.figure(figsize=...)
# would only leave an empty extra figure and the chart would ignore the
# requested size.
ax = feature_importance.head(15).plot(
    x='feature',
    y='importance',
    kind='barh',
    figsize=(10, 6),
)
ax.set_title('Feature Importance')
plt.show()