In [None]:
# ✅ 1. Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib
import os

In [None]:
# ✅ 2. Load & clean
# Read the raw CSV, discard the 'date' column, then keep only the numeric
# columns so everything left can be fed straight to the regressors.
df = (
    pd.read_csv("energydata_complete.csv")
    .drop(columns=['date'])
    .select_dtypes(include=[np.number])
)


In [None]:
# ✅ 3. Define features
target = 'Appliances'
X = df.drop(columns=[target])
y = df[target]


In [None]:

# ✅ 4. Scale features
# Fit a StandardScaler on the feature frame and apply it in a second step;
# the fitted `scaler` object is kept because it is exported further down.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [None]:

# ✅ 5. Train/test split
# Split the *unscaled* features first, then fit the scaler on the training
# rows only. Splitting X_scaled (standardised with statistics computed over
# the whole dataset) would let the held-out rows influence preprocessing,
# i.e. leak test-set information into training.
# Same test_size and random_state as before, so the same rows land in each part.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Re-bind `scaler` to a train-only fit; this is the object that gets exported
# later, and the one used to transform both partitions here.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


In [None]:
# ✅ 6. Models to train
# Seed for the stochastic learners (same value the train/test split uses).
# Without it the random forest is re-sampled differently on every run, so the
# reported metrics and exported pickles would not be reproducible.
RANDOM_STATE = 42

# Display name -> unfitted estimator. The name is reused as the 'Model' label
# in the result tables and as the stem of the exported pickle file.
models = {
    'LinearRegression': LinearRegression(),
    'RandomForest': RandomForestRegressor(n_estimators=100, n_jobs=-1, random_state=RANDOM_STATE),
    'XGBoost': XGBRegressor(n_estimators=100, verbosity=0, n_jobs=-1, random_state=RANDOM_STATE),
    'LightGBM': LGBMRegressor(n_estimators=100, n_jobs=-1, random_state=RANDOM_STATE)
}

# Accumulators filled by the training loop: per-row predictions and per-model metrics.
results_df = pd.DataFrame()
model_metrics = []


In [None]:
# ✅ 7. Train & store results
# Every artifact is written under models/; create it up front so the first
# joblib.dump does not fail on a fresh checkout.
os.makedirs('models', exist_ok=True)

# Start from empty accumulators so that re-running this cell replaces the
# previous results instead of appending a second copy of every row.
result_frames = []
model_metrics = []

for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    # One row per test sample; the index is reset so 'Actual' lines up
    # positionally with the prediction array.
    model_results = pd.DataFrame({
        'Model': name,
        'Actual': y_test.reset_index(drop=True),
        'Predicted': preds
    })

    # Green_Score: min-max rescaling of this model's predictions, inverted so
    # the lowest prediction scores 100 and the highest scores 0.
    pred_min = model_results['Predicted'].min()
    pred_span = model_results['Predicted'].max() - pred_min
    if pred_span == 0:
        # Constant predictions would give 0/0 = NaN; every row sits at the
        # minimum, so give it the score the minimum gets.
        model_results['Green_Score'] = 100.0
    else:
        model_results['Green_Score'] = 100 - ((model_results['Predicted'] - pred_min) /
                                              pred_span * 100)
    model_results['Green_Score'] = model_results['Green_Score'].clip(0, 100)
    result_frames.append(model_results)

    model_metrics.append({
        'Model': name,
        'MAE': round(mean_absolute_error(y_test, preds), 2),
        'RMSE': round(np.sqrt(mean_squared_error(y_test, preds)), 2),
        'R2': round(r2_score(y_test, preds), 4)
    })

    # ✅ Save model
    joblib.dump(model, f'models/{name}_model.pkl')

# Concatenate once after the loop rather than growing a frame on every iteration.
results_df = pd.concat(result_frames, ignore_index=True) if result_frames else pd.DataFrame()


In [None]:
# ✅ 8. Save scaler
# Pickle the fitted scaler into the same folder as the model pickles.
scaler_path = 'models/scaler.pkl'
joblib.dump(scaler, scaler_path)


In [None]:
# ✅ 9. Save metrics
# Turn the per-model metric dicts (Model / MAE / RMSE / R2) into a table and
# write it out without the index column.
metrics_path = "models/model_metrics.csv"
metrics_df = pd.DataFrame(data=model_metrics)
metrics_df.to_csv(metrics_path, index=False)


In [None]:
# ✅ 10. Save results
# Write the combined per-sample predictions and Green_Score values of all
# models to CSV, without the index column.
results_path = "models/green_score_results.csv"
results_df.to_csv(results_path, index=False)



In [None]:
# ✅ 11. Feature importance using LightGBM
# Pair the fitted LightGBM model's importances with the feature column names,
# rank them from most to least important, and export the table.
lightgbm = models['LightGBM']
features = X.columns
importances = lightgbm.feature_importances_

imp_df = pd.DataFrame({'Feature': features, 'Importance': importances})
imp_df = imp_df.sort_values(by='Importance', ascending=False)
imp_df.to_csv("models/feature_importance.csv", index=False)

print("✅ All models, metrics, and artifacts exported.")
