In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor

# Training script: load the energy-consumption CSV, fit tree-based regressors,
# and persist the full-data random forest together with its feature scaler.

file_path = "energy_consumption.csv"
df = pd.read_csv(file_path)

# Strip surrounding whitespace from the header names before any column lookup.
df.columns = df.columns.str.strip()

# Parse timestamps (unparseable values are coerced to NaT), discard the rows
# where parsing failed, and index the frame by timestamp.
df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
df = df.dropna(subset=["timestamp"]).set_index("timestamp")

# Model inputs. The order matters: the scaler and the models are fitted on the
# columns in exactly this order. Group labels below are by apparent theme only.
features = [
    # calendar
    "day_of_week", "month", "hour", "weekend", "holiday",
    # weather
    "temperature", "humidity", "wind_speed", "solar_radiation",
    # building
    "building_size", "occupants", "building_type",
    # end-use loads
    "HVAC_usage", "lighting_usage", "appliance_load",
    # lagged consumption
    "previous_hour_consumption", "previous_day_consumption",
    "previous_week_consumption",
]
target = "energy_consumption"

X, y = df[features], df[target]

# shuffle=False keeps the row order: the leading 80% of rows form the training
# split and the trailing 20% form the test split.
# NOTE(review): this is a chronological hold-out only if the CSV rows are
# already time-ordered -- nothing here sorts the index; confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Scaling statistics are learned from the training rows only; the same fitted
# scaler is then applied to each split and to the complete feature matrix.
# NOTE(review): StandardScaler needs numeric input -- assumes 'building_type'
# is already numerically encoded in the CSV; confirm.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
X_scaled = scaler.transform(X)

# Models fitted on the training split (no evaluation appears in this section).
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_train
)
dt_model = DecisionTreeRegressor(random_state=42).fit(X_train_scaled, y_train)

# Random forest with the same hyperparameters, refitted on every row. Its
# inputs were scaled with the train-fitted scaler above, so the two pickles
# written below belong together.
rf_model_full = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_scaled, y
)

# Persist the full-data model and the scaler that produced its inputs.
with open("rf_model_full.pkl", "wb") as model_file:
    pickle.dump(rf_model_full, model_file)

with open("scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)