In [1]:
!pip install pandas numpy scikit-learn xgboost matplotlib seaborn joblib




In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

def load_data(load_csv, weather_csv):
    """Read the load and weather CSV files into two DataFrames.

    Both inputs are read with ``pd.read_csv`` and their ``timestamp`` column
    is parsed as dates.

    Args:
        load_csv: Source of the load data (anything ``pd.read_csv`` accepts).
        weather_csv: Source of the weather data (anything ``pd.read_csv``
            accepts).

    Returns:
        A ``(load_df, weather)`` tuple of DataFrames, in that order.
    """
    # Same reader settings for both files; the load file is read first.
    frames = tuple(
        pd.read_csv(source, parse_dates=['timestamp'])
        for source in (load_csv, weather_csv)
    )
    return frames

def merge_data(load_df, weather):
    """Left-join weather onto the load data by ``timestamp`` and sort by time.

    Every row of ``load_df`` is kept; load rows with no matching weather
    timestamp get NaN in the weather columns.

    Args:
        load_df: DataFrame with a ``timestamp`` column (left side of the join).
        weather: DataFrame with a ``timestamp`` column (right side of the join).

    Returns:
        The merged DataFrame sorted by ``timestamp`` with a fresh 0..n-1 index.
    """
    return (
        load_df
        .merge(weather, on='timestamp', how='left')
        .sort_values('timestamp')
        .reset_index(drop=True)
    )

def engineer_features(df):
    """Add calendar, lag and rolling-mean features derived from the input.

    The input frame is not modified; a copy with the extra columns is returned.

    Added columns:
        hour, dayofweek, month: taken from ``timestamp`` via the ``.dt`` accessor.
        is_weekend: 1 when ``dayofweek`` is 5 or 6, otherwise 0.
        lag_1h, lag_24h: ``load_kwh`` shifted by 1 and 24 rows.
        rolling_24h: mean of ``load_kwh`` over the 24 rows preceding the
            current row (the current row itself is excluded).

    Args:
        df: DataFrame with a datetime ``timestamp`` column and a ``load_kwh``
            column. The lag names assume one row per hour, sorted by time
            -- TODO confirm against the caller.

    Returns:
        A new DataFrame with the added columns and remaining NaNs filled
        backward, then forward.
    """
    # Work on a copy so re-running the cell never sees an already-mutated frame.
    out = df.copy()

    timestamps = out['timestamp']
    out['hour'] = timestamps.dt.hour
    out['dayofweek'] = timestamps.dt.dayofweek
    out['month'] = timestamps.dt.month
    out['is_weekend'] = out['dayofweek'].isin([5, 6]).astype(int)

    # Lag features
    load = out['load_kwh']
    out['lag_1h'] = load.shift(1)
    out['lag_24h'] = load.shift(24)
    # Shift before rolling: without it the window would contain the current
    # row's load_kwh, i.e. the value being predicted would leak into a feature.
    out['rolling_24h'] = load.shift(1).rolling(24).mean()

    # Fill NA values arising from shifts. `.bfill()/.ffill()` replace the
    # deprecated `fillna(method=...)` spelling.
    # NOTE(review): back-filling copies later values into the first rows of the
    # lag columns; dropping those rows instead would change the row count.
    return out.bfill().ffill()

def split_data(df, target_col='load_kwh', test_size=0.2, random_state=42, shuffle=True):
    """Split a feature frame into train/test features and targets.

    The ``timestamp`` column and the target column are removed from the
    features; the target column becomes ``y``.

    Args:
        df: DataFrame containing ``timestamp``, ``target_col`` and the
            feature columns.
        target_col: Name of the column to predict.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``.
        shuffle: Forwarded to ``train_test_split``. The default ``True`` keeps
            the previous behaviour. Pass ``False`` to keep row order, so that
            for a frame sorted by time the test set is the final block of rows.

    Returns:
        Whatever ``train_test_split(X, y, ...)`` returns
        (``X_train, X_test, y_train, y_test``).

    Raises:
        KeyError: If ``timestamp`` or ``target_col`` is not a column of ``df``.
    """
    y = df[target_col]
    X = df.drop(columns=['timestamp', target_col])
    return train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        shuffle=shuffle,
    )


In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

def load_data(load_csv, weather_csv):
    """Load the load and weather CSVs, parsing ``timestamp`` as dates.

    Note: a function with this name is also defined in an earlier cell; this
    later definition is the one in effect once both cells have run.

    Args:
        load_csv: Source of the load data (anything ``pd.read_csv`` accepts).
        weather_csv: Source of the weather data (anything ``pd.read_csv``
            accepts).

    Returns:
        A ``(load_df, weather)`` tuple of DataFrames, in that order.
    """
    read_options = {'parse_dates': ['timestamp']}
    load_frame = pd.read_csv(load_csv, **read_options)
    weather_frame = pd.read_csv(weather_csv, **read_options)
    return load_frame, weather_frame

def merge_data(load_df, weather):
    """Attach weather columns to the load rows and order the result by time.

    Note: a function with this name is also defined in an earlier cell; this
    later definition is the one in effect once both cells have run.

    Args:
        load_df: DataFrame with a ``timestamp`` column; all of its rows are
            kept (left join).
        weather: DataFrame with a ``timestamp`` column to join on.

    Returns:
        The joined DataFrame sorted by ``timestamp`` with a reset 0..n-1
        index. Load rows without a weather match have NaN weather values.
    """
    joined = load_df.merge(weather, how='left', on='timestamp')
    ordered = joined.sort_values('timestamp')
    return ordered.reset_index(drop=True)

def engineer_features(df):
    """Add calendar, lag and rolling-mean features derived from the input.

    The input frame is not modified; a copy with the extra columns is returned.

    Note: a function with this name is also defined in an earlier cell; this
    later definition is the one in effect once both cells have run.

    Added columns:
        hour, dayofweek, month: taken from ``timestamp`` via the ``.dt`` accessor.
        is_weekend: 1 when ``dayofweek`` is 5 or 6, otherwise 0.
        lag_1h, lag_24h: ``load_kwh`` shifted by 1 and 24 rows.
        rolling_24h: mean of ``load_kwh`` over the 24 rows preceding the
            current row (the current row itself is excluded).

    Args:
        df: DataFrame with a datetime ``timestamp`` column and a ``load_kwh``
            column. The lag names assume one row per hour, sorted by time
            -- TODO confirm against the caller.

    Returns:
        A new DataFrame with the added columns and remaining NaNs filled
        backward, then forward.
    """
    # Work on a copy so re-running the cell never sees an already-mutated frame.
    out = df.copy()

    timestamps = out['timestamp']
    out['hour'] = timestamps.dt.hour
    out['dayofweek'] = timestamps.dt.dayofweek
    out['month'] = timestamps.dt.month
    out['is_weekend'] = out['dayofweek'].isin([5, 6]).astype(int)

    load = out['load_kwh']
    out['lag_1h'] = load.shift(1)
    out['lag_24h'] = load.shift(24)
    # Shift before rolling: without it the window would contain the current
    # row's load_kwh, i.e. the value being predicted would leak into a feature.
    out['rolling_24h'] = load.shift(1).rolling(24).mean()

    # `.bfill()/.ffill()` replace the deprecated `fillna(method=...)` spelling.
    # NOTE(review): back-filling copies later values into the first rows of the
    # lag columns; dropping those rows instead would change the row count.
    return out.bfill().ffill()

def split_data(df, target_col='load_kwh', test_size=0.2, random_state=42, shuffle=True):
    """Split a feature frame into train/test features and targets.

    The ``timestamp`` column and the target column are removed from the
    features; the target column becomes ``y``.

    Note: a function with this name is also defined in an earlier cell; this
    later definition is the one in effect once both cells have run.

    Args:
        df: DataFrame containing ``timestamp``, ``target_col`` and the
            feature columns.
        target_col: Name of the column to predict.
        test_size: Forwarded to ``train_test_split``.
        random_state: Forwarded to ``train_test_split``.
        shuffle: Forwarded to ``train_test_split``. The default ``True`` keeps
            the previous behaviour. Pass ``False`` to keep row order, so that
            for a frame sorted by time the test set is the final block of rows.

    Returns:
        Whatever ``train_test_split(X, y, ...)`` returns
        (``X_train, X_test, y_train, y_test``).

    Raises:
        KeyError: If ``timestamp`` or ``target_col`` is not a column of ``df``.
    """
    y = df[target_col]
    X = df.drop(columns=['timestamp', target_col])
    return train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        shuffle=shuffle,
    )



In [17]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
import joblib
from sklearn.model_selection import train_test_split

# Generate synthetic dataset
# Seed NumPy's global RNG so every draw below is reproducible.
np.random.seed(42)
n_samples = 1000

# Each feature column is drawn independently; the order of the draws fixes the
# random stream, so do not reorder the entries.
df = pd.DataFrame({
    'temperature': np.random.uniform(10, 35, n_samples),
    'humidity': np.random.uniform(30, 90, n_samples),
    'wind_speed': np.random.uniform(0.5, 5, n_samples),
    'solar_radiation': np.random.uniform(0, 1000, n_samples),
    'hour': np.random.randint(0, 24, n_samples),
    'dayofweek': np.random.randint(0, 7, n_samples),
    'month': np.random.randint(1, 13, n_samples),
    # Placeholder draw, overwritten just below. It is kept so the random
    # stream, and therefore every other column, is unchanged.
    'is_weekend': np.random.randint(0, 2, n_samples),
    'lag_1h': np.random.uniform(80, 160, n_samples),
    'lag_24h': np.random.uniform(80, 160, n_samples),
    'rolling_24h': np.random.uniform(80, 160, n_samples)
})

# An independently drawn is_weekend contradicts dayofweek on many rows (e.g. a
# weekend flag on dayofweek 2). Derive it from dayofweek instead, using the same
# rule as engineer_features: 5 and 6 are the weekend.
df['is_weekend'] = df['dayofweek'].isin([5, 6]).astype(int)

# Generate target variable (load_kwh) with some noise
# The target is a linear combination of temperature, humidity, solar_radiation,
# lag_1h and lag_24h plus Gaussian noise (mean 0, standard deviation 5); the
# remaining columns do not enter the target.
df['load_kwh'] = (
    0.3 * df['temperature'] +
    0.2 * df['humidity'] +
    0.1 * df['solar_radiation'] / 100 +
    0.25 * df['lag_1h'] +
    0.15 * df['lag_24h'] +
    np.random.normal(0, 5, n_samples)
)

# Split into train/test
X = df.drop(columns=['load_kwh'])
y = df['load_kwh']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = XGBRegressor(n_estimators=100, max_depth=4, learning_rate=0.1, random_state=42)
model.fit(X_train, y_train)

# Save the trained model
# Written to the current working directory.
joblib.dump(model, "energy_model.joblib")
print("Model saved to energy_model.joblib")




Model saved to energy_model.joblib
