In [None]:
# Step 1: File Upload

import pandas as pd
from sklearn.model_selection import train_test_split

# Read the raw energy-consumption CSV into a DataFrame.
# NOTE(review): this is an absolute, environment-specific path — adjust it when
# running the notebook somewhere else.
file_path = '/mnt/data/Complete_South_African_Energy_Consumption.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows to sanity-check the load.
print(df.head(n=5))


In [None]:
# Step 2: Data Preprocessing
#
# NOTE: this cell rebinds `df` and drops the 'DateTime' column, so it expects a
# freshly loaded `df`; re-running it without reloading fails on df['DateTime'].

# Name of the column to predict, kept in one place so features and target
# cannot drift apart.
TARGET_COLUMN = 'target_column'  # Replace 'target_column' with your actual target

# Convert 'DateTime' to datetime format and extract calendar features
df['DateTime'] = pd.to_datetime(df['DateTime'])
df['hour'] = df['DateTime'].dt.hour
df['day'] = df['DateTime'].dt.day
df['month'] = df['DateTime'].dt.month
df['day_of_week'] = df['DateTime'].dt.dayofweek

# Drop the 'DateTime' column (its information now lives in the features above)
df = df.drop(columns=['DateTime'])

# Check for missing values
print(df.isnull().sum())

# Drop rows with missing values BEFORE one-hot encoding. get_dummies encodes a
# missing 'Building' as all-zero dummy columns, which is indistinguishable from
# the dropped baseline category, so a later dropna() would never remove it.
df = df.dropna()

# One-hot encode 'Building' column. dtype=float keeps the whole feature matrix
# numeric; recent pandas versions otherwise emit boolean dummy columns, which
# turn `X.values` into an object array.
df = pd.get_dummies(df, columns=['Building'], drop_first=True, dtype=float)

# Fail early with an actionable message if the placeholder was not replaced.
if TARGET_COLUMN not in df.columns:
    raise KeyError(
        f"Target column {TARGET_COLUMN!r} not found; set TARGET_COLUMN to one of: "
        f"{list(df.columns)}"
    )

# Separate features and target
X = df.drop(columns=[TARGET_COLUMN])
y = df[TARGET_COLUMN]


In [None]:
# Step 3: Train-Test Split

# Hold out 20% of the rows for testing; the fixed seed makes the shuffle
# (and therefore the split) repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Step 4: Model Building

from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Example: LSTM Model
#
# Keras needs a homogeneous numeric array. `.values` on a frame that mixes
# numeric and boolean (one-hot) columns yields dtype=object, which cannot be
# converted to a tensor, so cast explicitly to float32 before reshaping.
# Resulting shape is (n_samples, n_features, 1): the model below therefore
# reads each row as a sequence of n_features steps with one value per step.
X_train_rnn = X_train.to_numpy(dtype='float32').reshape((X_train.shape[0], X_train.shape[1], 1))
X_test_rnn = X_test.to_numpy(dtype='float32').reshape((X_test.shape[0], X_test.shape[1], 1))

# Two stacked LSTM layers followed by a single-unit regression head.
lstm_model = Sequential()
# return_sequences=True so the second LSTM layer receives the full sequence.
lstm_model.add(LSTM(50, return_sequences=True, input_shape=(X_train_rnn.shape[1], 1)))
lstm_model.add(LSTM(50))
lstm_model.add(Dense(1))

lstm_model.compile(optimizer='adam', loss='mean_squared_error')
lstm_model.fit(X_train_rnn, y_train, epochs=10, batch_size=32)

# The stochastic estimators below are seeded with the same value used for the
# train/test split (42) so that repeated runs give the same fitted models.

# XGBoost Model
xgb_model = XGBRegressor(random_state=42)
xgb_model.fit(X_train, y_train)

# Random Forest Model
rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(X_train, y_train)

# Decision Tree Model
dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X_train, y_train)

# SVM Model (SVR takes no random seed)
svm_model = SVR()
svm_model.fit(X_train, y_train)


In [None]:
# Step 5: Model Evaluation

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score
import numpy as np

def evaluate_model(y_true, y_pred, X_train):
    """Compute a set of regression metrics for one model's predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values, one per entry of ``y_true``.
    X_train : object with a ``shape`` attribute
        Only ``X_train.shape[1]`` is read; it is used as the number of
        predictors in the adjusted R² formula.

    Returns
    -------
    dict
        Keys ``'MSE'``, ``'RMSE'``, ``'MAE'``, ``'R²'``, ``'Adj R²'`` and
        ``'Explained Variance'``. ``'Adj R²'`` is NaN when
        ``len(y_true) - n_features - 1 <= 0``, because the formula is
        undefined there (it would divide by zero or by a negative number).
    """
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    n_samples = len(y_true)
    n_features = X_train.shape[1]
    # Residual degrees of freedom; must be positive for adjusted R² to exist.
    dof = n_samples - n_features - 1
    if dof > 0:
        adj_r2 = 1 - (1 - r2) * (n_samples - 1) / dof
    else:
        adj_r2 = float('nan')

    evs = explained_variance_score(y_true, y_pred)

    return {'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'R²': r2, 'Adj R²': adj_r2, 'Explained Variance': evs}

# Collect test-set metrics for every trained model, keyed by display name.
model_performance = {}

# Evaluate LSTM model
# The network ends in Dense(1), so predict() returns a 2-D (n_samples, 1)
# array; flatten it so it lines up element-wise with the 1-D y_test in later
# arithmetic such as residual computation.
lstm_pred = lstm_model.predict(X_test_rnn).ravel()
model_performance['LSTM'] = evaluate_model(y_test, lstm_pred, X_train)

# Evaluate XGBoost model
xgb_pred = xgb_model.predict(X_test)
model_performance['XGBoost'] = evaluate_model(y_test, xgb_pred, X_train)

# Evaluate Random Forest model
rf_pred = rf_model.predict(X_test)
model_performance['Random Forest'] = evaluate_model(y_test, rf_pred, X_train)

# Evaluate Decision Tree model
dt_pred = dt_model.predict(X_test)
model_performance['Decision Tree'] = evaluate_model(y_test, dt_pred, X_train)

# Evaluate SVM model
svm_pred = svm_model.predict(X_test)
model_performance['SVM'] = evaluate_model(y_test, svm_pred, X_train)


In [None]:
# Step 6: Plotting Model Performance

import matplotlib.pyplot as plt

# Example: bar chart comparing the test-set MSE of every evaluated model.
# Names and scores are both read in the dictionary's iteration order, so the
# two lists stay aligned.
model_names = [name for name in model_performance]
mse_scores = [metrics['MSE'] for metrics in model_performance.values()]

plt.figure(figsize=(10, 6))
plt.bar(model_names, mse_scores)
plt.xlabel('Models')
plt.ylabel('MSE')
plt.title('Model Comparison - MSE')
plt.show()


In [None]:
# Step 7: Residual Plot

import seaborn as sns

def plot_residuals(y_true, y_pred, model_name):
    """Scatter-plot residuals (``y_true - y_pred``) against predicted values.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values; may be 1-D or a single-column 2-D array.
    model_name : str
        Label inserted into the plot title.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not contain the same number of values.
    """
    # Flatten both inputs to plain 1-D arrays. A 2-D (n, 1) prediction array
    # subtracted from a 1-D y_true would broadcast or fail instead of giving
    # one residual per sample, and dropping any pandas index guarantees the
    # subtraction is positional.
    actual = np.asarray(y_true).ravel()
    predicted = np.asarray(y_pred).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            f'y_true and y_pred must have the same number of values, '
            f'got {actual.shape[0]} and {predicted.shape[0]}'
        )

    residuals = actual - predicted
    plt.figure(figsize=(10, 6))
    sns.scatterplot(x=predicted, y=residuals)
    # Zero line: points above it are under-predictions, below it over-predictions.
    plt.axhline(0, color='red', linestyle='--')
    plt.title(f'Residual Plot for {model_name}')
    plt.xlabel('Predicted Values')
    plt.ylabel('Residuals')
    plt.show()

# Example: residual plot for the LSTM model's test-set predictions.
plot_residuals(y_true=y_test, y_pred=lstm_pred, model_name='LSTM')
