In [2]:
# Libraries

import pickle
import wandb
import numpy as np

from dataset import energyProject_dataset
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [87]:
!wandb login

wandb: Currently logged in as: juliagartor (energy_project_uab). Use `wandb login --relogin` to force relogin


In [88]:
#load dataset object file

with (open('dataset_class.pkl', "rb")) as openfile:
    dataset = pickle.load(openfile)


https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


In [89]:
data = dataset.__getitem__(34) #Only one sample

In [90]:
data["y"].numpy()

array([25.55,  1.5 , 69.6 ])

In [91]:
#Changin the dataset mode to 0

dataset.activitivity_encoding_mode = 1

In [92]:
train_dataset, test_dataset = train_test_split(dataset, test_size=0.2, random_state=42) #Split into train and test

In [136]:
batch_size = 2  # Number of samples per batch
shuffle = False

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=shuffle)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


- Linear Regression

In [108]:
model = LinearRegression()

In [109]:
for batch in train_dataloader:
    
    batch_features = batch['x'].numpy()
    batch_labels =  batch['y'].numpy()

    model.fit(batch_features, batch_labels)


In [117]:
mse_list = []
rmse_list = []
r2_list = []

for batch in test_dataloader:
    batch_features = batch['x'].numpy()
    batch_labels = batch['y'].numpy()

    y_pred = model.predict(batch_features)

    mse = mean_squared_error(batch_labels, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(batch_labels, y_pred)

    mse_list.append(mse)
    rmse_list.append(rmse)
    r2_list.append(r2)


average_mse = sum(mse_list) / len(mse_list)
average_rmse = sum(rmse_list) / len(rmse_list)
average_r2 = sum(r2_list) / len(r2_list)

print("Average Metrics:")
print("Mean Squared Error (MSE):", average_mse)
print("Root Mean Squared Error (RMSE):", average_rmse)
print("R-squared (R2) Score:", average_r2)


Average Metrics:
Mean Squared Error (MSE): 6138.7168359419
Root Mean Squared Error (RMSE): 59.45904653597001
R-squared (R2) Score: -25009.33794576965


- Polynomial Regression

In [124]:
poly = PolynomialFeatures()

In [126]:
for batch in train_dataloader:
    
    batch_features = batch['x'].numpy()
    batch_labels = batch['y'].numpy()

    X_poly = poly.fit_transform(batch_features)
    
    model = LinearRegression()
    model.fit(X_poly, batch_labels)

In [129]:
mse_list = []
rmse_list = []
r2_list = []

for batch in test_dataloader:

    batch_features = batch['x'].numpy()
    batch_labels = batch['y'].numpy()

    x_new_poly = poly.transform(batch_features)
    y_pred = model.predict(x_new_poly)

    mse = mean_squared_error(batch_labels, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(batch_labels, y_pred)

    mse_list.append(mse)
    rmse_list.append(rmse)
    r2_list.append(r2)

average_mse = sum(mse_list) / len(mse_list)
average_rmse = sum(rmse_list) / len(rmse_list)
average_r2 = sum(r2_list) / len(r2_list)

print("Average Metrics:")
print("Mean Squared Error (MSE):", average_mse)
print("Root Mean Squared Error (RMSE):", average_rmse)
print("R-squared (R2) Score:", average_r2)


Average Metrics:
Mean Squared Error (MSE): 6135.024787578876
Root Mean Squared Error (RMSE): 59.42835747460919
R-squared (R2) Score: -24547.254748517516


- Random Forests

In [137]:
model = RandomForestRegressor(n_estimators=100, random_state=42)

In [None]:
for batch in train_dataloader:

    batch_features = batch['x'].numpy()
    batch_labels = batch['y'].numpy()

    model.fit(batch_features, batch_labels)

In [135]:
mse_list = []
rmse_list = []
r2_list = []

for batch in test_dataloader:
    batch_features = batch['x'].numpy()
    batch_labels = batch['y'].numpy()

    y_pred = model.predict(batch_features)

    mse = mean_squared_error(batch_labels, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(batch_labels, y_pred)

    mse_list.append(mse)
    rmse_list.append(rmse)
    r2_list.append(r2)

average_mse = sum(mse_list) / len(mse_list)
average_rmse = sum(rmse_list) / len(rmse_list)
average_r2 = sum(r2_list) / len(r2_list)

print("Average Metrics:")
print("Mean Squared Error (MSE):", average_mse)
print("Root Mean Squared Error (RMSE):", average_rmse)
print("R-squared (R2) Score:", average_r2)


Average Metrics:
Mean Squared Error (MSE): 6189.588998893645
Root Mean Squared Error (RMSE): 59.7398244785821
R-squared (R2) Score: -22677.645100802984
