In [6]:
# Panda for reading in csv file
import pandas as pd
# plt for plotting graphs
import matplotlib.pyplot as plt
# Linear regression models
from sklearn.linear_model import LinearRegression
# Neighbour regressor model
from sklearn.neighbors import KNeighborsRegressor
# Logistic Regression Model
from sklearn.linear_model import LogisticRegression
# Neural Network model
from sklearn.neural_network import MLPRegressor
# Decision Tree Regressor
from sklearn.tree import DecisionTreeRegressor
# GridSearch for hyperparameter turning
from sklearn.model_selection import GridSearchCV
# function for splitting training and testing data
from sklearn.model_selection import train_test_split
# error scores to compare different regression algorithms
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [7]:
# Setup DataFrame
df = pd.read_csv('data/DataEnergyClean.csv')

In [8]:
# Setting up our X for the multidimensional input
X = df.drop(columns='Appliances')
# Setting up the value we want to predict (Appliances, energy use in Wh)
y = df['Appliances']

# Setup the training data (X_train, y_train) which will use 80% of the dataset, 
# and the test data (X_test, y_test) which will use the remaining 20% of the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Neural Network

In [None]:
# DO NOT RUN THIS CELL

# Parameter grid with hyperparameters to tune
param_grid = {
    'hidden_layer_sizes': [(64, 32), (128, 64), (64, 32, 16), (64,)],
    'activation': ['relu', 'tanh'],
    'learning_rate_init': [0.001, 0.01, 0.1],
    'batch_size': [16, 32, 64],
    'early_stopping': [True, False],
    'alpha': [0.0001, 0.001, 0.01]
}

# Specify solver directly in MLPRegressor
neural_network1 = MLPRegressor(
    solver='adam',
    **param_grid  # Include other hyperparameters from param_grid
)

# Grid search to tune hyperparameters
grid_search = GridSearchCV(neural_network1, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

#Save and print best hyperparameters
best_params = grid_search.best_params_

print(grid_search.best_params_)

# Train final model with best hyperparameters from grid search
final_neural_network = MLPRegressor(
    **best_params,  
)
final_neural_network.fit(X_train, y_train)


# Linear Regression

In [4]:
# DO NOT RUN THIS CELL

# Parameter grid with hyperparameters to tune
param_grid = {
    'fit_intercept': [True, False],
    'copy_X': [True, False],
    'positive': [True, False]
}

# Specify solver directly in LinearRegression
lr = LinearRegression(
    n_jobs=-1,
    **param_grid  # Include other hyperparameters from param_grid
)

# Grid search to tune hyperparameters
grid_search = GridSearchCV(lr, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

# Print best hyperparameters
print(grid_search.best_params_)

{'copy_X': True, 'fit_intercept': False, 'positive': False}


# K-NN

In [4]:
# DO NOT RUN THIS CELL

# Parameter grid with hyperparameters to tune
param_grid = {
    'n_neighbors': [2, 5, 10, 25],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'leaf_size': [15, 30, 50],
    'p': [1, 2, 3, 5]
}

# Specify solver directly in KNeighborsRegressor
knn = KNeighborsRegressor(
    n_jobs=-1,
    **param_grid  # Include other hyperparameters from param_grid
)

# Grid search to tune hyperparameters
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

#Save and print best hyperparameters
print(grid_search.best_params_)

{'algorithm': 'auto', 'leaf_size': 15, 'n_neighbors': 5, 'p': 1, 'weights': 'distance'}


# DTR

In [10]:
# DO NOT RUN THIS CELL (took like 50 mins)

# Parameter grid with hyperparameters to tune
param_grid = {
    'criterion': ['friedman_mse', 'squared_error', 'absolute_error', 'poisson'],
    'max_depth': [2, 5, 10, 15],
    'min_samples_split': [2, 5, 10, 15],
    'random_state': [2, 5, 10, 15]
}

# Specify solver directly in DTR
dtr = DecisionTreeRegressor(
    **param_grid  # Include other hyperparameters from param_grid
)

# Grid search to tune hyperparameters
grid_search = GridSearchCV(dtr, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

#Save and print best hyperparameters
print(grid_search.best_params_)

{'criterion': 'poisson', 'max_depth': 10, 'min_samples_split': 10, 'random_state': 2}
