In [14]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [15]:

# Load the raw data. A forward-slash path works on Windows, Linux and macOS,
# whereas the previous backslash-separated path only resolved on Windows.
df = pd.read_csv('data/Dataset.csv')

# Use the short column names 'ds' (timestamp) and 'y' (target) from here on.
df = df.rename(columns={'Datetime': 'ds', 'Occupancy': 'y'})

df['ds'] = pd.to_datetime(df['ds'])

# Calendar features derived from the timestamp; these are the model inputs.
df['hour'] = df['ds'].dt.hour
df['day'] = df['ds'].dt.day
df['dayofweek'] = df['ds'].dt.dayofweek
df['month'] = df['ds'].dt.month

# Collector for one [model name, MAE, RMSE, MSE, R2] row per evaluated model.
results = []

# Keep the preview as the cell's last expression so the notebook renders it.
df.head()

In [16]:
# Chronological hold-out: order rows by timestamp, then cut at the 80 % mark
# so the last 20 % of rows (the latest timestamps) form the test set.
df = df.sort_values('ds')
cut = int(len(df) * 0.8)
train, test = df.iloc[:cut], df.iloc[cut:]

# Feature columns and target column. `y` is a list, so ytrain / ytest are
# single-column DataFrames rather than Series.
x = ['hour', 'day', 'dayofweek', 'month']
y = ['y']

xtrain, ytrain = train[x], train[y]
xtest, ytest = test[x], test[y]

In [17]:
def metrics(y_true, y_pred):
    """Score predictions and return ``(mae, rmse, mse, r2)`` rounded to 3 decimals.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values, aligned with ``y_true``.

    Returns
    -------
    tuple
        ``(mae, rmse, mse, r2)`` in that order (RMSE comes before MSE).
    """
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    # Take the root of the *unrounded* MSE. Rooting the rounded value compounds
    # the rounding error: an MSE of 0.0004 rounds to 0.0 and would force the
    # reported RMSE to 0.0 instead of 0.02.
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    # Round only once, at the very end, for presentation.
    return round(mae, 3), round(rmse, 3), round(mse, 3), round(r2, 3)

In [18]:
from sklearn.tree import DecisionTreeRegressor

# Depth-limited regression tree with a fixed random_state.
dt = DecisionTreeRegressor(random_state=42, max_depth=10)
dt.fit(xtrain, ytrain)
ypred = dt.predict(xtest)

# metrics() already rounds every score to 3 decimals, so print them as-is.
mae, rmse, mse, r2 = metrics(ytest, ypred)
for label, score in (("MAE:", mae), ("RMSE:", rmse), ("MSE:", mse), ("R² Score:", r2)):
    print(label, score)

results.append(['Decision Tree', mae, rmse, mse, r2])

MAE: 0.439
RMSE: 0.567
MSE: 0.321
R² Score: 0.797


In [19]:
from sklearn.ensemble import RandomForestRegressor

# 100 depth-limited trees with a fixed random_state.
rf = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42)
# ytrain is a single-column DataFrame; flatten it to 1-D so the estimator is
# not handed a column-vector target (the likely source of the warning
# fragment in this cell's recorded output).
rf.fit(xtrain, ytrain.values.ravel())
ypred = rf.predict(xtest)

# metrics() already rounds every score to 3 decimals, so print them as-is.
mae, rmse, mse, r2 = metrics(ytest, ypred)
print("MAE:", mae)
print("RMSE:", rmse)
print("MSE:", mse)
print("R² Score:", r2)

results.append(['Random Forest', mae, rmse, mse, r2])

  return fit_method(estimator, *args, **kwargs)


MAE: 0.432
RMSE: 0.54
MSE: 0.292
R² Score: 0.815


In [20]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares baseline on the calendar features.
lr = LinearRegression()
lr.fit(xtrain, ytrain)
ypred = lr.predict(xtest)

# metrics() already rounds every score to 3 decimals, so print them as-is.
mae, rmse, mse, r2 = metrics(ytest, ypred)
print("MAE:", mae)
print("RMSE:", rmse)
print("MSE:", mse)
print("R² Score:", r2)

# Label fixed: it was previously misspelled as 'Lineaar Regression', which
# then showed up in the results table.
results.append(['Linear Regression', mae, rmse, mse, r2])

MAE: 0.939
RMSE: 1.147
MSE: 1.315
R² Score: 0.17


In [21]:
from sklearn.svm import SVR

# Support vector regressor with library-default hyperparameters.
svm = SVR()
# ytrain is a single-column DataFrame; flatten it to 1-D so the estimator is
# not handed a column-vector target (this cell's recorded output shows the
# `column_or_1d(y, warn=True)` warning fragment). fit() is called for its
# side effect only, so the fitted object is not reassigned.
svm.fit(xtrain, ytrain.values.ravel())
ypred = svm.predict(xtest)

# metrics() already rounds every score to 3 decimals, so print them as-is.
mae, rmse, mse, r2 = metrics(ytest, ypred)
print("MAE:", mae)
print("RMSE:", rmse)
print("MSE:", mse)
print("R² Score:", r2)

results.append(['Support Vector Regressor', mae, rmse, mse, r2])

  y = column_or_1d(y, warn=True)


MAE: 0.633
RMSE: 0.806
MSE: 0.649
R² Score: 0.59


In [22]:
from sklearn.ensemble import GradientBoostingRegressor

# 100 boosting stages of depth-limited trees with a fixed random_state.
gbr = GradientBoostingRegressor(n_estimators=100, max_depth=10, random_state=42)
# ytrain is a single-column DataFrame; flatten it to 1-D so the estimator is
# not handed a column-vector target (this cell's recorded output shows the
# `column_or_1d(y, warn=True)` warning fragment).
gbr.fit(xtrain, ytrain.values.ravel())
ypred = gbr.predict(xtest)

# metrics() already rounds every score to 3 decimals, so print them as-is.
mae, rmse, mse, r2 = metrics(ytest, ypred)
print("MAE:", mae)
print("RMSE:", rmse)
print("MSE:", mse)
print("R² Score:", r2)

results.append(['Gradient Boosting', mae, rmse, mse, r2])

  y = column_or_1d(y, warn=True)  # TODO: Is this still required?


MAE: 0.444
RMSE: 0.587
MSE: 0.344
R² Score: 0.783


In [37]:
# One row per evaluated model. Rows are numbered from 1 based on how many
# results were actually collected, so adding a model or re-running a model
# cell does not raise the length-mismatch error that a hard-coded
# five-element index would.
results_df = pd.DataFrame(
    results,
    columns=['Model', 'MAE', 'RMSE', 'MSE', 'R2'],
    index=range(1, len(results) + 1),
)
results_df

Unnamed: 0,Model,MAE,RMSE,MSE,R2
1,Decision Tree,0.439,0.567,0.321,0.797
2,Random Forest,0.432,0.54,0.292,0.815
3,Lineaar Regression,0.939,1.147,1.315,0.17
4,Support Vector Regressor,0.633,0.806,0.649,0.59
5,Gradient Boosting,0.444,0.587,0.344,0.783
