In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path
from LinearRegression import LinearRegression

In [None]:
# The CSV is expected to sit in the current working directory.
data_path = Path.cwd()
csv_file = data_path / 'test_energy_data.csv'
df = pd.read_csv(csv_file)

# Preview the first ten rows.
df.head(10)

# Visualize data

In [None]:
# Pairwise scatter plots of the columns, coloured by the 'Day of Week' category.
sns.pairplot(data=df, hue='Day of Week')

In [None]:
# Correlations are only defined for numeric columns, so filter those out first.
numeric_df = df.select_dtypes(include=[np.number])
corr_matrix = numeric_df.corr()

# Annotated heatmap of the pairwise correlation coefficients.
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')

# Prepare data

In [None]:
# Encode Day of Week numerically: -1 for weekend, 1 for weekday.
# Writing ints into the string column through .loc leaves it with object dtype
# (and may be rejected by pandas' dedicated string dtype), so the whole column
# is mapped and then converted to a real numeric dtype instead.
day_codes = {'Weekend': -1, 'Weekday': 1}
# Values without a code (e.g. numbers that were already encoded when the cell
# is re-run) pass through unchanged, which keeps this cell safe to re-run.
# pd.to_numeric raises on any unexpected category rather than hiding it.
df['Day of Week'] = pd.to_numeric(
    df['Day of Week'].map(lambda day: day_codes.get(day, day))
)

# Remove Building Type (guarded so re-running the cell does not fail)
if 'Building Type' in df.columns:
    df = df.drop(['Building Type'], axis=1)
df.head(10)

# Linear Regression

In [None]:
# Predictors: the first two columns of the prepared frame.
features = df.columns[:2]
X = df[features].to_numpy()

# Target: 'Energy Consumption', selected with a list so it stays a 2-D column.
y = df[['Energy Consumption']].to_numpy()

In [None]:
# Build the regression model on the selected features and target.
# NOTE(review): lr is presumably the learning rate and eps a convergence
# tolerance — confirm against the LinearRegression module.
model = LinearRegression(
    X=X,
    y_gt=y,
    lr=1e-9,
    eps=1e-6,
)

In [None]:
# Train the model, then report its score to four decimal places.
model.fit()
r_squared = model.score()
print(f'R-squared: {r_squared:.04f}')

In [None]:
# Pass the selected column names so the plot can label each feature
# (presumably one entry per fitted coefficient — confirm in LinearRegression).
model.plot_featureimportance(names=features)

In [None]:
def plot_mse(y_pred, y_gt, threshold=0):
    """Draw a stem plot of the per-sample absolute error |y_pred - y_gt|.

    Both inputs are flattened before the subtraction, so a column vector of
    shape (n, 1) and a flat vector of shape (n,) are compared element by
    element instead of broadcasting into an (n, n) matrix.

    Parameters
    ----------
    y_pred : array-like
        Predicted values.
    y_gt : array-like
        Ground-truth values, one per prediction.
    threshold : float, default 0
        Height of the dotted horizontal reference line.
    """
    # Flatten first: (n,) - (n, 1) would otherwise broadcast to (n, n).
    y_pred = np.asarray(y_pred).ravel()
    y_gt = np.asarray(y_gt).ravel()
    error = np.abs(y_pred - y_gt)

    plt.figure(figsize=(30,10))
    plt.stem(error, linefmt='navy')
    # Reference line spanning the full sample range.
    plt.hlines(threshold, 0, error.shape[0], lw=2, ls=':', color='darkorange')
    plt.ylabel('|y_pred-y_gt|')
    plt.show()

In [None]:
# predict() is called without arguments, so it presumably predicts for the X
# passed at construction — confirm against the LinearRegression module.
y_pred = model.predict()

In [None]:
# Stem plot of the absolute error between the predictions and the
# 'Energy Consumption' targets; the reference line stays at its default of 0.
plot_mse(y_pred, y)