In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns

from datetime import datetime

from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression

ModuleNotFoundError: No module named 'numpy'

In [None]:
def date_to_datetime(row):
    """Combine a row's ``date`` and ``hour`` fields into a single ``datetime``.

    ``row`` is indexed by key: ``row['date']`` must expose ``year``/``month``/``day``
    and ``row['hour']`` is passed straight through as the hour of the day.
    """
    day = row['date']
    return datetime(day.year, day.month, day.day, row['hour'])

def weather_date_to_datetime(row):
    """Build a ``datetime`` from a weather row, shifting its hour down by one.

    The hour in the weather datasets looks to be from 1 to 24 rather than 0 to 23,
    so ``row['hour'] - 1`` is used as the hour of the day.
    """
    day = row['date']
    zero_based_hour = row['hour'] - 1
    return datetime(day.year, day.month, day.day, zero_based_hour)

def datetime_to_weekend(row):
    """Return 1 when ``row`` falls on a Saturday or Sunday, otherwise 0.

    ``row`` is a datetime-like value; ``weekday()`` numbers Monday as 0 and
    Sunday as 6, so values of 5 and above are the weekend.
    """
    return int(row.weekday() >= 5)

def datetime_to_night(row):
    """Return 1 when ``row`` is a night-time hour, otherwise 0.

    Night is treated as 23:00 up to (but not including) 07:00.

    ``row`` is a datetime-like value exposing ``hour`` in the range 0..23.
    The previous upper check was ``hour > 23``, which can never be true for a
    datetime, so the 23:00 hour was silently classified as daytime.
    """
    if row.hour < 7 or row.hour >= 23:
        return 1
    return 0
    
def heating_deg_day(row):
    """Return how far ``row["temperature"]`` sits below the base value of 18.

    The result is a plain difference and is not clamped at zero, so it is
    negative whenever the temperature is above 18.
    """
    base_temperature = 18
    return base_temperature - row["temperature"]

def cooling_deg_day(row):
    """Return how far ``row["temperature"]`` sits above the base value of 18.

    The result is a plain difference and is not clamped at zero, so it is
    negative whenever the temperature is below 18.
    """
    base_temperature = 18
    return row["temperature"] - base_temperature

In [None]:
# Load the weather readings, parse the `date` column, and derive a per-row
# `datetime` (weather hours are shifted by weather_date_to_datetime).
weather = (
    pd.read_csv('./load_data/Weather_YVR.csv')
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .assign(datetime=lambda frame: frame.apply(weather_date_to_datetime, axis=1))
)
weather.head()

In [None]:
# Read each residential CSV (houses 1 through 28), tag it with its house
# number and the weekend/night flags, then stack everything into `main`.
# House 15 is skipped; the reason is not visible in this notebook.
house_frames = []
for house_number in range(1, 29):
    if house_number == 15:
        continue
    house_df = pd.read_csv(f'./load_data/Residential_{house_number}.csv')
    house_df['date'] = pd.to_datetime(house_df['date'])
    house_df['datetime'] = house_df.apply(date_to_datetime, axis=1)
    house_df["house"] = f"{house_number}"
    house_df["weekend"] = house_df['datetime'].apply(datetime_to_weekend)
    house_df['night'] = house_df['datetime'].apply(datetime_to_night)
    house_frames.append(house_df)

# Each house keeps its own row index here, so index labels repeat in `main`.
main = pd.concat(house_frames)
main.head()

In [None]:
# Left-join the weather readings onto every load row by timestamp. Columns that
# exist on both sides keep their plain name for the load data and get a
# "_weather" suffix for the weather data.
df = main.merge(weather, how="left", on="datetime", suffixes=["", "_weather"])
df.head()

In [None]:
# Keep only the columns used for modelling and discard any row with a missing
# value in one of them.
model_columns = ["night", "weekend", 'hour', "temperature", "energy_kWh"]
big_data = df[model_columns].dropna(axis=0)
big_data

In [None]:
# Energy use against temperature, restricted to rows where `hour` is 12.
noon_readings = big_data[big_data['hour'] == 12]
plot = sns.scatterplot(data=noon_readings, x="temperature", y="energy_kWh")

In [None]:
# One shared encoder is refit for every column group below, in this order.
# After this cell `ohe` therefore holds the fit for ["hour", "night", "weekend"].
ohe = OneHotEncoder(handle_unknown="ignore", sparse_output=False).set_output(transform="pandas")

encoded_column_groups = [
    ["hour"],
    ["night"],
    ["weekend"],
    ["night", "weekend"],
    ["hour", "weekend"],
    ["hour", "night", "weekend"],
]
ohehour, ohenight, oheweekend, ohenight_weekend, ohehour_weekend, oheall = (
    ohe.fit_transform(big_data[group]) for group in encoded_column_groups
)

In [None]:
# Build one modelling frame per encoding: the one-hot columns are appended to
# `big_data` and the raw categorical columns are then removed, leaving
# temperature, energy_kWh and the encoded columns.
raw_categoricals = ["hour", "weekend", "night"]


def _with_encoded(encoded):
    """Return `big_data` plus `encoded`, without the raw categorical columns."""
    combined = pd.concat([big_data, encoded], axis=1)
    return combined.drop(columns=raw_categoricals)


allDf = _with_encoded(oheall)
hourDf = _with_encoded(ohehour)
nightDf = _with_encoded(ohenight)
weekendDf = _with_encoded(oheweekend)
night_weekendDf = _with_encoded(ohenight_weekend)
hour_weekendDf = _with_encoded(ohehour_weekend)
allDf

In [None]:
def fit_and_score(model, frame, label, target="energy_kWh", train_frac=0.8, seed=0):
    """Fit ``model`` on a random split of ``frame`` and print its held-out score.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``score(X, y)``; it is refit in place.
    frame : DataFrame holding the feature columns plus the ``target`` column.
    label : heading printed above the score.
    target : name of the column to predict.
    train_frac : fraction of rows sampled into the training set.
    seed : ``random_state`` for the sample, so the split is reproducible.

    Returns
    -------
    The value of ``model.score`` on the rows that were not sampled for training.
    """
    train = frame.sample(frac=train_frac, random_state=seed)
    # Everything not drawn for training becomes the test set.
    test = frame.drop(train.index)

    x_train = train.drop(columns=[target])
    y_train = train[target]
    x_test = test.drop(columns=[target])
    y_test = test[target]

    model.fit(x_train.to_numpy(), y_train.to_numpy())
    score = model.score(x_test.to_numpy(), y_test.to_numpy())
    print(label)
    print(score)
    return score


model = LinearRegression()

# The same estimator is refit for each feature set in turn. "All" must stay
# last, because the single-house evaluation below reuses `model` with the
# full hour/night/weekend feature layout.
feature_sets = [
    ("Hour only", hourDf),
    ("Night only", nightDf),
    ("Weekend only", weekendDf),
    ("Weekend and night only", night_weekendDf),
    ("Hour and weekend only", hour_weekendDf),
    ("All", allDf),
]

scores = {}
for label, frame in feature_sets:
    scores[label] = fit_and_score(model, frame, label)

In [None]:
# Check the model fitted on all houses (the "All" feature set) against one house.
# The file path and the `house` label now come from the same number; the label
# was previously "1" although the file loaded is Residential_11.
house_number = 11
one_house = pd.read_csv(f'./load_data/Residential_{house_number}.csv')
one_house['date'] = pd.to_datetime(one_house['date'])
one_house['datetime'] = one_house.apply(date_to_datetime, axis=1)
one_house["house"] = f"{house_number}"
one_house["weekend"] = one_house['datetime'].apply(datetime_to_weekend)
one_house['night'] = one_house['datetime'].apply(datetime_to_night)

one_house = pd.merge(one_house, weather, how="left", on="datetime", suffixes=["", "_weather"])
nonan_one_house = one_house[["night", "weekend", 'hour',"temperature", "energy_kWh"]].dropna(axis=0)

# Use transform, not fit_transform: `ohe` was last fitted on the training data's
# ["hour", "night", "weekend"] columns. Refitting on this house could yield a
# different set or order of one-hot columns than `model` was trained on.
one_oheall = ohe.transform(nonan_one_house[["hour", "night", "weekend"]])
all_one_house = pd.concat([nonan_one_house, one_oheall], axis=1).drop(columns=["hour", "weekend", "night"])

# Named to avoid shadowing the builtin `input`.
one_house_features = all_one_house.drop(columns=["energy_kWh"]).to_numpy()
y_pred = model.predict(one_house_features)
print(model.score(one_house_features, nonan_one_house["energy_kWh"]))

# Observed consumption (points) with the model's predictions overlaid (red line).
sns.scatterplot(data=nonan_one_house, x="temperature", y="energy_kWh")
sns.lineplot(data=nonan_one_house, x="temperature", y=y_pred, color="red")