In [1]:
import pandas as pd
import numpy as np
import os
import sys
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from helpers import *
from sklearn.preprocessing import StandardScaler
from tslearn.utils import to_time_series_dataset

In [2]:
# Project root: the parent of the current working directory (cwd joined with
# '..' and then normalised). All data locations below are built from it.
path = os.path.normpath(os.sep.join((os.getcwd(), os.pardir)))

In [3]:
# Load the full EC data for participants, indexed by its 'time' column
ec_csv = os.path.join(path, 'Data', 'participant full.csv')
df = pd.read_csv(ec_csv, index_col='time')
df.index = pd.to_datetime(df.index)

# Split the EC data into the period before the experiment and the period
# during it. Both ends of the "during" window are inclusive.
# NOTE(review): the upper bound is midnight of 2017-08-26, so only the 00:00
# sample of that day is kept — confirm this is intended.
experiment_start = datetime(2017, 6, 10)
experiment_end = datetime(2017, 8, 26)

is_before = df.index < experiment_start
is_during = (df.index >= experiment_start) & (df.index <= experiment_end)

df_before = df.loc[is_before]
df_during = df.loc[is_during]

In [4]:
# Reformatting weather data

weather_dir = os.path.join(path, 'Data', 'Weather', 'Houston')
weather_columns = ['Wind Speed', 'Relative Humidity', 'Temperature']

# Read every file in the weather folder into its own frame and concatenate
# once at the end; growing a DataFrame with pd.concat inside the loop copies
# all previously loaded rows on every iteration.
# sorted() makes the load order deterministic (os.listdir order is arbitrary).
weather_frames = []
for file_name in sorted(os.listdir(weather_dir)):
    raw = pd.read_csv(os.path.join(weather_dir, file_name))
    # Build the timestamp index from the separate date/time component columns
    raw.index = pd.to_datetime(raw[['Year', 'Month', 'Day', 'Hour', 'Minute']])
    # Drop the first five columns (presumably the date/time components used
    # above — confirm against the file layout)
    weather_frames.append(raw.iloc[:, 5:])

if not weather_frames:
    raise FileNotFoundError(f'No weather files found in {weather_dir}')

weather = pd.concat(weather_frames)

# Keep only the measurements used downstream
weather = weather[weather_columns]
# Resample to a 15-minute grid and fill the new slots by interpolation.
# '15min' is the non-deprecated spelling of the '15T' alias.
weather = weather.resample('15min').interpolate()

# Full weekday name (e.g. 'Monday') for every timestamp
weather['day of week'] = weather.index.strftime('%A')

# Separate the weather to during and before EC experiment
# (same date boundaries as the EC data split; both "during" ends inclusive)
weather_before = weather.loc[weather.index < datetime(2017, 6, 10)].copy()
weather_during = weather.loc[
    (weather.index >= datetime(2017, 6, 10)) & (weather.index <= datetime(2017, 8, 26))
].copy()

In [5]:
# A helper plotting function to see the similar day results
def plot_similar_day(before, after, title):
    """Draw the before/during similar-day curves on one figure and show it.

    Parameters
    ----------
    before : data accepted by matplotlib's ``plot``
        Plotted with the legend label 'Similar Day Average Before Experiment'.
    after : data accepted by matplotlib's ``plot``
        Plotted with the legend label 'Similar Day Average During Experiment'.
    title : str
        Title placed above the axes.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    _fig, ax = plt.subplots(figsize=(10, 5))

    ax.plot(before, label='Similar Day Average Before Experiment')
    ax.plot(after, label='Similar Day Average During Experiment')

    ax.set_xlabel('Hours (total 8 Hours, half hour interval)')
    ax.set_ylabel('Power (kW)')
    ax.legend()
    ax.set_title(title)

    plt.show()

In [None]:
# Clustering users based on similar day algorithm

# A list of all sections within a day to process; each title is
# '<season> <day type> <part of day>'. Only Summer sections are listed.
titles = ['Summer Weekend Morning', 'Summer Weekend Noon', 'Summer Weekend Evening', 'Summer Weekend Night',
          'Summer Weekday Morning', 'Summer Weekday Noon', 'Summer Weekday Evening', 'Summer Weekday Night']

clusters = [0, 1, 2]
# Second argument to train(); presumably the number of weather clusters,
# matching len(clusters) — confirm in helpers.
n_clusters = 3
ts_length = 24

# Output folders. DataFrame.to_csv does not create missing directories,
# so make sure they exist before the loop starts writing.
baseline_dir = os.path.join(path, 'Data', 'Similar Day', 'Baseline')
dr_dir = os.path.join(path, 'Data', 'Similar Day', 'DR')
os.makedirs(baseline_dir, exist_ok=True)
os.makedirs(dr_dir, exist_ok=True)

# Loop to generate similar day results for all users
for title in titles:
    season, day_type, day_part = title.split(sep=' ')

    # train() and predict() only take weather data and the title parts, so
    # they are run once per title instead of once per (cluster, user) pair.
    # Besides avoiding repeated work, this guarantees every user and cluster
    # of a title is evaluated against the same fitted model (relevant if the
    # clustering inside train() is not seeded — confirm in helpers).
    weather_idx_before, km_model, y_pred_before = train(
        weather_before, n_clusters, season, day_type, day_part, ts_length)
    weather_idx_during, km_model_during, y_pred_during = predict(
        weather_during, km_model, season, day_type, day_part, ts_length)

    for cluster in clusters:
        for user in df.columns:
            # Baseline from the pre-experiment period (mean=True)
            before = similar_day(df_before, user, weather_before, weather_idx_before,
                                 y_pred_before, cluster, title, ts_length, mean=True)
            if before is None:
                # No baseline for this user/cluster: nothing is written
                continue

            before = before.to_frame()
            before.columns = ['baseline']
            before.to_csv(os.path.join(baseline_dir, f'{user} {title} cluster {cluster} baseline.csv'), index=False)

            # Matching result for the experiment period (mean=False)
            after = similar_day(df_during, user, weather_during, weather_idx_during,
                                y_pred_during, cluster, title, ts_length, mean=False)
            if after is not None:
                after.to_csv(os.path.join(dr_dir, f'{user} {title} cluster {cluster}.csv'), index=False)

  temp.insert(0, start.strftime('%Y-%m-%d'), user_similar_day.loc[(user_similar_day.index >= start) & (user_similar_day.index < end)].values)
  temp.insert(0, start.strftime('%Y-%m-%d'), user_similar_day.loc[(user_similar_day.index >= start) & (user_similar_day.index < end)].values)
  temp.insert(0, start.strftime('%Y-%m-%d'), user_similar_day.loc[(user_similar_day.index >= start) & (user_similar_day.index < end)].values)
  temp.insert(0, start.strftime('%Y-%m-%d'), user_similar_day.loc[(user_similar_day.index >= start) & (user_similar_day.index < end)].values)
  temp.insert(0, start.strftime('%Y-%m-%d'), user_similar_day.loc[(user_similar_day.index >= start) & (user_similar_day.index < end)].values)
  temp.insert(0, start.strftime('%Y-%m-%d'), user_similar_day.loc[(user_similar_day.index >= start) & (user_similar_day.index < end)].values)
  temp.insert(0, start.strftime('%Y-%m-%d'), user_similar_day.loc[(user_similar_day.index >= start) & (user_similar_day.index < end)].values)
  temp