In [1]:
import pandas as pd
import numpy as np
import os
import sys
import pickle
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import timedelta
from functions import *
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tslearn.utils import to_time_series_dataset
from functools import reduce

In [2]:
# Normalised path of the parent of the current working directory; the data
# folders used in this notebook are resolved relative to it.
path = os.path.normpath(os.sep.join((os.getcwd(), os.pardir)))

In [3]:
# Folders with the DR-period files and the matching baseline files.
DR_path, baseline_path = (
    os.path.join(path, 'Data', 'Similar Day', folder) for folder in ('DR', 'Baseline')
)
# Folder where the raw flexibility ratios are written.
flex_path = os.path.join(path, 'Data', 'Flexibility', 'Raw')

In [4]:
# Go through the clustered DR files and compute the flexibility as the
# element-wise ratio between the power consumption during the DR time period
# and the matching baseline consumption.
# Smaller is better: the larger the baseline relative to the DR usage, the
# smaller the ratio.

# Make sure the output folder exists before any result is written into it.
os.makedirs(flex_path, exist_ok=True)

# Sorted so the processing order is the same on every run.
for dr_file in sorted(os.listdir(DR_path)):
    dr_full_path = os.path.join(DR_path, dr_file)

    # os.listdir also returns sub-directories (e.g. `.ipynb_checkpoints`) and
    # stray non-CSV files; those would crash `pd.read_csv`, so skip them.
    if not os.path.isfile(dr_full_path) or not dr_file.lower().endswith('.csv'):
        continue

    # Some days did not match the weather pattern of any cluster, so an
    # (almost) empty file may have been saved; ignore files of at most 2 bytes.
    if os.stat(dr_full_path).st_size <= 2:
        continue

    # Baseline and output names are derived from the part of the DR file name
    # that precedes the first dot.
    stem = dr_file.split(sep='.')[0]
    baseline_file = stem + ' baseline.csv'
    baseline_full_path = os.path.join(baseline_path, baseline_file)

    # Without a matching baseline there is nothing to compare against.
    if not os.path.exists(baseline_full_path):
        continue

    dr = pd.read_csv(dr_full_path)
    baseline = pd.read_csv(baseline_full_path)
    flex_file = stem + ' flex.csv'

    # Sum every 4 consecutive rows into one row.
    # NOTE(review): presumably 15-minute readings aggregated to hourly totals - confirm.
    dr = dr.groupby(np.arange(len(dr)) // 4).sum()
    baseline = baseline.groupby(np.arange(len(baseline)) // 4).sum()

    # Positional division: both frames are assumed to share shape and column
    # order; the DR column labels are reused for the result.
    ratio = pd.DataFrame(dr.values / baseline.values, columns=dr.columns)
    ratio.to_csv(os.path.join(flex_path, flex_file), index=False)

In [5]:
# Collect the distinct users found in the flexibility folder. The user is the
# first space-separated token of each file name; order of first appearance is kept.
users = list(dict.fromkeys(
    name.split(sep=' ')[0] for name in os.listdir(os.path.join(flex_path))
))

In [6]:
# Cluster ids and day-type titles.
clusters = list(range(3))
title = ['Summer Weekday', 'Summer Weekend']


def read_df_exist(path):
    """Read a CSV file only when it is present.

    Parameters
    ----------
    path : str
        Location of the CSV file to load.

    Returns
    -------
    pandas.DataFrame or None
        The parsed file when `path` exists, otherwise ``None``.
    """
    return pd.read_csv(path) if os.path.exists(path) else None

The next section combines all of the time sections from the same day, for the same user and the same cluster number, into one file.
The second method combines all of the time sections from the same day, for the same user but across multiple clusters, into one file.

In [7]:
# This section creates helper dictionaries that will be used to piece together
# all of the time sections spread across different clusters of the same user.

raw_flex_dir = os.path.join(path, 'Data', 'Flexibility', 'Raw')

# Dictionary with the user as key and the list of that user's file names as value.
# The user is the first space-separated token of the file name. Grouping on the
# exact token (rather than `file.startswith(user)`) keeps the files of user "10"
# from also being assigned to user "1". The folder is listed once, not once per user.
files = {}
for file in sorted(os.listdir(raw_flex_dir)):
    # Skip sub-directories (e.g. `.ipynb_checkpoints`) and non-CSV entries,
    # which would otherwise become bogus users and break `pd.read_csv` below.
    if not file.lower().endswith('.csv') or not os.path.isfile(os.path.join(raw_flex_dir, file)):
        continue
    files.setdefault(file.split(sep=' ')[0], []).append(file)

# Reads in all of the raw flexibility files: users[user][file name] -> DataFrame
users = {
    user: {
        file_name: pd.read_csv(os.path.join(raw_flex_dir, file_name))
        for file_name in user_files
    }
    for user, user_files in files.items()
}

# Column labels of every flexibility file (one Index per file); the columns are
# treated as dates below.
dates = [df.columns for user_frames in users.values() for df in user_frames.values()]

# All unique days
days = set()
for columns in dates:
    days.update(columns)


In [8]:
# For Winter, create mapping; only need to run once

# One consecutive calendar date, starting at 2005-12-31, for every entry in `days`.
# NOTE(review): only the number of entries in `days` influences the result; the
# day labels themselves never reach the final mapping - confirm this is intended.
days_temp = [
    datetime(2005, 12, 31) + timedelta(days=offset) for offset in range(len(days))
]

# Keys are consecutive dates starting at 2017-01-01; values are the dates above.
winter_days = {
    datetime(2017, 1, 1) + timedelta(days=offset): mapped_day
    for offset, mapped_day in enumerate(days_temp)
}

# Persist the mapping so this cell does not have to be run again.
with open(os.path.join(path, 'Data', 'summer2winter days.pickle'), 'wb') as handle:
    pickle.dump(winter_days, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [9]:
# Nested lookup: user id -> day -> list of that user's flexibility file names
# whose columns include that day. Every day gets an entry, possibly an empty list.
user_day_dict = {
    user_id: {
        day: [
            file_name
            for file_name, frame in user_frames.items()
            if day in frame.columns
        ]
        for day in days
    }
    for user_id, user_frames in users.items()
}

In [11]:
# This outputs the flexibility profile for all users for the full day, instead
# of 4 separate sections.

# Order in which the sections are stacked to form a full day. It is also the
# precedence used when matching a section label against a file name.
section_order = ('Morning', 'Noon', 'Evening', 'Night')

raw_flex_dir = os.path.join(path, 'Data', 'Flexibility', 'Raw')
full_flex_dir = os.path.join(path, 'Data', 'Flexibility', 'Summer User Full')
# Make sure the output folder exists before writing into it.
os.makedirs(full_flex_dir, exist_ok=True)

# The loop variable is `day_files` rather than `days`, so the set of unique days
# built earlier in the notebook is not overwritten when this cell runs.
for user, day_files in user_day_dict.items():
    loaded = {}            # file name -> DataFrame, so each file is read once per user
    full_day_columns = {}  # day -> full-day Series, in iteration order

    for day, section_files in day_files.items():
        # A full day is made of exactly four files, one per section.
        if len(section_files) != 4:
            continue

        sections = {}
        for file in section_files:
            label = next((name for name in section_order if name in file), None)
            if label is None:
                continue
            if file not in loaded:
                loaded[file] = pd.read_csv(os.path.join(raw_flex_dir, file))
            sections[label] = loaded[file][day]

        # Require all four distinct sections. Four files that repeat a section
        # (e.g. the same section from two clusters) previously reused a stale
        # section from an earlier day, or raised NameError on the first day.
        if len(sections) != len(section_order):
            continue

        # ignore_index gives a clean 0..n-1 index instead of four repeated ranges.
        full_day_columns[day] = pd.concat(
            [sections[name] for name in section_order], ignore_index=True
        )

    # Each new day used to be inserted at position 0, so the most recently
    # processed day comes first; keep that column order.
    full_df = pd.DataFrame(
        {day: full_day_columns[day] for day in reversed(list(full_day_columns))}
    )
    full_df.to_csv(os.path.join(full_flex_dir, f'{user} full flex.csv'), index=False)