In [1]:
import pandas as pd
import numpy as np
import os, random
import sys
import matplotlib.pyplot as plt
import pickle
from datetime import datetime
from datetime import timedelta
from functions import *
import math
from sklearn.preprocessing import StandardScaler
from tslearn.utils import to_time_series_dataset

In [2]:
path = os.path.normpath(os.getcwd() + os.sep + os.pardir)

In [3]:
utility_path = os.path.join(path, 'Data', 'DOE')
winter_flex_path = os.path.join(path, 'Data', 'Flexibility', 'Winter User Full')

In [4]:
# This reads through the utilities file from the DOE data and finds all utilities that are located in Texas.

utility = pd.read_csv(r"C:\Users\sienna\Desktop\Research\Breakthrough-Energy\Energy Coupon Data\Data\utilities.csv")
utility = utility[utility['State'] == 'TX']

data = []
for folders in os.listdir(utility_path):
    folders = int(folders)
    if folders in utility['utility'].values:
        data.append(folders)

In [5]:
# Reading the heating and cooling data

heating_temp_full = pd.DataFrame()
cooling_temp_full = pd.DataFrame()

for folder in data:
    heating_doe_path = os.path.join(utility_path, str(folder), 'residential_heating.csv')
    heating_temp = pd.read_csv(heating_doe_path, index_col='time')
    
    cooling_doe_path = os.path.join(utility_path, str(folder), 'residential_cooling.csv')
    cooling_temp = pd.read_csv(cooling_doe_path, index_col='time')
    
    heating_temp_full.insert(0, folder, heating_temp.iloc[:, 0])
    cooling_temp_full.insert(0, folder, cooling_temp.iloc[:, 0])

heating_temp_full.index = pd.to_datetime(heating_temp_full.index)
cooling_temp_full.index = pd.to_datetime(cooling_temp_full.index)

heating_temp_full = heating_temp_full.loc[(heating_temp_full.index.month < 5) | (heating_temp_full.index.month > 10)]
cooling_temp_full = cooling_temp_full.loc[(cooling_temp_full.index.month >= 5) & (cooling_temp_full.index.month <= 10)]

# There a slight mismatch in number of days, so we are truncating the cooling data by one day to match the heating data
cooling_temp_full = cooling_temp_full.iloc[0:len(heating_temp_full), :]

In [6]:
# This section creates a file named "ratio.csv" which is the ratio of Summer flexibility and Winter flexibility calculated from the DOR flexibility data set.
# Only need to run once.

temp_full = pd.DataFrame(heating_temp_full.values / cooling_temp_full.values)
temp_full.columns = heating_temp_full.columns
temp_full = temp_full[np.isfinite(temp_full)]
temp_full = temp_full.fillna(0)
temp_full.index = heating_temp_full.index

temp_full.index = pd.to_datetime(temp_full.index)
temp_full = temp_full.loc[(temp_full.index.month < 5) | (temp_full.index.month > 10)]

temp_full.to_csv(os.path.join(path, 'Data', 'DOE Ratio', 'ratio.csv'))

  temp_full = pd.DataFrame(heating_temp_full.values / cooling_temp_full.values)


In [7]:
# Reads the ratio file

ratio = pd.read_csv(os.path.join(path, 'Data', 'DOE Ratio', 'ratio.csv'), index_col='time')
ratio.index = pd.to_datetime(ratio.index)

# We are selecting the one utility to do the rest of the calculation, this utility's profile looks like a residential load whereas the other ones look like industry or commercial
ratio = ratio.iloc[:, 0]

In [8]:
# Gather all users from the EnergyCoupon data set

users = []
for file in os.listdir(os.path.join(path, 'Data', 'Flexibility', 'Summer User Full')):
    users.append(file.split(sep=' ')[0])

In [9]:
# Creating a data range

ratio_idx = pd.date_range(ratio.index[0], ratio.index[-1], freq='1D')

In [10]:
# Now we can multiplying the DOE ratio to the EnergyCoupon flexibility. But since with the DOE flexibility, the higher the number indicates
# higher flexibility, but EnergyCoupon flexibility is the opposite, so when we multiply the two, we use 1-summer_flexibility to make the two 
# series align.

for user in users:
    
    user_flex = pd.DataFrame()
    
    for i in ratio_idx[:-1]:
        summer_flex = pd.read_csv(os.path.join(path, 'Data', 'Flexibility', 'Summer User Full', f'{user} full flex.csv'))
        # summer_flex = pd.DataFrame(summer_flex.groupby(np.arange(len(summer_flex)) // 4).mean())
        summer_flex = pd.DataFrame(summer_flex.iloc[:, random.choice(np.arange(len(summer_flex.columns), dtype=int))])        
        temp = pd.DataFrame(ratio.loc[i:i+timedelta(days=1)-timedelta(minutes=15)])
        
        if len(temp) == 0:
            continue
        
        temp = pd.DataFrame(temp.values.reshape(-1, ) * (1 - summer_flex.values.reshape(-1, )))

        temp.columns = [str(i).split(sep=' ')[0]]
        user_flex.insert(0, str(i).split(sep=' ')[0], temp)
        
    user_flex.to_csv(os.path.join(winter_flex_path, f'{user} full flex.csv'), index=False)

  user_flex.insert(0, str(i).split(sep=' ')[0], temp)
  user_flex.insert(0, str(i).split(sep=' ')[0], temp)
  user_flex.insert(0, str(i).split(sep=' ')[0], temp)
  user_flex.insert(0, str(i).split(sep=' ')[0], temp)
  user_flex.insert(0, str(i).split(sep=' ')[0], temp)
  user_flex.insert(0, str(i).split(sep=' ')[0], temp)
  user_flex.insert(0, str(i).split(sep=' ')[0], temp)
  user_flex.insert(0, str(i).split(sep=' ')[0], temp)
  user_flex.insert(0, str(i).split(sep=' ')[0], temp)
  user_flex.insert(0, str(i).split(sep=' ')[0], temp)
  user_flex.insert(0, str(i).split(sep=' ')[0], temp)
  user_flex.insert(0, str(i).split(sep=' ')[0], temp)
  user_flex.insert(0, str(i).split(sep=' ')[0], temp)
  user_flex.insert(0, str(i).split(sep=' ')[0], temp)
  user_flex.insert(0, str(i).split(sep=' ')[0], temp)
  user_flex.insert(0, str(i).split(sep=' ')[0], temp)
  user_flex.insert(0, str(i).split(sep=' ')[0], temp)
  user_flex.insert(0, str(i).split(sep=' ')[0], temp)
  user_flex.insert(0, str(i)