In [564]:
# Reset all variables so that we can 'run all cells' and not get unused variables hanging around
%reset -f

In [565]:
import pandas as pd
import numpy as np
from pivottablejs import pivot_ui
import random
import json
from collections import defaultdict
from functools import partial

## Import measured usage + summary stats

In [566]:
# Load the measured welder data and index it by the 'time_local' column.
df = pd.read_csv("data/welder_time_local.csv").set_index('time_local')
# Show 10 random rows as a quick sanity check of the loaded columns.
df.sample(10)

Unnamed: 0_level_0,welder_is_on,day_name,hour,day_index,day
time_local,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-09-09 11:00:00+03:00,0,Sunday,11,6,6_Sunday
2018-09-16 12:00:00+03:00,3,Sunday,12,6,6_Sunday
2018-09-15 00:00:00+03:00,0,Saturday,0,5,5_Saturday
2018-09-19 03:00:00+03:00,0,Wednesday,3,2,2_Wednesday
2018-09-09 08:00:00+03:00,0,Sunday,8,6,6_Sunday
2018-09-10 15:00:00+03:00,0,Monday,15,0,0_Monday
2018-09-13 02:00:00+03:00,0,Thursday,2,3,3_Thursday
2018-09-20 08:00:00+03:00,0,Thursday,8,3,3_Thursday
2018-09-14 16:00:00+03:00,0,Friday,16,4,4_Friday
2018-09-19 11:00:00+03:00,2,Wednesday,11,2,2_Wednesday


In [567]:
df.describe()

Unnamed: 0,welder_is_on,hour,day_index
count,311.0,311.0,311.0
mean,0.421222,11.527331,2.906752
std,2.32644,6.927625,2.051196
min,0.0,0.0,0.0
25%,0.0,6.0,1.0
50%,0.0,12.0,3.0
75%,0.0,17.5,5.0
max,26.0,23.0,6.0


In [568]:
df['welder_is_on'].sum()

131

In [584]:
# To come up with the config options, use this example to output the config: 
# https://pivottable.js.org/examples/onrefresh.html
pivot_ui(df, 
         rows=['day'],
         cols=['hour'],
         rendererName="Table Barchart",
         aggregatorName="Integer Sum",
         vals=["welder_is_on"])

In [585]:
pivot_ui(df, 
         rows=['day'],
         cols=['hour'],
         rendererName="Table Barchart",
         aggregatorName="Average",
         vals=["welder_is_on"])

In [586]:
pivot_ui(df, 
         rows=['day'],
         cols=['hour'],
         rendererName="Table Barchart",
         aggregatorName="Sum over Sum",
         vals=["welder_is_on", "hour"])

## Example functions for next section:

In [572]:
measured = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
s1 = random.choice(measured)
s1

10

In [573]:
df[df['day_name'] == 'Monday'].sample(3)

Unnamed: 0_level_0,welder_is_on,day_name,hour,day_index,day
time_local,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-09-17 08:00:00+03:00,0,Monday,8,0,0_Monday
2018-09-10 12:00:00+03:00,3,Monday,12,0,0_Monday
2018-09-17 09:00:00+03:00,8,Monday,9,0,0_Monday


## Create dataset for the load profile generator

Collect the measured values for every hour of every day of the week. For example:

```
measured_usage = {
  fri_09: [0, 1, 0, 3, 0, 1, 1]     # Friday @ 9am
  sat_10: [0, 0, 1, 30, 10, 0, 2]   # Saturday @ 10am
  ...
}

```

In [574]:
day_range = range(7)    # day-of-week indices 0..6 (same span as the data's day_index column)
hour_range = range(24)  # hours of the day 0..23; range(25) would add a non-existent hour 24

In [575]:
df.sample(5)

Unnamed: 0_level_0,welder_is_on,day_name,hour,day_index,day
time_local,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-09-14 14:00:00+03:00,0,Friday,14,4,4_Friday
2018-09-17 07:00:00+03:00,0,Monday,7,0,0_Monday
2018-09-16 22:00:00+03:00,0,Sunday,22,6,6_Sunday
2018-09-15 23:00:00+03:00,0,Saturday,23,5,5_Saturday
2018-09-08 21:00:00+03:00,0,Saturday,21,5,5_Saturday


In [576]:
# Abbreviates a day name to its first 3 characters, lowercased (Saturday => sat)
def shorten_day_name(day_string):
    """Return the first three characters of `day_string` in lower case."""
    prefix = day_string[:3]
    return prefix.lower()

# Python dicts accept tuples as keys, e.g. ("Saturday", 10): [0, 0, 2, 4, 0].
# This data structure is exported to JSON, however, and JSON only allows string keys.
# The key built here is therefore a single string, e.g. "sat_10": [0, 0, 2, 4, 0]
def composite_key(day_name, hour):
    """Return the "<day>_<HH>" key for a day name and an hour of the day.

    The day is abbreviated with shorten_day_name and the hour is left-padded
    with zeros to two characters (9 => "09").
    """
    hour_part = str(hour).zfill(2)
    day_part = shorten_day_name(day_name)
    return day_part + "_" + hour_part

In [577]:
def create_usage_profile_data(df):
    """Group the measured welder readings by day of week and hour of day.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'day_name', 'hour' and 'welder_is_on'.

    Returns
    -------
    collections.defaultdict(list)
        Maps each composite_key(day_name, hour) string (e.g. "sat_10") to the
        list of 'welder_is_on' values found for that slot, in row order.
    """
    # Named usage_by_slot (not `dict`) so the builtin type is not shadowed.
    usage_by_slot = defaultdict(list)

    # Walk the three needed columns in parallel instead of using iterrows():
    # no per-row Series is built, and .tolist() hands back plain Python
    # scalars, which keeps the result serializable with json.dump.
    day_names = df['day_name'].tolist()
    hours = df['hour'].tolist()
    readings = df['welder_is_on'].tolist()

    for day_name, hour, reading in zip(day_names, hours, readings):
        usage_by_slot[composite_key(day_name, hour)].append(reading)
    return usage_by_slot

measured_usage = create_usage_profile_data(df)
measured_usage

defaultdict(list,
            {'fri_00': [0, 0],
             'fri_01': [0, 0],
             'fri_02': [0, 0],
             'fri_03': [0],
             'fri_04': [0],
             'fri_05': [0],
             'fri_06': [0],
             'fri_07': [0],
             'fri_08': [9],
             'fri_09': [5],
             'fri_10': [0],
             'fri_11': [0],
             'fri_12': [0],
             'fri_13': [0],
             'fri_14': [0],
             'fri_15': [0],
             'fri_16': [0],
             'fri_17': [0],
             'fri_18': [0],
             'fri_19': [0],
             'fri_20': [0],
             'fri_21': [0],
             'fri_22': [0],
             'fri_23': [0],
             'mon_00': [0, 0],
             'mon_01': [0, 0],
             'mon_02': [0, 0],
             'mon_03': [1, 0],
             'mon_04': [0, 0],
             'mon_05': [0, 0],
             'mon_06': [0, 0],
             'mon_07': [0, 0],
             'mon_08': [0, 0],
             'mon_09':

In [578]:
# Make sure there is a key for every hour of every day of the week  
# represented: (7x24 = 168) 
len(measured_usage) == 7*24

True

In [579]:
# This dataset is everything the web app needs to generate a 52-week
# usage profile based on sampling (more on that below). 
# Output to JSON so it can be imported into the app
with open('data/welder_usage_generator_data.json', 'w') as fp:
    json.dump(measured_usage, fp)

## Create Usage Profile Generator
Day 1 of the year (Sat @ 10am): sample(measured_usage.sat_10)

Day 2 of the year (Fri @ 9am): sample(measured_usage.fri_09)

Check: see if the sum, avg and std roughly match the measured values

In [580]:
def create_year_range_df(year=2018):
    """Build an hourly skeleton DataFrame spanning 8760 hours (365 days).

    The range is anchored on its end: it starts exactly 365 days before
    January 1st of `year + 1`, so the last row is always 23:00 on December
    31st of `year`. For a non-leap year the first row is January 1st 00:00;
    for a leap year it is January 2nd 00:00, which keeps the length at 8760.

    Parameters
    ----------
    year : int, default 2018
        Calendar year the range should end in.

    Returns
    -------
    pandas.DataFrame
        Indexed by hourly timestamps, with the columns
        'hour_of_year' (0..8759), 'day_name' (e.g. "Monday") and
        'hour_of_day' (0..23).
    """
    hourly_periods = 365 * 24  # 8760 hours

    # Build the anchor from integer components rather than parsing a
    # formatted 'm/d/yyyy' string.
    next_new_year = pd.Timestamp(year=year + 1, month=1, day=1)
    start_date = next_new_year - pd.Timedelta(days=365)

    # A Timedelta frequency is used instead of the 'H' alias, which newer
    # pandas releases deprecate; the resulting index is the same.
    date_range = pd.date_range(start_date,
                               periods=hourly_periods,
                               freq=pd.Timedelta(hours=1))
    year_hours = list(range(len(date_range)))
    df_year = pd.DataFrame({"hour_of_year": year_hours}, index=date_range)

    # Now add day of week and hour of day columns
    df_year['day_name'] = df_year.index.day_name()
    df_year['hour_of_day'] = df_year.index.hour
    return df_year

df_year = create_year_range_df()
df_year.sample(5)

Unnamed: 0,hour_of_year,day_name,hour_of_day
2018-07-21 20:00:00,4844,Saturday,20
2018-04-16 04:00:00,2524,Monday,4
2018-10-15 23:00:00,6911,Monday,23
2018-10-30 15:00:00,7263,Tuesday,15
2018-06-20 13:00:00,4093,Wednesday,13


In [581]:
random.choice([1, 2, 3])

2

In [582]:
# TODO: (Problem)
# Using random.choice on a list of length 1 will always return the single value
# If there is only a single measured value in the list, it will always pick that one, 
# which is likely overrepresenting usage
# Options:
# 1. Always pad a list with a zero if there is only 1 value (this can be done easily in sample_usage())
# 2. Sample across multiple hours or days if there is only 1 value
# 3. Sample across multiple hours or days if the measured samples are below ~5-10
# 4. Always sample across multiple hours or days
def sample_usage(measured_usage, row):
    """Pick one measured value at random for the row's day/hour slot.

    `row` must provide 'day_name' and 'hour_of_day'. The list stored in
    `measured_usage` under the matching composite_key is sampled with
    random.choice.
    """
    slot_key = composite_key(row['day_name'], row['hour_of_day'])
    candidates = measured_usage[slot_key]
    return random.choice(candidates)
    
# measured_usage[composite_key(row['day_name'], row['hour_of_day'])][0]
def generate_usage_profile(measured_usage, year=2018):
    """Return the hourly frame for `year` with a sampled 'welder' column added.

    Every row produced by create_year_range_df(year) receives one value drawn
    by sample_usage from `measured_usage`.
    """
    profile = create_year_range_df(year)
    profile['welder'] = profile.apply(
        lambda hour_row: sample_usage(measured_usage, hour_row),
        axis=1,
    )
    return profile

# Generate a sampled usage profile from the measured data (default year: 2018).
df_usage_profile = generate_usage_profile(measured_usage)
# Preview the first 20 hourly rows.
df_usage_profile.head(20)

Unnamed: 0,hour_of_year,day_name,hour_of_day,welder
2018-01-01 00:00:00,0,Monday,0,0
2018-01-01 01:00:00,1,Monday,1,0
2018-01-01 02:00:00,2,Monday,2,0
2018-01-01 03:00:00,3,Monday,3,1
2018-01-01 04:00:00,4,Monday,4,0
2018-01-01 05:00:00,5,Monday,5,0
2018-01-01 06:00:00,6,Monday,6,0
2018-01-01 07:00:00,7,Monday,7,0
2018-01-01 08:00:00,8,Monday,8,0
2018-01-01 09:00:00,9,Monday,9,8


In [583]:
pivot_ui(df_usage_profile, 
         rows=['day_name', 'hour_of_day'],
         cols=['welder'],
         rendererName="Table",
         aggregatorName="Count")