In [196]:
# Reset all variables so that we can 'run all cells' and not get unused variables hanging around
# %reset -f

In [231]:
import pandas as pd
import numpy as np
from pivottablejs import pivot_ui
import random
import json
from collections import defaultdict

In [198]:
# Load the welder readings and index them by the `time_local` column
df = (
    pd.read_csv("data/welder_time_local.csv")
    .set_index('time_local')
)
# Peek at 10 randomly chosen rows
df.sample(n=10)

Unnamed: 0_level_0,welder_is_on,day_name,hour,day_index,day
time_local,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-09-17 05:00:00+03:00,0,Monday,5,0,0_Monday
2018-09-20 21:00:00+03:00,0,Thursday,21,3,3_Thursday
2018-09-11 09:00:00+03:00,0,Tuesday,9,1,1_Tuesday
2018-09-12 17:00:00+03:00,0,Wednesday,17,2,2_Wednesday
2018-09-19 02:00:00+03:00,0,Wednesday,2,2,2_Wednesday
2018-09-20 07:00:00+03:00,0,Thursday,7,3,3_Thursday
2018-09-18 07:00:00+03:00,0,Tuesday,7,1,1_Tuesday
2018-09-18 19:00:00+03:00,0,Tuesday,19,1,1_Tuesday
2018-09-16 00:00:00+03:00,0,Sunday,0,6,6_Sunday
2018-09-19 01:00:00+03:00,0,Wednesday,1,2,2_Wednesday


In [199]:
# The pivot options below can be discovered interactively; this pivottable.js
# example outputs the current configuration:
# https://pivottable.js.org/examples/onrefresh.html
pivot_ui(
    df,
    rows=['day'],
    cols=['hour'],
    rendererName="Table Barchart",
    aggregatorName="Average",
    vals=["welder_is_on"],
)

In [200]:
# Same day-by-hour pivot, this time with the "Sum over Sum" aggregator
# applied to `welder_is_on` and `hour`
pivot_ui(
    df,
    rows=['day'],
    cols=['hour'],
    rendererName="Table Barchart",
    aggregatorName="Sum over Sum",
    vals=["welder_is_on", "hour"],
)

## Example functions for next section:

In [201]:
# Toy example: draw one random element from a list of measurements
measured = list(range(1, 11))
s1 = random.choice(measured)
s1

9

In [202]:
# Three randomly chosen rows whose day_name is Monday
df.loc[df['day_name'] == 'Monday'].sample(n=3)

Unnamed: 0_level_0,welder_is_on,day_name,hour,day_index,day
time_local,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-09-17 12:00:00+03:00,0,Monday,12,0,0_Monday
2018-09-17 10:00:00+03:00,0,Monday,10,0,0_Monday
2018-09-10 10:00:00+03:00,0,Monday,10,0,0_Monday


## Create dataset for the load profile generator

Collect the measured samples for every hour of every weekday. For example:

```
measured_usage = {
  "fri_09": [0, 1, 0, 3, 0, 1, 1],     # Friday @ 9am
  "sat_10": [0, 0, 1, 30, 10, 0, 2],   # Saturday @ 10am
  ...
}

```

In [203]:
# Weekday indices 0-6 (the data's day_index uses 0 for Monday and 6 for Sunday)
day_range = range(7)
# Hours of the day are 0-23; range(24) avoids generating a non-existent hour 24
hour_range = range(24)

In [210]:
# Peek at 5 randomly chosen rows
df.sample(n=5)

Unnamed: 0_level_0,welder_is_on,day_name,hour,day_index,day
time_local,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-09-21 01:00:00+03:00,0,Friday,1,4,4_Friday
2018-09-16 12:00:00+03:00,3,Sunday,12,6,6_Sunday
2018-09-18 18:00:00+03:00,0,Tuesday,18,1,1_Tuesday
2018-09-16 11:00:00+03:00,0,Sunday,11,6,6_Sunday
2018-09-11 02:00:00+03:00,0,Tuesday,2,1,1_Tuesday


In [219]:
def shorten_day_name(day_string):
    """Return the first three letters of a day name, lowercased (Saturday => sat)."""
    abbreviation = day_string[:3]
    return abbreviation.lower()


def composite_key(day_name, hour):
    """Build the string key that identifies one (weekday, hour) slot.

    Python allows tuples as dict keys, for example ("Saturday", 10), but this
    data structure is exported to JSON, which only allows string keys. The
    day name is therefore abbreviated and joined to the hour, zero-padded to
    two characters: ("Saturday", 10) => "sat_10", ("Friday", 9) => "fri_09".
    """
    key_parts = (shorten_day_name(day_name), str(hour).zfill(2))
    return "_".join(key_parts)

In [220]:
def create_usage_profile_data(df):
    """Group the `welder_is_on` readings by weekday and hour of day.

    Args:
        df: DataFrame with `day_name`, `hour` and `welder_is_on` columns.

    Returns:
        A `defaultdict(list)` that maps each composite key (see
        `composite_key`, e.g. "sat_10") to the list of `welder_is_on`
        values found for that slot, in row order.

    Raises:
        KeyError: if one of the required columns is missing from `df`.
    """
    # Named to avoid shadowing the builtin `dict`
    usage_by_slot = defaultdict(list)
    # Iterate column-wise instead of with iterrows(); .tolist() hands back
    # Python scalars for str/int/float columns, which json.dump can serialize
    readings = zip(
        df['day_name'].tolist(),
        df['hour'].tolist(),
        df['welder_is_on'].tolist(),
    )
    for day_name, hour, welder_is_on in readings:
        usage_by_slot[composite_key(day_name, hour)].append(welder_is_on)
    return usage_by_slot

# Build the per-slot lists of readings from the measured data and display them
usage_profile_data = create_usage_profile_data(df=df)
usage_profile_data

defaultdict(list,
            {'fri_00': [0, 0],
             'fri_01': [0, 0],
             'fri_02': [0, 0],
             'fri_03': [0],
             'fri_04': [0],
             'fri_05': [0],
             'fri_06': [0],
             'fri_07': [0],
             'fri_08': [9],
             'fri_09': [5],
             'fri_10': [0],
             'fri_11': [0],
             'fri_12': [0],
             'fri_13': [0],
             'fri_14': [0],
             'fri_15': [0],
             'fri_16': [0],
             'fri_17': [0],
             'fri_18': [0],
             'fri_19': [0],
             'fri_20': [0],
             'fri_21': [0],
             'fri_22': [0],
             'fri_23': [0],
             'mon_00': [0, 0],
             'mon_01': [0, 0],
             'mon_02': [0, 0],
             'mon_03': [1, 0],
             'mon_04': [0, 0],
             'mon_05': [0, 0],
             'mon_06': [0, 0],
             'mon_07': [0, 0],
             'mon_08': [0, 0],
             'mon_09':

In [229]:
# Sanity check: every hour of every day of the week must have its own key,
# i.e. 7 days x 24 hours = 168 keys in total
7 * 24 == len(usage_profile_data)

True

In [232]:
# Export the dataset as JSON so it can be imported into the web app.
# It is everything the app needs to generate a 52-week usage profile
# based on sampling (more on that below).
with open('data/welder_usage_generator_data.json', 'w') as json_file:
    json.dump(usage_profile_data, json_file)

## Create Usage Profile Generator
Day 1 of the year (Sat @ 10am): sample(measured_usage.sat_10)

Day 2 of the year (Fri @ 9am): sample(measured_usage.fri_09)

Check: see if the sum, avg and std roughly match the measured values