In [39]:
import numpy as np
import pandas as pd
import seaborn as sns
import json
import pprint
from datetime import datetime

In [40]:
#Format into dates
def format_time(unix_timestamp):
    format = '%Y-%m-%d'
    ts = int(unix_timestamp)
    return datetime.utcfromtimestamp(ts).strftime(format)

#Create list of datetime objects from timestamps
def gen_datetimes(li):
    datetimes = []
    for time in li:
        datetimes.append(datetime.utcfromtimestamp(time))
    return datetimes

#Get day of week based on date
def get_weekday(dates):
    return [1 if (datetime.strptime(d,"%Y-%m-%d").weekday() >= 5) else 0 for d in dates]

def get_weekday2(year, month, day):
    dates = pd.DataFrame()
    dates['y'] = year
    dates['m'] = month
    dates['d'] = day
    dates['dates'] = dates['y'].astype('str') + '-' + dates['m'].astype('str') + '-' + dates['d'].astype('str')
    return get_weekday(dates['dates'])

In [41]:
df_power = pd.read_csv('Datasets/UIUC_PowerConsumption.csv')
df_power.head()

Unnamed: 0,Date,Power
0,10/1/2020,6102
1,10/2/2020,6052
2,10/3/2020,5953
3,10/4/2020,5990
4,10/5/2020,6040


In [42]:
df_power[['Month', 'Day', 'Year']] = df_power['Date'].str.split('/', expand=True).apply(pd.to_numeric)
df_power = df_power.drop(['Date'], axis = 1)
df_power.head()

Unnamed: 0,Power,Month,Day,Year
0,6102,10,1,2020
1,6052,10,2,2020
2,5953,10,3,2020
3,5990,10,4,2020
4,6040,10,5,2020


In [43]:
def clean_data(location, skiprows = 0):
    df_weather = pd.read_csv(location, skiprows=skiprows)
    df_weather = df_weather.drop(columns=['Hour', 'Minute'])
    df_weather = df_weather[df_weather.DNI != 0]

    #Take mean, max and min for each DNI in DataFrame and mean for everything else
    
    max_dni = df_weather.groupby(['Year', 'Month', 'Day']).max().reset_index()['DNI']
    min_dni = df_weather.groupby(['Year', 'Month', 'Day']).min().reset_index()['DNI']
    
    df_weather = pd.DataFrame(df_weather.groupby(['Year', 'Month', 'Day']).mean().reset_index())
    
    df_weather.insert(4, 'DNI Max', max_dni)
    df_weather.insert(5, 'DNI Min', min_dni)
    
    return df_weather

def training_prep(data, square_feet, building_type):
    data = data.loc[:, ['Year', 'Month', 'Day', 'DNI', 'DNI Max', 'DNI Min', 'Wind Speed', 'Precipitable Water', 'Wind Direction', 'Relative Humidity', 'Temperature', 'Pressure']]
    data.loc[:, 'Weekday'] = get_weekday2(data['Year'], data['Month'], data['Day'])
    data = data.drop(['Year'], axis=1)
    data.loc[:, 'Square Feet'] = square_feet
    data.loc[:, 'Type'] = building_type
    return data

In [44]:
df_weather = training_prep(clean_data('Datasets/UrbanaChampaign_Weather.csv'), 66000, 2)
df_weather.head()

Unnamed: 0,Month,Day,DNI,DNI Max,DNI Min,Wind Speed,Precipitable Water,Wind Direction,Relative Humidity,Temperature,Pressure,Weekday,Square Feet,Type
0,1,1,11.0,11,11,3.0,0.9,19.0,88.86,-0.9,997.0,0,66000,2
1,1,2,45.5,160,2,3.65,0.866667,244.666667,77.95,-0.283333,995.333333,0,66000,2
2,1,3,715.8,940,177,3.17,0.56,221.1,68.032,1.31,991.4,0,66000,2
3,1,4,296.6,839,5,1.26,0.75,182.8,65.585,4.69,981.7,0,66000,2
4,1,5,654.0,909,69,3.24,1.1,258.4,81.814,6.51,987.0,1,66000,2


In [45]:
df_merge = df_weather.merge(df_power[['Power', 'Month', 'Day']], left_on=('Month', 'Day'), right_on=('Month', 'Day'), how = 'inner', suffixes=('_left','_right'))
df_merge = df_merge.rename(columns={'Power': 'Power Consumption', 'DNI':'DNI Mean'})
df_merge.head()

Unnamed: 0,Month,Day,DNI Mean,DNI Max,DNI Min,Wind Speed,Precipitable Water,Wind Direction,Relative Humidity,Temperature,Pressure,Weekday,Square Feet,Type,Power Consumption
0,10,1,597.916667,898,86,3.675,3.825,220.083333,67.045,28.658333,988.333333,0,66000,2,6102
1,10,2,332.25,772,20,3.908333,4.441667,270.5,72.508333,26.675,986.25,0,66000,2,6052
2,10,3,408.0,923,47,5.088889,1.966667,312.333333,62.97,20.988889,991.555556,0,66000,2,5953
3,10,4,617.25,931,48,3.5,1.333333,34.666667,60.214167,16.258333,999.416667,0,66000,2,5990
4,10,5,309.4,810,4,2.98,2.73,148.0,58.441,18.19,990.5,1,66000,2,6040


In [46]:
df_merge.to_csv('Datasets/UIUC_PowerWeatherData.csv', index=False)