In [12]:
import pandas as pd
import seaborn as sns
from scripts.data_processing import process_data
import os
import numpy as np
import time
import matplotlib.pyplot as plt

In [13]:
# Read the pre-processed CSV (path is relative to the notebook's working
# directory) into `df`; the cells below add feature columns to this frame.
df = pd.read_csv(filepath_or_buffer='../data/clean/processed_met_data.csv')

In [14]:
def met_season(month):
    """Translate a month number into a season label.

    The grouping coded here is Dec-Feb -> 'summer', Mar-May -> 'fall',
    Jun-Aug -> 'winter', Sep-Nov -> 'spring'.

    Args:
        month: Month number (1-12).

    Returns:
        The season label as a string, or None when `month` matches none of
        the twelve month numbers.
    """
    season_table = (
        ((12, 1, 2), 'summer'),
        ((3, 4, 5), 'fall'),
        ((6, 7, 8), 'winter'),
        ((9, 10, 11), 'spring'),
    )
    for month_group, label in season_table:
        if month in month_group:
            return label
    # No group matched: mirror the implicit fall-through result.
    return None

In [15]:
# Convert the 'date' column to pandas datetimes so that later cells can read
# calendar fields (such as the month) from it.
df['date'] = df['date'].pipe(pd.to_datetime)

In [16]:
# Label every row with its season, derived from the month of the 'date' column.
# The vectorised `.dt.month` accessor plus `Series.map` replaces a row-wise
# `DataFrame.apply(axis=1)`, which builds a Series object for every row just to
# read a single field. `met_season` is still called once per value, so rows
# with a missing date still end up with a missing season.
# Requires 'date' to already hold datetimes (it is converted with
# pd.to_datetime earlier in the notebook).
df['season'] = df['date'].dt.month.map(met_season)

In [17]:
def _wind_cooling_factor(speed):
    """Wind-dependent factor of the Siple-Passel cooling-power expression."""
    return 12.1452 + 11.6222 * np.sqrt(speed) - 1.16222 * speed


def calculate_perceived_temperature(celsius, humidity, wind_speed):
    """Estimate the perceived ("feels like") temperature in degrees Celsius.

    Three regimes are selected by the air temperature:

    * ``celsius >= 27``: heat-index regression. The polynomial is evaluated
      in degrees Fahrenheit and the result is converted back to Celsius.
    * ``celsius <= 10``: wind-chill equivalent temperature derived from the
      Siple-Passel cooling-power expression.
    * otherwise: the air temperature itself.

    Earlier behaviour returned degrees Fahrenheit from the hot branch and the
    raw cooling power ``factor(v) * (33 - T)`` (a heat-loss rate, not a
    temperature) from the cold branch, so the three branches produced values
    in three different units. Every branch now returns degrees Celsius.

    Args:
        celsius: Dry-bulb air temperature in degrees Celsius.
        humidity: Relative humidity. NOTE(review): the regression
            coefficients expect percent (0-100), not a 0-1 fraction --
            confirm against the input column.
        wind_speed: Wind speed. NOTE(review): the cooling-power coefficients
            correspond to metres per second -- confirm the column's unit.

    Returns:
        Perceived temperature in degrees Celsius. A NaN air temperature
        matches neither threshold and is returned unchanged.
    """
    if celsius >= 27:
        f = celsius * 1.8 + 32
        heat_index_f = (
            -42.379
            + (2.04901523 * f)
            + (10.14333127 * humidity)
            - (.22475541 * f * humidity)
            - (.00683783 * f * f)
            - (.05481717 * humidity * humidity)
            + (.00122874 * f * f * humidity)
            + (.00085282 * f * humidity * humidity)
            - (.00000199 * f * f * humidity * humidity)
        )
        # The regression yields Fahrenheit; convert so all branches agree.
        return (heat_index_f - 32) / 1.8
    elif celsius <= 10:
        # Reference "calm" wind (about 4 mph) used by the classic wind-chill
        # equivalent temperature; at or below it no wind chill is applied.
        calm_wind = 1.79
        if wind_speed <= calm_wind:
            return celsius
        # Equivalent temperature: the still-air temperature that would give
        # the same cooling power as the actual wind at the actual temperature.
        ratio = _wind_cooling_factor(wind_speed) / _wind_cooling_factor(calm_wind)
        return 33 - ratio * (33 - celsius)
    else:
        return celsius

In [18]:
# Compute the perceived temperature for every row from the dry-bulb temperature,
# relative humidity and wind speed columns.
# The three columns are walked in lockstep instead of using a row-wise
# `DataFrame.apply(axis=1)`, which materialises every row (including unrelated
# columns) as a Series before calling the function. The scalar function and
# the row order are unchanged.
df['perceived_temperature'] = [
    calculate_perceived_temperature(air_temp, rel_humidity, wind)
    for air_temp, rel_humidity, wind in zip(
        df['air_temp_dry_bulb'],
        df['relative_humidity'],
        df['wind_speed'],
    )
]

In [19]:
# Lagged copies of 'energy_demand': each `lag_<N>h` column holds the value from
# N rows earlier, and the leading rows that have no earlier value are filled with 0.
# NOTE(review): the column names label the offsets as hours, which presumes one
# row per hour in chronological order -- confirm against the input data.
# NOTE(review): 0 is indistinguishable from a real zero-demand reading; confirm
# that downstream consumers expect this sentinel.
for lag_rows in (1, 6, 12, 24, 48, 96, 192, 384, 744):
    df[f'lag_{lag_rows}h'] = df['energy_demand'].shift(lag_rows, fill_value=0)

In [22]:
# One-hot encode 'season' on a copy of `df`, then drop the 'season_spring'
# indicator so that spring is represented by all remaining season columns
# being off.
df_dummied = (
    pd.get_dummies(df.copy(), columns=['season'])
    .drop(columns='season_spring')
)

In [23]:
# Persist the engineered feature table as CSV, leaving the row index out of the file.
df_dummied.to_csv(path_or_buf='../data/clean/processed_data.csv', index=False)