In [None]:
# Dataset selector read by base_dataset() and used in every saved file name.
# base_dataset() has branches for 'UK', 'NYC' and 'MHK'.
ds_name = 'MHK'

In [None]:
# Stations excluded from the UK weather data: base_dataset() returns this list
# for ds_name == 'UK' and the weather cells pass it to attach_weather() as
# `drop_stations`.
# NOTE(review): each entry looks like a (latitude, longitude) pair -- confirm
# against the station file format.
stations_to_drop_uk = [
    (-51.317, -59.6), (53.5, -19.5), (47.5, -8.5),
    (54.017, 1.1), (53.7, 1.133), (59.733, 1.667),
    (58.033, 1.400), (57.283, 1.650), (61.2, 1.1),
    (60.6, 1.0), (59.5, 1.5), (58.3, 0.4),
    (57.883, 0.033), (57.6, 1.7), (57.2, 1.2),
    (57.2, 0.5), (54.0, 0.7), (53.833, 2.917),
    (53.5, 2.2), (53.4, 1.7), (53.0, 2.1),
    (53.0, 1.7), (49.9, -2.9), (60.15, -2.067),
    (60.117, -2.067),
]

In [None]:
def base_dataset(dataset=None):
    """Create a Load_Forecaster with the load data of one dataset attached.

    Parameters
    ----------
    dataset : str, optional
        One of 'UK', 'NYC' or 'MHK'. When omitted, the notebook-level
        ``ds_name`` is used, which is what every existing call relies on.

    Returns
    -------
    tuple
        ``(df, stations_to_drop, weather_ds)`` where

        * ``df`` is the ``Load_Forecaster`` with its load attached and its
          year range overridden to 2008-2017 (inclusive),
        * ``stations_to_drop`` is ``stations_to_drop_uk`` for 'UK' and a new
          empty list otherwise,
        * ``weather_ds`` is ``[stations_file, [data_file, ...]]``: the UK
          files for 'UK', the NY files for 'NYC' and 'MHK'.

    Raises
    ------
    ValueError
        If the dataset name is not recognised. Previously an unknown name
        fell through silently and returned a forecaster with no load attached.
    """
    name = ds_name if dataset is None else dataset

    # Load file (or directory) per supported dataset.
    load_sources = {
        'UK': "/media/jonathan/DATA/HW/Project/DATA/NG_DATA/DATA/",
        'NYC': "/media/jonathan/DATA/HW/Project/DATA/NY_Data/DATA/zones/N.Y.C..csv",
        'MHK': "/media/jonathan/DATA/HW/Project/DATA/NY_Data/DATA/zones/MHK VL.csv",
    }
    # Fail fast, before importing the forecasting module or touching any data.
    if name not in load_sources:
        raise ValueError(
            f"Unknown dataset {name!r}; expected one of {sorted(load_sources)}"
        )

    import Load_forecasting as lf

    df = lf.Load_Forecaster()
    df.attach_load(filename=load_sources[name], location=name)

    if name == 'UK':
        stations_to_drop = stations_to_drop_uk
        weather_ds = ["METAR_DATA/isd_stations_uk.txt",
                      ["METAR_DATA/isd_2005-2010_uk.txt", "METAR_DATA/isd_2011-2018_uk.txt"]]
    else:
        # 'NYC' and 'MHK' share the NY weather files and drop no stations.
        stations_to_drop = []
        weather_ds = ["METAR_DATA/isd_stations_ny.txt", ["METAR_DATA/isd_2001-2018_ny.txt"]]

    df._override_model_data_settings(year_range=list(range(2008, 2017 + 1)))
    return df, stations_to_drop, weather_ds

---
## Base dataset

In [None]:
### Location : selected by ds_name (this header was written for NG - UK)
### Year_range : 2008-2017
### + ALL DEFAULTS

### Load
## Preprocessing
# Fixes : ['zeros', 'extremes_global', 'derivatives_iterative'] (default)
## Historical : {'points':['7d'], 'averages':['1d', '7d']} (default)

### Weather : <No>
## Weather preprocessing : NA
# drop stations : NA
# desired_cluster_count : NA
# min_cluster_size : NA

### ML
# Time encoding : cyclical (default)
# Stdz : RobustScaler (default)

In [None]:
# Baseline dataset: no overrides beyond those applied inside base_dataset().
df, _, _ = base_dataset()
df.process_data()
save_name = f'data_{ds_name}_base'
df.save_data(save_name)

---

# Base dataset small

In [None]:
# Reduced variant of the base dataset: years 2008-2010 with train_test=(2, 1).
small_year_range = list(range(2008, 2010 + 1))
df, _, _ = base_dataset()
df._override_model_data_settings(train_test=(2, 1), year_range=small_year_range)
df.process_data()
df.save_data(f'data_{ds_name}_base_small')

# Base dataset week forecasting

In [None]:
# Base dataset with forecast_horizon overridden to 'week'.
df, _, _ = base_dataset()
horizon_settings = {'forecast_horizon': 'week'}
df._override_model_data_settings(**horizon_settings)
df.process_data()
df.save_data(f'data_{ds_name}_week_forecasting')

---
## No historical load dataset

In [None]:
### Location : selected by ds_name (this header was written for NG - UK)
### Year_range : 2008-2017

### Load
## Preprocessing
# Fixes : ['zeros', 'extremes_global', 'derivatives_iterative'] (default)
## Historical : No <---

### Weather : <No>
## Weather preprocessing : NA
# drop stations : NA
# desired_cluster_count : NA
# min_cluster_size : NA

### ML
# Time encoding : cyclical (default)
# Stdz : RobustScaler (default)

In [None]:
# No historical load propagation: both the point and average lists are empty.
df, _, _ = base_dataset()
no_history = dict(points=[], averages=[])
df._override_historical_load_propagation(**no_history)
df.process_data()
df.save_data(f'data_{ds_name}_no_weather_no_historical')

---

# More historical load propagation dataset 
### Point: '1d' was missing before... Also added '2d'

In [None]:
# Richer historical load propagation: the same three lags are used for both
# the point values and the averages.
history_lags = ['1d', '2d', '7d']
df, _, _ = base_dataset()
df._override_historical_load_propagation(
    points=list(history_lags),    # separate list objects, as in the original call
    averages=list(history_lags),
)
df.process_data()
df.save_data(f'data_{ds_name}_no_weather_more_historical')

## Base + weather dataset

In [None]:
### Location : selected by ds_name (this header was written for NG - UK)
### Year_range : 2008-2017

### Load
## Preprocessing
# Fixes : ['zeros', 'extremes_global', 'derivatives_iterative'] (default)
## Historical : {'points':['7d'], 'averages':['1d', '7d']} (default)

### Weather : Yes
## Weather preprocessing : All default
# Variables : ['W_Spd', 'Air_Temp', 'RHx ']

### ML
# Time encoding : cyclical (default)
# Stdz : RobustScaler (default)

In [None]:
# Base dataset plus weather: wind speed, air temperature and 'RHx '.
df, stations_to_drop, weather_ds = base_dataset()
stations_file, data_files = weather_ds  # [stations file, [data files]]
# NOTE(review): 'RHx ' carries a trailing space; it is kept byte-for-byte and
# presumably matches the column name in the weather files -- confirm.
df.attach_weather(
    filepath_stations=stations_file,
    filespath_data=data_files,
    variables=['W_Spd', 'Air_Temp', 'RHx '],
    drop_stations=stations_to_drop,
)
df.process_data()
df.save_data(f'data_{ds_name}_weather')

---

# Categorical time encoding

In [None]:
# Weather dataset with the time encoding switched to 'categorical'.
df, stations_to_drop, weather_ds = base_dataset()
stations_file, data_files = weather_ds  # [stations file, [data files]]
# The override is applied before the weather is attached, as in the original.
df._override_time_encoding(mode='categorical')
df.attach_weather(
    filepath_stations=stations_file,
    filespath_data=data_files,
    variables=['W_Spd', 'Air_Temp', 'RHx '],
    drop_stations=stations_to_drop,
)
df.process_data()
df.save_data(f'data_{ds_name}_weather_categorical_time')

---

# No "advanced" load time series corrections

<font size=3>Can't remove 'zeros' corrections, otherwise cannot compute MAPE (inf) <br>
More specifically, the problem is that TinyDB cannot store "inf" values.</font>

In [None]:
# Weather dataset whose load preprocessing is restricted to the 'zeros' fix.
df, stations_to_drop, weather_ds = base_dataset()
stations_file, data_files = weather_ds  # [stations file, [data files]]
basic_fixes = ['zeros']
# The override is applied before the weather is attached, as in the original.
df._override_historical_load_preprocessing(fixes=basic_fixes)
df.attach_weather(
    filepath_stations=stations_file,
    filespath_data=data_files,
    variables=['W_Spd', 'Air_Temp', 'RHx '],
    drop_stations=stations_to_drop,
)
df.process_data()
df.save_data(f'data_{ds_name}_weather_basic_load_corrections')

---

# Only air temperature

In [None]:
# Weather dataset restricted to a single variable: air temperature.
df, stations_to_drop, weather_ds = base_dataset()
stations_file, data_files = weather_ds  # [stations file, [data files]]
df.attach_weather(
    filepath_stations=stations_file,
    filespath_data=data_files,
    variables=['Air_Temp'],
    drop_stations=stations_to_drop,
)
df.process_data()
df.save_data(f'data_{ds_name}_weather_air_only')

---

# Only wind

In [None]:
# Weather dataset restricted to a single variable: wind speed.
df, stations_to_drop, weather_ds = base_dataset()
stations_file, data_files = weather_ds  # [stations file, [data files]]
df.attach_weather(
    filepath_stations=stations_file,
    filespath_data=data_files,
    variables=['W_Spd'],
    drop_stations=stations_to_drop,
)
df.process_data()
df.save_data(f'data_{ds_name}_weather_wind_only')

---

# Only humidity

In [None]:
# Weather dataset restricted to a single variable: 'RHx '.
# NOTE(review): the trailing space in 'RHx ' is kept byte-for-byte; it
# presumably matches the column name in the weather files -- confirm.
df, stations_to_drop, weather_ds = base_dataset()
stations_file, data_files = weather_ds  # [stations file, [data files]]
df.attach_weather(
    filepath_stations=stations_file,
    filespath_data=data_files,
    variables=['RHx '],
    drop_stations=stations_to_drop,
)
df.process_data()
df.save_data(f'data_{ds_name}_weather_humidity_only')

---

# Fewer weather stations

In [None]:
# Weather dataset with desired_cluster_count=3 and min_cluster_size=3.
df, stations_to_drop, weather_ds = base_dataset()
stations_file, data_files = weather_ds  # [stations file, [data files]]
df.attach_weather(
    filepath_stations=stations_file,
    filespath_data=data_files,
    variables=['W_Spd', 'Air_Temp', 'RHx '],
    drop_stations=stations_to_drop,
)
# The clustering override is applied after the weather is attached, as in the original.
cluster_settings = {'desired_cluster_count': 3, 'min_cluster_size': 3}
df._override_weather_data_settings(**cluster_settings)
df.process_data()
df.save_data(f'data_{ds_name}_weather_less_stations')

---

# More weather stations
#### With UK, cannot go higher than 15

In [None]:
# Weather dataset with desired_cluster_count=40 and min_cluster_size=2.
df, stations_to_drop, weather_ds = base_dataset()
stations_file, data_files = weather_ds  # [stations file, [data files]]
df.attach_weather(
    filepath_stations=stations_file,
    filespath_data=data_files,
    variables=['W_Spd', 'Air_Temp', 'RHx '],
    drop_stations=stations_to_drop,
)
# The clustering override is applied after the weather is attached, as in the original.
cluster_settings = {'desired_cluster_count': 40, 'min_cluster_size': 2}
df._override_weather_data_settings(**cluster_settings)
df.process_data()
df.save_data(f'data_{ds_name}_weather_more_stations')

---

