# Model code

In [141]:
import sys
sys.path.append('/opt/datadriver/workspace/')

import h5py
import io
import numpy as np
from PIL import Image
import urllib
import os
import pandas as pd
import scipy.io
from smartgrid.image_processing.solar_angles_computation import *
import datetime
import pickle
import datetime
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from ipywidgets import IntSlider, Label
from ipywidgets.embed import embed_minimal_html
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual

## Parameters for adnot model 

In [142]:
p = np.array([ 8.20630791e+01, -1.13556174e+00,  1.15810318e+03,  1.56397451e-02,
        1.07080192e+01,  1.20840451e+00])

## Loading file & preparing data 

In [143]:
def compute_GHI(zenith, doy, p):

    ClearSkyGHI = (p[0] * np.cos(np.deg2rad(360 / 365.25 * doy + p[1])) + p[2]) * np.cos(np.deg2rad(zenith)) ** (p[3] * np.cos(np.deg2rad(360 / 365.25 * doy + p[4])) + p[5])

    return ClearSkyGHI

In [144]:
def load_and_prepare_dataset(dataset_filename="../data/processed/model_training_dataset.pickle"):
    if os.path.isfile(dataset_filename):
        dataset = pd.read_pickle(dataset_filename)
        return dataset
    data = pd.read_csv('./dataframe_input_random_forest.csv')
    CMS = pd.read_csv('./CMS_EXP_Walon_Donnees_1minute.dat', skiprows=[0,2,3], low_memory=False)
    GHI = CMS[['TIMESTAMP','RG_moy_0_I']]
    GHI.TIMESTAMP = pd.to_datetime(GHI.TIMESTAMP)
    data.date = pd.to_datetime(data.date)
    data['doy'] = data.date.dt.dayofyear
    
    azimuth = list()
    block_matching_vx = list()
    block_matching_vy = list()
    brightness = list()
    zenith = list()
    cloud_index_walon = list()
    datas = list()
    for i in range (0, data.shape[0]):
        temp = list()
        azimuth.append(list(filter(None,data.azimuth[i].replace('[' , '').replace(']','').replace('\n','').split(' '))))
        block_matching_vx.append(list(filter(None,data.block_matching_vx[i].replace('[' , '').replace(']','').replace('\n','').split(' '))))
        block_matching_vy.append(list(filter(None,data.block_matching_vy[i].replace('[' , '').replace(']','').replace('\n','').split(' '))))    
        brightness.append(list(filter(None,data.brightness[i].replace('[' , '').replace(']','').replace('\n','').split(' '))))    
        zenith.append(list(filter(None,data.zenith[i].replace('[' , '').replace(']','').replace('\n','').split(' '))))    
        cloud_index_walon.append(list(filter(None,data.cloud_index_walon[i].replace('[' , '').replace(']','').replace('\n','').split(' '))))  
        temp.extend(block_matching_vx[i])
        temp.extend(block_matching_vy[i])
        temp.extend(cloud_index_walon[i])
        temp.extend(brightness[i])
        datas.append(temp)
    zen_moy = list()
    for element in zenith :
        zen_moy.append(sum([float(i) for i in element]) / len(element))
    data['zen_moy'] = zen_moy
    data['GHI_ClearSKY'] = data.apply(lambda x : compute_GHI(p=p, doy=x.doy,zenith=x.zen_moy),axis=1)
    
    # set targets
    GHI.index = GHI.TIMESTAMP
    GHI['target_0'] = GHI.RG_moy_0_I
    GHI['target_15'] = GHI.RG_moy_0_I.shift(-15)
    GHI['target_60'] = GHI.RG_moy_0_I.shift(-60)
    
    data = pd.merge(left=data, right=GHI, left_on='date', right_on = 'TIMESTAMP').drop("TIMESTAMP",1)
    data['target_0'] = data.target_0 / data.GHI_ClearSKY
    data['target_15'] = data.target_15 / data.GHI_ClearSKY
    data['target_60'] = data.target_60 / data.GHI_ClearSKY
    
    ## Creating dataset for training
    dataset = pd.DataFrame(datas)
    dataset['target_0'] = data.target_0
    dataset['target_15'] = data.target_15
    dataset['target_60'] = data.target_60
    dataset = dataset.dropna()
    
    dataset.to_pickle(dataset_filename)
    return dataset

dataset = load_and_prepare_dataset()

## Training Regression

In [145]:
def train_model(n_estimators=200, max_depth=7, criterion='mae'):
    model = RandomForestRegressor(n_estimators=n_estimators,
                                     min_samples_leaf=5,
                                     min_samples_split =10,
                                     max_depth = max_depth,
                                     n_jobs=6,
                                     random_state=42,
                                     criterion = criterion)
    model.fit(dataset[dataset.target_0 < 2].drop(['target_0','target_15','target_60'],1), 
              dataset[dataset.target_0 < 2]['target_0'])
    pickle.dump(model, open('./RF.pickle', 'wb'))
    model.fit(dataset[dataset.target_15 < 2].drop(['target_0','target_15','target_60'],1), 
              dataset[dataset.target_15 < 2]['target_15'])
    pickle.dump(model, open('./RF_15.pickle', 'wb'))
    model.fit(dataset[dataset.target_60 < 2].drop(['target_0','target_15','target_60'],1), 
              dataset[dataset.target_60 < 2]['target_60'])
    pickle.dump(model, open('./RF_60.pickle', 'wb'))

In [146]:
train_ui = interact_manual(train_model, 
                n_estimators=widgets.IntSlider(value=200, start=50, stop=1000, step=50),
                max_depth=widgets.IntSlider(value=7, start=2, stop=10, step=1),
                criterion=widgets.Select(options=['mae', 'mse'], value='mae'))

aW50ZXJhY3RpdmUoY2hpbGRyZW49KEludFNsaWRlcih2YWx1ZT0xMDAsIGRlc2NyaXB0aW9uPXUnbl9lc3RpbWF0b3JzJywgc3RlcD01MCksIEludFNsaWRlcih2YWx1ZT03LCBkZXNjcmlwdGnigKY=


# UI

In [147]:
tab = widgets.Tab()

names = ['Training', 'Output']
tab.children = [
    widgets.VBox(
        [Label("Model parameters"),
         train_ui.widget
        ]
    ),
    widgets.VBox([]
    )
]

for i in range(len(tab.children)):
    tab.set_title(i, names[i])
    
items = []
items.append(Label("Retrain PCT model"))
items.append(tab)

widgets.VBox(items)

VkJveChjaGlsZHJlbj0oTGFiZWwodmFsdWU9dSdSZXRyYWluIFBDVCBtb2RlbCcpLCBUYWIoY2hpbGRyZW49KFZCb3goY2hpbGRyZW49KExhYmVsKHZhbHVlPXUnTW9kZWwgcGFyYW1ldGVycyfigKY=
