# Import Data

## Import the dataset

Import the dataset if you are in colab:

In [1]:
from google.colab import files
files.upload()
!ls

Saving completeDataset.csv to completeDataset.csv
completeDataset.csv  sample_data


Set the path to the csv file:

In [0]:
PATH_CSV = "completeDataset.csv"

## Function to import data

In [0]:
import numpy as np
import pandas as pd
from sklearn.utils import shuffle

RANDOM_SHUFFLE_SEED = 22

#dataset
features_considered = ['IQ','pressure','wind_direction','wind_force','humidity','temperature']

DF = pd.read_csv(PATH_CSV, header=0, delimiter=';')
DF['date'] = pd.to_datetime(DF['date'],utc=True)


def importData(nb_prev_measures_for_predict):
    print("=====IMPORT=====")

    features = DF[features_considered]
    features.index = DF['date']

    dataset_test = features.values

    def higher_value(features,i):
        return[row[i] for row in dataset_test]

    max_pressure = max(higher_value(dataset_test,1))
    max_wind_force = max(higher_value(dataset_test,3))
    max_temperature = max(higher_value(dataset_test, 5))

    #normalize
    features['IQ'] = features['IQ'].apply(lambda x: x/10)
    features['pressure'] = features['pressure'].apply(lambda x: x/max_pressure)
    features['wind_force'] = features['wind_force'].apply(lambda x: x/max_wind_force)
    features['humidity'] = features['humidity'].apply(lambda x: x/100)
    features['temperature'] = features['temperature'].apply(lambda x: (x-273.15)/(max_temperature-273.15)) 

    #wind_direction to categorical
    features = pd.concat([features, pd.get_dummies(features['wind_direction'])], axis=1)
    features = features.drop(columns=["wind_direction"])

    x_train = []
    y_train = []
    countRow=0

    for indexRow, rowx in features.iterrows():
        # for each day we found with a value at 12:00
        if indexRow.hour == 12 and countRow >= nb_prev_measures_for_predict:
            try:
                # indexes for x (the range is inversed as our data are from the oldest to the newest)
                batchX = range(countRow, countRow - nb_prev_measures_for_predict, -1)
                # indexes for y
                batchY = [countRow+8,countRow+16,countRow+24]

                #application
                y_train.append(features.iloc[batchY]["IQ"].values)
                x_train.append(features.iloc[batchX].values)
            except:
                print("To long for ",indexRow)
        countRow+=1
    
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    
    x_train,y_train = shuffle(x_train,y_train, random_state=RANDOM_SHUFFLE_SEED)
    
    x_train = np.array(x_train)
    y_train = np.array(y_train)
    y_train = y_train.reshape(y_train.shape[0],3,1)
    print("x_train :",x_train.shape)
    print("y_train :",y_train.shape)
    
    print("====END IMPORT====")
    return(x_train,y_train)

## Function to save the results

To save the results in a csv file:

In [4]:
!mkdir logs
!ls

completeDataset.csv  logs  sample_data


In [5]:
%tensorflow_version 1.x

TensorFlow 1.x selected.


In [0]:
# code found here: https://stackoverflow.com/questions/42355122/can-i-export-a-tensorflow-summary-to-csv
import os
import numpy as np
import pandas as pd

import tensorflow as tf

from collections import defaultdict
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator


def tabulate_events(dpath):

    final_out = {}
    for dname in os.listdir(dpath):
        ea = EventAccumulator(os.path.join(dpath, dname)).Reload()
        tags = ea.Tags()['scalars']

        out = {}

        for tag in tags:
            tag_values=[]
            wall_time=[]
            steps=[]

            for event in ea.Scalars(tag):
                tag_values.append(event.value)
                wall_time.append(event.wall_time)
                steps.append(event.step)

            out[tag]=pd.DataFrame(data=dict(zip(steps,np.array([tag_values,wall_time]).transpose())), columns=steps,index=['value','wall_time'])

        if len(tags)>0:      
            df= pd.concat(out.values(),keys=out.keys())

        final_out[dname] = df


    return final_out

def saveProgressCSV():
  path = "logs/"
  steps = tabulate_events(path)
  pd.concat(steps.values(),keys=steps.keys()).to_csv('all_result.csv')

In [7]:
print(os.getcwd())
print(os.listdir())

/content
['.config', 'completeDataset.csv', 'logs', 'sample_data']


# Test differents models

##Librairies:

In [8]:
from keras.optimizers import RMSprop,Adam
from keras.callbacks import TensorBoard,EarlyStopping
from time import time
from keras.models import Model
from keras.layers import LSTM, Dense, Input, GRU, BatchNormalization, Dropout, Flatten
from keras.models import save_model
from datetime import datetime
from sklearn.model_selection import KFold

Using TensorFlow backend.


## Accuracy:


In [0]:
def calc_accuracy_3days(y_pred,y_true):
    """
    calculate the accurracy of the prediction according to the true value for each day [D+1, D+2, D+3]
    """
    accuracy = [0,0,0]
    for pred,true in zip (y_pred,y_true):
        for plusDay in range(3):
            if true[plusDay] == np.around(pred[plusDay],2):
                accuracy[plusDay]+=1
    return(np.array(accuracy)/len(y_true)*100)

dicAccuracy = {}

## Parameters:

In [0]:
# PARAMS TO TEST
EPOCHS = 100

## Main script

### Manual script:

In [11]:
x_train,y_train = importData(32)
trainLength = int(len(x_train)*0.8)
x,x_val = x_train[:trainLength],x_train[trainLength:]
y,y_val = y_train[:trainLength],y_train[trainLength:]

=====IMPORT=====


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

To long for  2020-02-27 12:00:00+00:00
To long for  2020-02-28 12:00:00+00:00
To long for  2020-02-29 12:00:00+00:00
x_train : (1877, 32, 42)
y_train : (1877, 3, 1)
====END IMPORT====


In [12]:
OPTI = "Adam"
hiddenUnit = 512
gruNext = 128
denseSize = 256

# Model
input_shape = (x_train.shape[-2],x_train.shape[-1])
inp = Input(input_shape,name='inputLayer')
_ = GRU(hiddenUnit,input_shape=input_shape,name='GRUcommonLayer1')(inp)
_ = Dense(denseSize,name="intermediateDenseLayer")(_)
_ = Dropout(0.2)(_)

outD1 = Dense(1,name="D1")(_)

outD2 = Dense(1,name="D2")(_)

outD3 = Dense(1,name="D3")(_)
  
#compile
model = Model(inputs=[inp], outputs=[outD1, outD2, outD3])    
model.compile(optimizer=OPTI, loss={'D1': 'mse', 'D2': 'mse', 'D3': 'mse'}, metrics={'D1': 'mae', 'D2': 'mae', 'D3': 'mae'})
model.summary()

#train:
model.fit(x=x, y=[y[:,0],y[:,1],y[:,2]], validation_data=(x_val,[y_val[:,0],y_val[:,1],y_val[:,2]]),epochs=EPOCHS,verbose=0)

#accuracy:
y_pred = model.predict(x_val)
y_val_visual = np.array(y_val)
y_pred_visual = np.array(y_pred).reshape(y_val_visual.shape)
acc = calc_accuracy_3days(y_pred_visual,y_val_visual)  
print("accuracy val: ",acc)

y_pred = model.predict(x_train)
y_train_visual = np.array(y_train)
y_pred_visual = np.array(y_pred).reshape(y_train_visual.shape)
acc = calc_accuracy_3days(y_pred_visual,y_train_visual)  
print("accuracy all dataset: ",acc)





Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
inputLayer (InputLayer)         (None, 32, 42)       0                                            
__________________________________________________________________________________________________
GRUcommonLayer1 (GRU)           (None, 512)          852480      inputLayer[0][0]                 
__________________________________________________________________________________________________
intermediateDenseLayer (Dense)  (None, 256)          131328      GRUcommonLayer1[0][0]            
__________________________________________________________________________________________________
dropout_1 (Dropout)             (None, 256)          0           interm

In [13]:
# Model
input_shape = (x_train.shape[-2],x_train.shape[-1])
inp = Input(input_shape,name='inputLayer')
_ = GRU(512,input_shape=input_shape,name='GRUcommonLayer1',return_sequences=True)(inp)
_ = GRU(512,input_shape=input_shape,name='GRUcommonLayer2')(_)
_ = Dropout(0.2)(_)

outD1 = Dense(1,name="D1")(_)

outD2 = Dense(1,name="D2")(_)

outD3 = Dense(1,name="D3")(_)
  
#compile
model = Model(inputs=[inp], outputs=[outD1, outD2, outD3])    
model.compile(optimizer=OPTI, loss={'D1': 'mse', 'D2': 'mse', 'D3': 'mse'}, metrics={'D1': 'mae', 'D2': 'mae', 'D3': 'mae'})
model.summary()

#train:
model.fit(x=x, y=[y[:,0],y[:,1],y[:,2]], validation_data=(x_val,[y_val[:,0],y_val[:,1],y_val[:,2]]),epochs=EPOCHS,verbose=0)

#accuracy:
y_pred = model.predict(x_val)
y_val_visual = np.array(y_val)
y_pred_visual = np.array(y_pred).reshape(y_val_visual.shape)
acc = calc_accuracy_3days(y_pred_visual,y_val_visual)  
print("accuracy val: ",acc)

y_pred = model.predict(x_train)
y_train_visual = np.array(y_train)
y_pred_visual = np.array(y_pred).reshape(y_train_visual.shape)
acc = calc_accuracy_3days(y_pred_visual,y_train_visual)  
print("accuracy all dataset: ",acc)

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
inputLayer (InputLayer)         (None, 32, 42)       0                                            
__________________________________________________________________________________________________
GRUcommonLayer1 (GRU)           (None, 32, 512)      852480      inputLayer[0][0]                 
__________________________________________________________________________________________________
GRUcommonLayer2 (GRU)           (None, 512)          1574400     GRUcommonLayer1[0][0]            
__________________________________________________________________________________________________
dropout_2 (Dropout)             (None, 512)          0           GRUcommonLayer2[0][0]            
____________________________________________________________________________________________

In [14]:
# Model
input_shape = (x_train.shape[-2],x_train.shape[-1])
inp = Input(input_shape,name='inputLayer')
_ = GRU(512,input_shape=input_shape,name='GRUcommonLayer1',return_sequences=True)(inp)
_ = Dropout(0.2)(_)

outD1 = GRU(128)(_)
outD1 = Dense(1,name="D1")(outD1)

outD2 = GRU(128)(_)
outD2 = Dense(1,name="D2")(outD2)

outD3 = GRU(128)(_)
outD3 = Dense(1,name="D3")(outD3)
  
#compile
model = Model(inputs=[inp], outputs=[outD1, outD2, outD3])    
model.compile(optimizer=OPTI, loss={'D1': 'mse', 'D2': 'mse', 'D3': 'mse'}, metrics={'D1': 'mae', 'D2': 'mae', 'D3': 'mae'})
model.summary()

#train:
model.fit(x=x, y=[y[:,0],y[:,1],y[:,2]], validation_data=(x_val,[y_val[:,0],y_val[:,1],y_val[:,2]]),epochs=EPOCHS,verbose=0)

#accuracy:
y_pred = model.predict(x_val)
y_val_visual = np.array(y_val)
y_pred_visual = np.array(y_pred).reshape(y_val_visual.shape)
acc = calc_accuracy_3days(y_pred_visual,y_val_visual)  
print("accuracy val: ",acc)

y_pred = model.predict(x_train)
y_train_visual = np.array(y_train)
y_pred_visual = np.array(y_pred).reshape(y_train_visual.shape)
acc = calc_accuracy_3days(y_pred_visual,y_train_visual)  
print("accuracy all dataset: ",acc)

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
inputLayer (InputLayer)         (None, 32, 42)       0                                            
__________________________________________________________________________________________________
GRUcommonLayer1 (GRU)           (None, 32, 512)      852480      inputLayer[0][0]                 
__________________________________________________________________________________________________
dropout_3 (Dropout)             (None, 32, 512)      0           GRUcommonLayer1[0][0]            
__________________________________________________________________________________________________
gru_1 (GRU)                     (None, 128)          246144      dropout_3[0][0]                  
____________________________________________________________________________________________

In [15]:
# Model
input_shape = (x_train.shape[-2],x_train.shape[-1])
inp = Input(input_shape,name='inputLayer')
_ = Dense(512,name='DensecommonLayer1')(inp)
_ = Dropout(0.2)(_)

outD1 = GRU(128)(_)
outD1 = Dense(1,name="D1")(outD1)

outD2 = GRU(128)(_)
outD2 = Dense(1,name="D2")(outD2)

outD3 = GRU(128)(_)
outD3 = Dense(1,name="D3")(outD3)
  
#compile
model = Model(inputs=[inp], outputs=[outD1, outD2, outD3])    
model.compile(optimizer=OPTI, loss={'D1': 'mse', 'D2': 'mse', 'D3': 'mse'}, metrics={'D1': 'mae', 'D2': 'mae', 'D3': 'mae'})
model.summary()

#train:
model.fit(x=x, y=[y[:,0],y[:,1],y[:,2]], validation_data=(x_val,[y_val[:,0],y_val[:,1],y_val[:,2]]),epochs=EPOCHS,verbose=0)

#accuracy:
y_pred = model.predict(x_val)
y_val_visual = np.array(y_val)
y_pred_visual = np.array(y_pred).reshape(y_val_visual.shape)
acc = calc_accuracy_3days(y_pred_visual,y_val_visual)  
print("accuracy val: ",acc)

y_pred = model.predict(x_train)
y_train_visual = np.array(y_train)
y_pred_visual = np.array(y_pred).reshape(y_train_visual.shape)
acc = calc_accuracy_3days(y_pred_visual,y_train_visual)  
print("accuracy all dataset: ",acc)

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
inputLayer (InputLayer)         (None, 32, 42)       0                                            
__________________________________________________________________________________________________
DensecommonLayer1 (Dense)       (None, 32, 512)      22016       inputLayer[0][0]                 
__________________________________________________________________________________________________
dropout_4 (Dropout)             (None, 32, 512)      0           DensecommonLayer1[0][0]          
__________________________________________________________________________________________________
gru_4 (GRU)                     (None, 128)          246144      dropout_4[0][0]                  
____________________________________________________________________________________________

In [16]:
# Model
input_shape = (x_train.shape[-2],x_train.shape[-1])
inp = Input(input_shape,name='inputLayer')
_ = Dense(512,name='DensecommonLayer1')(inp)
_ = Dropout(0.3)(_)

outD1 = Dense(128)(_)
outD1 = Dropout(0.3)(outD1)
outD1 = Flatten()(outD1)
outD1 = Dense(1,name="D1")(outD1)

outD2 = Dense(128)(_)
outD2 = Dropout(0.3)(outD2)
outD2 = Flatten()(outD2)
outD2 = Dense(1,name="D2")(outD2)

outD3 = Dense(128)(_)
outD3 = Dropout(0.3)(outD3)
outD3 = Flatten()(outD3)
outD3 = Dense(1,name="D3")(outD3)
  
#compile
model = Model(inputs=[inp], outputs=[outD1, outD2, outD3])    
model.compile(optimizer=OPTI, loss={'D1': 'mse', 'D2': 'mse', 'D3': 'mse'}, metrics={'D1': 'mae', 'D2': 'mae', 'D3': 'mae'})
model.summary()

#train:
model.fit(x=x, y=[y[:,0],y[:,1],y[:,2]], validation_data=(x_val,[y_val[:,0],y_val[:,1],y_val[:,2]]),epochs=EPOCHS,verbose=0)

#accuracy:
y_pred = model.predict(x_val)
y_val_visual = np.array(y_val)
y_pred_visual = np.array(y_pred).reshape(y_val_visual.shape)
acc = calc_accuracy_3days(y_pred_visual,y_val_visual)  
print("accuracy val: ",acc)

y_pred = model.predict(x_train)
y_train_visual = np.array(y_train)
y_pred_visual = np.array(y_pred).reshape(y_train_visual.shape)
acc = calc_accuracy_3days(y_pred_visual,y_train_visual)  
print("accuracy all dataset: ",acc)

Model: "model_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
inputLayer (InputLayer)         (None, 32, 42)       0                                            
__________________________________________________________________________________________________
DensecommonLayer1 (Dense)       (None, 32, 512)      22016       inputLayer[0][0]                 
__________________________________________________________________________________________________
dropout_5 (Dropout)             (None, 32, 512)      0           DensecommonLayer1[0][0]          
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 32, 128)      65664       dropout_5[0][0]                  
____________________________________________________________________________________________