In [12]:
import os
import json
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [13]:
print("Num GPUs Available: ", tf.config.list_physical_devices())

Num GPUs Available:  [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# No outlier treatment

In [14]:
station = 'C6.zip'

paths = [f'data/norm_data_01/{station}', f'data/norm_data_11/{station}']
input_width = [24, 48, 96]
neurons = [64]

for path in paths:
    df = pd.read_csv(path, compression='zip', header=0, sep=',')
    df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S')
    df = df.astype({'T': 'float', 'HR': 'float', 'P': 'float', 'u2': 'float', 'v2': 'float', 'u6': 'float', 'v6': 'float', 'u10': 'float', 'v10': 'float', 'altitud': 'float', 'latitud': 'float', 'longitud': 'float'})
    
    min_maxs = json.load(open('data/min_maxs.json', 'r'))

    df['day'] = df['date'].dt.dayofyear / 365
    df['time'] = df['date'].dt.hour / 24

    df = df[['T', 'HR', 'P', 'u10', 'v10', 'day', 'time', 'date']]
    cols = ['T', 'HR', 'P', 'u10', 'v10']
    for width in input_width:
        prediction_width = 1

        df_train = df[df['date'] < '2019-01-01']
        df_test = df[df['date'] >= '2019-01-01']

        train_X = []
        train_Y = []

        for i in range(width, len(df_train) - prediction_width + 1):
            train_X.append(df_train.iloc[i - width:i][cols].values)
            train_Y.append(df_train.iloc[i:i + prediction_width][['T', 'u10', 'v10']].values)

        test_X = []
        test_Y = []

        for i in range(width, len(df_test) - prediction_width + 1):
            test_X.append(df_test.iloc[i - width:i][cols].values)
            test_Y.append(df_test.iloc[i:i + prediction_width][['T', 'u10', 'v10']].values)

        train_X = np.array(train_X)
        train_Y = np.array(train_Y)

        test_X = np.array(test_X)
        test_Y = np.array(test_Y)

        test_Y[:, 0, 0] = test_Y[:, 0, 0] * (min_maxs['T'][1] - min_maxs['T'][0]) + min_maxs['T'][0]
        test_Y[:, 0, 1] = test_Y[:, 0, 1] * (min_maxs['u10'][1] - min_maxs['u10'][0]) + min_maxs['u10'][0]
        test_Y[:, 0, 2] = test_Y[:, 0, 2] * (min_maxs['v10'][1] - min_maxs['v10'][0]) + min_maxs['v10'][0]

        for n in neurons:
            model = Sequential()
            model.add(LSTM(n, activation='tanh', input_shape=(train_X.shape[1], train_X.shape[2])))
            model.add(Dropout(0.2))
            model.add(Dense(units=3, activation='linear'))

            model.compile(optimizer='adam', loss='mse', metrics=['mae'])
            print("Training model with input width: ", width, " and ", n, " neurons")
            with tf.device('/device:GPU:0'):
                history = model.fit(train_X, train_Y, epochs=15, batch_size=32, validation_split=0.1, verbose=1, shuffle=False)
            model.save(f'models/lstm_{path.split("_")[-1].split("/")[0]}_{n}_{width}_15epochs_32bs.h5')

            y_pred = model.predict(test_X)
            for idx, column in enumerate(['T', 'u10', 'v10']):
                plt.figure(figsize=(10, 6))

                y_pred[:, idx] = y_pred[:, idx] * (min_maxs[column][1] - min_maxs[column][0]) + min_maxs[column][0]
                
                plt.plot(test_Y[:1000, 0, idx], label='Real')
                plt.plot(y_pred[:1000, idx], label='Pred')
                
                rmse = np.sqrt(np.mean(np.power((test_Y[:, 0, idx] - y_pred[:, idx]), 2)))
                plt.legend()
                plt.title(f'{path.split("_")[-1].split("/")[0]}_{n}_{width}_15epochs_32bs_{column}, RMSE: {rmse:.6f}')
                plt.savefig(f'plots/{path.split("_")[-1].split("/")[0]}_{column}_{n}_{width}_15ep_32bs.png')
                plt.clf()

Training model with input width:  24  and  64  neurons
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Training model with input width:  48  and  64  neurons
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Training model with input width:  96  and  64  neurons
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Training model with input width:  24  and  64  neurons
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Training model with input width:  48  and  64  neurons
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

In [15]:
# copy_df = df.copy()
# copy_df.set_index('date', inplace=True)
# copy_df = copy_df[(copy_df.index.year == 2019) & (copy_df.index.month == 1)]
# groups = [0, 1, 2, 11, 12] 
# i = 1
# values = copy_df.values
# plt.figure(figsize=(15, 10))
# for group in groups:
#     plt.subplot(len(groups), 1, i)
#     plt.plot(values[:, group])
#     plt.title(copy_df.columns[group], y=0.5, loc='right')
#     i += 1
# plt.savefig('plots/variables_month.png')
# plt.show()
# del copy_df, groups, i, values

# Treated outliers

In [18]:
station = 'C6.zip'

paths = [f'data/norm_data_o01/{station}']#, f'data/norm_data_o11/{station}'

input_width = [24, 48, 96]
neurons = [64]

for path in paths:
    df = pd.read_csv(path, compression='zip', header=0, sep=',')
    df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S')
    df = df.astype({'T': 'float', 'HR': 'float', 'P': 'float', 'u2': 'float', 'v2': 'float', 'u6': 'float', 'v6': 'float', 'u10': 'float', 'v10': 'float', 'altitud': 'float', 'latitud': 'float', 'longitud': 'float'})
    
    min_maxs = json.load(open('data/quantiles.json', 'r'))

    df['day'] = df['date'].dt.dayofyear / 365
    df['time'] = df['date'].dt.hour / 24

    df = df[['T', 'HR', 'P', 'u10', 'v10', 'day', 'time', 'date']]
    cols = ['T', 'HR', 'P', 'u10', 'v10']
    for width in input_width:
        prediction_width = 1

        df_train = df[df['date'] < '2019-01-01']
        df_test = df[df['date'] >= '2019-01-01']

        train_X = []
        train_Y = []

        for i in range(width, len(df_train) - prediction_width + 1):
            train_X.append(df_train.iloc[i - width:i][cols].values)
            train_Y.append(df_train.iloc[i:i + prediction_width][['T', 'u10', 'v10']].values)

        test_X = []
        test_Y = []

        for i in range(width, len(df_test) - prediction_width + 1):
            test_X.append(df_test.iloc[i - width:i][cols].values)
            test_Y.append(df_test.iloc[i:i + prediction_width][['T', 'u10', 'v10']].values)

        train_X = np.array(train_X)
        train_Y = np.array(train_Y)

        test_X = np.array(test_X)
        test_Y = np.array(test_Y)

        test_Y[:, 0, 0] = test_Y[:, 0, 0] * (min_maxs['T'][1] - min_maxs['T'][0]) + min_maxs['T'][0]
        test_Y[:, 0, 1] = test_Y[:, 0, 1] * (min_maxs['u10'][1] - min_maxs['u10'][0]) + min_maxs['u10'][0]
        test_Y[:, 0, 2] = test_Y[:, 0, 2] * (min_maxs['v10'][1] - min_maxs['v10'][0]) + min_maxs['v10'][0]

        for n in neurons:
            model = Sequential()
            model.add(LSTM(n, activation='tanh', input_shape=(train_X.shape[1], train_X.shape[2])))
            model.add(Dropout(0.2))
            model.add(Dense(units=3, activation='linear'))

            model.compile(optimizer='adam', loss='mse', metrics=['mae'])

            with tf.device('/device:GPU:0'):
                history = model.fit(train_X, train_Y, epochs=15, batch_size=32, validation_split=0.1, verbose=1, shuffle=False)
            model.save(f'models/lstm_{path.split("_")[-1].split("/")[0]}_{n}_{width}_15epochs_32bs.h5')

            y_pred = model.predict(test_X)
            for idx, column in enumerate(['T', 'u10', 'v10']):
                plt.figure(figsize=(10, 6))

                y_pred[:, idx] = y_pred[:, idx] * (min_maxs[column][1] - min_maxs[column][0]) + min_maxs[column][0]
                
                plt.plot(test_Y[:1000, 0, idx], label='Real')
                plt.plot(y_pred[:1000, idx], label='Pred')
                
                rmse = np.sqrt(np.mean(np.power((test_Y[:, 0, idx] - y_pred[:, idx]), 2)))
                plt.legend()
                plt.title(f'{path.split("_")[-1].split("/")[0]}_{n}_{width}_15epochs_32bs_{column}, RMSE: {rmse:.6f}')
                plt.savefig(f'plots/{path.split("_")[-1].split("/")[0]}_{column}_{n}_{width}_15ep_32bs.png')
                plt.clf()

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

In [None]:
%reset -f