In [32]:
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.utils import plot_model
from sklearn.model_selection import train_test_split
import os, shutil

In [37]:
def load_data(version, mag, path):
    """Assemble the lagged GIF table from the per-lag CSV files under ``path``.

    Six files are read: for each of the sub-folders ``t_minus_1/``,
    ``t_minus_2/`` and ``t_minus_3/`` the files
    ``gif_data_2008_<version>_<mag>.csv`` and
    ``gif_data_2016_<version>_<mag>.csv``. Each file must provide a ``gif``
    column; the ``t_minus_1`` files must also provide a ``time`` column.

    Parameters
    ----------
    version : str
        Spliced verbatim into the file name.
    mag : str
        Spliced verbatim into the file name.
    path : str
        Folder prefix. It is concatenated as a plain string, so it has to end
        with a path separator.

    Returns
    -------
    pandas.DataFrame
        Columns ``'t-1'``, ``'t-2'``, ``'t-3'``, ``'t'`` and ``'time'``; the
        2008 rows come first, then the 2016 rows. The row labelled 0 and every
        row containing a NaN are removed.

    Notes
    -----
    ``DataFrame.append`` / ``Series.append`` were removed in pandas 2.0, so the
    stacking is done with ``pd.concat`` (same result, works on old and new
    pandas).

    NOTE(review): ``'t'`` is built from the same ``t_minus_1`` series as
    ``'t-1'`` and column assignment aligns on the index, so the two columns
    are identical row for row. If ``'t'`` was meant to be the following
    observation, a shift is missing -- confirm the intended target.
    """
    years = ('2008', '2016')
    lags = (1, 2, 3)

    # Same read order as before: lag by lag, 2008 before 2016.
    tables = {
        (lag, year): pd.read_csv(
            path + 't_minus_' + str(lag) + '/gif_data_' + year + '_' + version + '_' + mag + '.csv'
        )
        for lag in lags
        for year in years
    }

    # One frame per year holding the three lag columns, stacked 2008 -> 2016.
    yearly_frames = [
        pd.DataFrame({'t-' + str(lag): tables[(lag, year)]['gif'] for lag in lags})
        for year in years
    ]
    data = pd.concat(yearly_frames, ignore_index=True)
    data = data.drop([0])

    # These assignments align on the index, so label 0 of the stacked series
    # is simply left out.
    data['t'] = pd.concat([tables[(1, year)]['gif'] for year in years], ignore_index=True)
    data['time'] = pd.concat([tables[(1, year)]['time'] for year in years], ignore_index=True)

    data = data.dropna()
    return data

def preprocessing(data, test_size_param=0.33):
    """Split the lag table into train/test parts and convert them to tensors.

    The columns ``'t-1'``, ``'t-2'`` and ``'t-3'`` are the inputs and ``'t'``
    is the target. Rows are split with ``train_test_split`` using
    ``random_state=42``, so the split is repeatable for a given ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``'t-1'``, ``'t-2'``, ``'t-3'`` and ``'t'``.
    test_size_param : float, optional
        Forwarded to ``train_test_split`` as ``test_size``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` as float32 ``tf.constant``
        tensors.
    """
    inputs = data[['t-1','t-2','t-3']].values
    targets = data[['t']].values
    parts = train_test_split(inputs, targets, test_size=test_size_param, random_state=42)

    # train_test_split already yields the parts in the order returned here.
    X_train, X_test, y_train, y_test = (tf.constant(part, np.float32) for part in parts)
    return (X_train, X_test, y_train, y_test)

def _save_and_show_figure(heading, x_values, curves, out_file, legend=False):
    """Draw ``curves`` on a new 16x9 figure, save it, show it, then close it.

    ``curves`` is a sequence of ``(y_values, plot_kwargs)`` pairs; every curve
    is plotted against ``x_values``.
    """
    fig = plt.figure(figsize=(16,9), dpi=200)
    plt.title(heading)
    for y_values, plot_kwargs in curves:
        plt.plot(x_values, y_values, **plot_kwargs)
    if legend:
        plt.legend()
    plt.savefig(out_file)
    plt.show()
    # Close explicitly: this runs many times per session and pyplot keeps
    # every figure alive until it is closed.
    plt.close(fig)


def do_the_graphics(target, predict, title, graphics_folder):
    """Save and show three diagnostic figures comparing ``predict`` to ``target``.

    Figures written (file name = ``graphics_folder + title + suffix``):

    * ``_GIF.png``  -- target and prediction overlaid, with a legend;
    * ``_RMSE.png`` -- ``sqrt(tf.keras.losses.mse(target, predict))`` per sample;
    * ``_MAE.png``  -- ``tf.keras.losses.mae(target, predict)`` per sample.

    Parameters
    ----------
    target, predict : array-like
        Reference and predicted values. Both Keras losses average over the
        last axis, so the curves are per-sample only if the inputs are shaped
        ``(n, 1)`` -- assumed from the callers in this file, TODO confirm.
    title : str
        Used both as the figure title prefix and inside the file names.
    graphics_folder : str
        Output folder prefix; concatenated as a plain string, so it has to end
        with a path separator.
    """
    sample_axis = range(len(target))

    _save_and_show_figure(
        title+' GIF',
        sample_axis,
        [(target, {'alpha': 1, 'label': 'y_test'}),
         (predict, {'alpha': 0.5, 'label': 'y_predict'})],
        graphics_folder+title+'_GIF.png',
        legend=True,
    )

    _save_and_show_figure(
        title+' RMSE',
        sample_axis,
        [(np.sqrt(tf.keras.losses.mse(target, predict)), {})],
        graphics_folder+title+'_RMSE.png',
    )

    _save_and_show_figure(
        title+' MAE',
        sample_axis,
        [(tf.keras.losses.mae(target, predict), {})],
        graphics_folder+title+'_MAE.png',
    )

def DFANN_model(X_train, X_test, y_train, y_test, name, graphics_folder):
    """Train a small dense network on the lagged inputs and plot its predictions.

    The network has four ReLU ``Dense`` layers of 12 units followed by a
    single linear output unit. It is compiled with MSE loss and the Adam
    optimizer and fitted for 100 epochs on the full training set (no
    validation split). Predictions on both the test and the training set are
    passed to ``do_the_graphics``.

    Parameters
    ----------
    X_train, X_test
        Inputs with 3 features per sample.
    y_train, y_test
        Matching targets.
    name : str
        Label inserted into the plot titles and file names.
    graphics_folder : str
        Folder prefix handed to ``do_the_graphics``.

    Returns
    -------
    None
    """
    neuron_number = 12 # 12 neurons in each Dense layer
    i_data_dim = 3 # 3 GIFs as input
    hidden_layer_count = 4

    model_DFANN = Sequential()
    # Only the first layer declares the input size.
    model_DFANN.add(Dense(neuron_number, activation='relu', input_dim=i_data_dim))
    for _ in range(hidden_layer_count - 1):
        model_DFANN.add(Dense(neuron_number, activation='relu'))
    model_DFANN.add(Dense(1, activation='linear'))

    model_DFANN.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['mae', 'mse'])

    # summary() prints the table itself and returns None; wrapping it in
    # print() only appended a stray "None" line to the output.
    model_DFANN.summary()

    model_DFANN.fit(X_train, y_train, epochs=100, verbose=0)
    y_predict = model_DFANN.predict(X_test)
    y_trainpredict = model_DFANN.predict(X_train)

    do_the_graphics(y_test, y_predict, 'GIF '+name+': test vs predicted (DFANN)', graphics_folder)
    do_the_graphics(y_train, y_trainpredict, 'GIF '+name+': train vs train predicted (DFANN)', graphics_folder)
    
def plot_history(history, name, graphics_folder):
    """Plot training vs validation error per epoch and save both figures.

    Two figures are produced from ``history.history``:

    * MAE per epoch (``'mae'`` / ``'val_mae'``), y-axis limited to ``[0, 5]``,
      saved as ``graphics_folder + name + '_MAE_VS_EPOCH.png'``;
    * RMSE per epoch (square root of ``'mse'`` / ``'val_mse'``), y-axis limited
      to ``[0, 20]``, saved as ``graphics_folder + name + '_RMSE_VS_EPOCH.png'``.

    Parameters
    ----------
    history
        Object exposing ``.history`` (a mapping with the four metric keys
        above) and ``.epoch``; a missing key raises ``KeyError``.
    name : str
        Label used in the titles and the file names.
    graphics_folder : str
        Output folder prefix, concatenated as a plain string.
    """
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch

    # (title tail, y label, train column, validation column, transform, y limits, file suffix)
    panels = (
        (': MAE vs Epoch', 'Mean Abs Error', 'mae', 'val_mae',
         lambda column: column, [0,5], '_MAE_VS_EPOCH.png'),
        (': RMSE vs Epoch', 'Root Mean Square Error', 'mse', 'val_mse',
         np.sqrt, [0,20], '_RMSE_VS_EPOCH.png'),
    )

    for title_tail, y_label, train_key, val_key, transform, y_limits, file_suffix in panels:
        plt.figure(figsize=(16,9), dpi=200)
        plt.title('GIF '+name+title_tail)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.plot(hist['epoch'], transform(hist[train_key]), label='Train Error')
        plt.plot(hist['epoch'], transform(hist[val_key]), label = 'Val Error')
        plt.ylim(y_limits)
        plt.legend()
        plt.savefig(graphics_folder+name+file_suffix)

    # A single show() after both figures have been built and saved.
    plt.show()
    
def DFANN_model_w_valset(X_train, X_test, y_train, y_test, name, graphics_folder):
    """Train the dense network with a validation split and plot the results.

    Same architecture and compile settings as ``DFANN_model`` (four ReLU
    ``Dense`` layers of 12 units, one linear output, MSE loss, Adam), but the
    fit uses ``validation_split=0.2`` so the per-epoch validation metrics are
    recorded. Predictions on the test and training sets go to
    ``do_the_graphics``; the training history goes to ``plot_history``.

    Parameters
    ----------
    X_train, X_test
        Inputs with 3 features per sample.
    y_train, y_test
        Matching targets.
    name : str
        Label inserted into the plot titles and file names.
    graphics_folder : str
        Folder prefix handed to the plotting helpers.

    Returns
    -------
    None
    """
    neuron_number = 12 # 12 neurons in each Dense layer
    i_data_dim = 3 # 3 GIFs as input
    hidden_layer_count = 4

    model_DFANN = Sequential()
    # Only the first layer declares the input size.
    model_DFANN.add(Dense(neuron_number, activation='relu', input_dim=i_data_dim))
    for _ in range(hidden_layer_count - 1):
        model_DFANN.add(Dense(neuron_number, activation='relu'))
    model_DFANN.add(Dense(1, activation='linear'))

    model_DFANN.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['mae', 'mse'])

    # summary() prints the table itself and returns None; wrapping it in
    # print() only appended a stray "None" line to the output.
    model_DFANN.summary()

    # The History object is handed to plot_history, which builds its own
    # DataFrame, so no local copy is kept here.
    history = model_DFANN.fit(X_train, y_train, epochs=100, validation_split = 0.2, verbose=0)

    y_predict = model_DFANN.predict(X_test)
    y_trainpredict = model_DFANN.predict(X_train)

    do_the_graphics(y_test, y_predict, 'GIF '+name+': test vs predicted (DFANNvalset)', graphics_folder)
    do_the_graphics(y_train, y_trainpredict, 'GIF '+name+': train vs train predicted (DFANNvalset)', graphics_folder)
    plot_history(history, name+'(DFANNvalset)', graphics_folder)
    
def UwU(folder_path):
    """Delete everything inside ``folder_path`` while keeping the folder itself.

    Regular files and symbolic links are unlinked; sub-directories are removed
    recursively. A failure on one entry is reported with ``print`` and does not
    stop the remaining entries from being processed.

    Parameters
    ----------
    folder_path : str
        Existing directory to empty. ``os.listdir`` raises if it is missing.
    """
    for entry_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, entry_name)
        try:
            if os.path.islink(file_path) or os.path.isfile(file_path):
                os.unlink(file_path)
                continue
            if os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print('Failed to delete %s. Reason: %s' % (file_path, e))
    
def main(names, data_path, graphics_folder):
    """Run both DFANN experiments for every ``(version, mag)`` pair in ``names``.

    The output folder is created if needed and then emptied, so every run
    starts from a clean set of figures. For each pair the data is loaded,
    split, and fed to ``DFANN_model`` and ``DFANN_model_w_valset``. A failure
    for one pair is printed and the loop moves on to the next pair.

    Parameters
    ----------
    names : iterable of (str, str)
        ``(version, mag)`` pairs forwarded to ``load_data``.
    data_path : str
        Folder prefix forwarded to ``load_data``.
    graphics_folder : str
        Folder where the figures are written. WARNING: everything already in
        this folder is deleted first.
    """
    # Without this, a missing folder makes os.listdir (inside UwU) raise
    # before any dataset is processed, and every later savefig would fail too.
    os.makedirs(graphics_folder, exist_ok=True)
    UwU(graphics_folder)

    for name,mag in names:
        try:
            df = load_data(name, mag, data_path)
            X_train, X_test, y_train, y_test = preprocessing(df)
            DFANN_model(X_train, X_test, y_train, y_test, name+mag, graphics_folder)
            DFANN_model_w_valset(X_train, X_test, y_train, y_test, name+mag, graphics_folder)
        except Exception as e:
            # Best effort: report and continue with the remaining pairs.
            print('Failed because: %s' % e)
        

In [None]:
# Every (version, magnitude) combination to process; the order is
# full/5.0, full/6.0, null/5.0, null/6.0.
names = [(version, mag) for version in ('full', 'null') for mag in ('5.0', '6.0')]

# Input CSV root and output figure folder for this run.
# Note: main() empties graphics_folder before writing new figures.
data_path = '../others/data_japan_50_60_2008_2016/data_japan/data_gif/'
graphics_folder = '../others/Graphics/Japan/'

main(names, data_path, graphics_folder)

Model: "sequential_43"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_215 (Dense)            (None, 12)                48        
_________________________________________________________________
dense_216 (Dense)            (None, 12)                156       
_________________________________________________________________
dense_217 (Dense)            (None, 12)                156       
_________________________________________________________________
dense_218 (Dense)            (None, 12)                156       
_________________________________________________________________
dense_219 (Dense)            (None, 1)                 13        
Total params: 529
Trainable params: 529
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
# Second run with different input/output folders. `names` is not redefined
# here, so the cell that assigns it must have been executed first.
# Note: main() empties graphics_folder before writing new figures.
graphics_folder = '../others/Graphics/Chile/'
data_path = '../others/data_50_60_2008_2016/data/data_gif/'

main(names, data_path, graphics_folder)