In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from functions import  SP_Learner, interpolate
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
import matplotlib
from matplotlib import rc
import warnings
from pathlib import Path

# Notebook-wide configuration: warning filter, plotting defaults, default model list.

# Silence every Python warning for the rest of the session (this also hides
# pandas/scikit-learn deprecation and chained-assignment warnings).
warnings.filterwarnings('ignore')
# Render figure text with matplotlib's built-in text engine rather than LaTeX.
rc("text", usetex=False)
matplotlib.rcParams.update({'font.size': 14})
# NOTE(review): seaborn's set() also writes matplotlib rcParams, so it presumably
# overrides the font.size chosen on the previous line — confirm which one is wanted.
sns.set(font_scale = 2)

# Default: run every available model. A later cell reassigns model_select.
model_select =  ['SPATIAL', 'CNN', 'LSTM']


# Select which models to run
Choices are `['SPATIAL', 'CNN', 'LSTM']`; by default, all models are run.

In [None]:
# Override the default selection: only the models listed here are trained by the
# `if '<MODEL>' in model_select:` cells further down.
model_select =  ['SPATIAL']

# Select which dataset to run: ADCP, temperature, or dissolved oxygen

In [None]:
# Available dataset stems. This bare list is only a reminder of the valid choices:
# it is neither assigned nor the last expression of the cell, so it has no effect.
['ADCP_sensor_', 'Oxygen_Sensor', 'Temp_Sensor']
# Stem of the CSV file names to load; the loading cell appends the sensor index
# and '.csv' to it, and picks the sensor indices based on whether it contains 'ADCP'.
ts_stem = 'ADCP_sensor_' 


In [None]:
# --- Split fractions and output folders ---------------------------------------
train_split, test_split = .8 ,1 # We allocate 80% for training and remaining 20% for test
root = '.'
result = root + '/Results/'
error_folder = './errors/'
run_metrics = './running_data/'
mod_summary = './model_summary/'

# Create every output folder up front (no error if it already exists).
for out_dir in (result, error_folder, run_metrics, mod_summary):
    Path(out_dir).mkdir(parents=True, exist_ok=True)

# Sensors to load: 2 and 4 for the ADCP dataset, 1, 8 and 9 for the others.
if 'ADCP' in ts_stem:
    sensor_idx = [2,4]
else:
    sensor_idx = [1,8,9]

# --- Load the selected dataset from local CSV files ----------------------------
# One file per sensor: <file_name><sensor index>.csv, indexed by 'observed_timestamp'.
# NOTE(review): the original comment said the data is resampled to a 30-minute
# range — not verifiable from this notebook, confirm against the data pipeline.
Temp = []      # one list of values per sensor, in sensor_idx order
frames = []    # per-sensor frames, concatenated once after the loop
file_name = '../sensor_data/interpolated_data/' + ts_stem
for i in sensor_idx: # We have data quality issues with the other sensors
    df = pd.read_csv(file_name+str(i)+'.csv',index_col="observed_timestamp", parse_dates=True)
    frames.append(df)
    # Number of rows. (df.size is rows * columns, which only equals the row
    # count for a single-column frame.)
    s = len(df)
    print('data_length: ', s)
    df_train = df[:int(train_split*s)]
    # NOTE(review): df_train is a slice of df. Depending on the pandas version
    # (view vs. copy-on-write) this fill may or may not also write -1.0 into df,
    # and therefore into the values appended to Temp below — confirm the intent.
    df_train[np.isnan(df_train)]=-1.0
    df_test = df[int(train_split*s):int(test_split*s)]
    print(len(df_train), len(df_test))
    Temp.append(df.value.to_list()) # Creating a list of all sensor dataframes

# All sensors stacked row-wise. DataFrame.append was removed in pandas 2.0, and
# appending inside the loop copied the accumulated frame on every iteration.
data = pd.concat(frames) if frames else pd.DataFrame()

In [None]:
# Build the lag-differenced dataset (one row per sensor) and its matching timestamps.
nlag = 48  # differencing lag, in samples

print([len(series) for series in Temp])
# Truncate every sensor series to the shortest one so that they can be combined
# column-wise into a single frame.
l = min(len(series) for series in Temp)
Temp = [T[:l] for T in Temp]
print([len(series) for series in Temp])

# Timestamps for the differenced samples. diff(periods=nlag) leaves the first
# nlag rows undefined and they are dropped below, so the time axis starts at
# nlag; it stops at l so that it has exactly one entry per differenced sample
# even when the last-loaded sensor (df) is longer than the shortest one.
# NOTE(review): this uses the index of the last sensor loaded and assumes all
# sensors share the same timestamps — confirm.
Temp_time = df.index[nlag:l]
test_Temp_time = Temp_time[int(train_split*(len(Temp_time))):]

# One column per sensor, named S1, S2, ... in sensor_idx order. Kept under its
# own name so that df (the last loaded sensor frame, with its datetime index)
# is not overwritten and this cell can be re-run.
sensor_frame = pd.DataFrame({'S{}'.format(k + 1): series for k, series in enumerate(Temp)})

# Lag-nlag difference with the undefined leading rows removed, transposed so
# that the array has shape (number of sensors, number of samples).
Temp_diff = sensor_frame.diff(periods=nlag)[nlag:]
Temp_diff = np.transpose(Temp_diff.to_numpy())


# Model parameters

In [None]:
# Training configuration; every value below is passed to SP_Learner by the model cells.
# NOTE(review): the meanings given here are inferred from the names only —
# confirm against SP_Learner in functions.py.
# NOTE(review): the model cells offset the test timestamps by an amount equal to
# predict_position + train_time for the values below — keep them in sync.
train_time = 6          # presumably the input window length, in samples
predict_time = 1        # presumably the number of samples predicted per window
predict_position = 47   # presumably the offset of the predicted sample after the window
Stride = 1              # presumably the step between consecutive windows
epoches = 100           # presumably the number of training epochs
batch_size = 512        # passed as batch_size=... to SP_Learner

In [None]:
#SPATIAL -------------------------- model training 
# Trains the SPATIAL model on Temp_diff, writes per-sensor error metrics to
# <error_folder>/<name>_error.txt and keeps the test targets / predictions in
# ytest_spatial / ypred_spatial for the comparison cells.
if 'SPATIAL' in model_select:
    name = '_Temp_diff_' + 'SPATIAL'
    tp_py, tp_ty, tp_error, tp_std, tp_model = SP_Learner(Temp_diff, name,  train_split, test_split,  train_time, predict_time, 
                                                            predict_position, Stride, 0, epoches, run_model='SPATIAL',
                                                            batch_size=batch_size, plot=False)

    # Human-readable dataset label taken from the selected file stem
    # ('ADCP', 'Oxygen' or 'Temp'), so logs are not labelled with the wrong dataset.
    dataset_label = ts_stem.split('_')[0]
    # Summary statistics of the raw loaded data (not of the model output).
    print(dataset_label + ' SPATIAL MEAN: ', data.mean(), 'STD: ',data.std(), 'Skew: ', data.skew())

    #record the errors
    error_file = error_folder + name + '_error.txt'
    MAE = []
    MAPE = []
    STD = []
    # Mode "w" truncates any previous report, so one open replaces the former
    # truncate-then-append pair.
    with open(error_file, "w") as text_file:
        for i in range(len(tp_py)):
            mae = mean_absolute_error(tp_ty[i], tp_py[i])
            MAE.append(mae)
            mape = mean_absolute_percentage_error(tp_ty[i], tp_py[i])
            MAPE.append(mape)
            # Spread of the absolute errors for this sensor.
            std = np.std(abs(tp_ty[i] - tp_py[i]))
            STD.append(std)
            info = '{} sensor {}:   mae: {:.4f},  std: {:.4f}, mape: {:.4f}'.format(dataset_label, i, mae, std, mape)
            text_file.write(info + "\n")
            print('mae: ', mae, 'std: ', std, 'mape: ', mape)
        # Averages over all sensors.
        info = '{}:   MAE: {:.4f},  STD: {:.4f}, MAPE: {:.4f}'.format(dataset_label, np.mean(MAE), np.mean(STD), np.mean(MAPE))
        text_file.write(info + "\n")

    #save predicted data
    # Skip the leading timestamps so that the dates line up with the predictions;
    # the offset was the literal 47+6, i.e. predict_position + train_time.
    test_time = test_Temp_time[predict_position + train_time:]
    print(len(test_time), len(tp_ty[0]))
    for i in range(len(tp_py)):
        # Building the frame raises if dates and predictions differ in length.
        pred_frame = pd.DataFrame({'date': test_time,
                                   'value': tp_py[i]})
        # CSV export is disabled for SPATIAL; to enable it, create
        # root+'/prediction/' and uncomment:
        # pred_frame.to_csv(root+'/prediction/' + ts_stem +str(sensor_idx[i])+'_SPATIAL_prediction.csv', index=False)

    ytest_spatial = tp_ty
    ypred_spatial = tp_py

In [None]:
#CNN -------------------------- model training 
# Trains the CNN model on Temp_diff, writes per-sensor error metrics to
# <error_folder>/<name>_error.txt, saves one prediction CSV per sensor and keeps
# the test targets / predictions in ytest_CNN / ypred_CNN for the comparison cells.
if 'CNN' in model_select:
    name = '_Temp_diff_' + 'CNN'
    tp_py, tp_ty, tp_error, tp_std, tp_model = SP_Learner(Temp_diff, name, train_split, test_split, train_time, predict_time, 
                                                        predict_position, Stride, 0, epoches, run_model='CNN',
                                                        batch_size=batch_size,  plot=False)

    # Human-readable dataset label taken from the selected file stem
    # ('ADCP', 'Oxygen' or 'Temp'), so logs are not labelled with the wrong dataset.
    dataset_label = ts_stem.split('_')[0]
    # Summary statistics of the raw loaded data (not of the model output).
    print(dataset_label + ' CNN MEAN: ', data.mean(), 'STD: ',data.std(), 'Skew: ', data.skew())

    #record the errors
    error_file = error_folder + name + '_error.txt'
    MAE = []
    MAPE = []
    STD = []
    # Mode "w" truncates any previous report, so one open replaces the former
    # truncate-then-append pair.
    with open(error_file, "w") as text_file:
        for i in range(len(tp_py)):
            mae = mean_absolute_error(tp_ty[i], tp_py[i])
            MAE.append(mae)
            mape = mean_absolute_percentage_error(tp_ty[i], tp_py[i])
            MAPE.append(mape)
            # Spread of the absolute errors for this sensor.
            std = np.std(abs(tp_py[i] - tp_ty[i]))
            STD.append(std)
            info = '{} sensor {}:   mae: {:.4f},  std: {:.4f}, mape: {:.4f}'.format(dataset_label, i, mae, std, mape)
            text_file.write(info + "\n")
            print('mae: ', mae, 'std: ', std, 'mape: ', mape)
        # Averages over all sensors.
        info = '{}:   MAE: {:.4f},  STD: {:.4f}, MAPE: {:.4f}'.format(dataset_label, np.mean(MAE), np.mean(STD), np.mean(MAPE))
        text_file.write(info + "\n")

    #save predicted data
    # Skip the leading timestamps so that the dates line up with the predictions;
    # the offset was the literal 47+6, i.e. predict_position + train_time.
    test_time = test_Temp_time[predict_position + train_time:]
    print(len(test_time), len(tp_ty[0]))

    # The prediction folder is not created anywhere else, so make sure it
    # exists before writing into it.
    prediction_dir = root + '/prediction/'
    Path(prediction_dir).mkdir(parents=True, exist_ok=True)
    for i in range(len(tp_py)):
        pred_frame = pd.DataFrame({'date': test_time,
                                   'value': tp_py[i]})
        pred_frame.to_csv(prediction_dir + ts_stem +str(sensor_idx[i])+'_CNN_prediction.csv', index=False)
    ytest_CNN = tp_ty
    ypred_CNN = tp_py


In [None]:
#LSTM -------------------------- model training 
# Trains the LSTM model on Temp_diff, writes per-sensor error metrics to
# <error_folder>/<name>_error.txt, saves one prediction CSV per sensor and keeps
# the test targets / predictions in ytest_lstm / ypred_lstm for the comparison cells.
if 'LSTM' in model_select:
    # Was '_Temp_diff_' + 'CNN': the LSTM run then reused the CNN run name and
    # overwrote the CNN error report.
    name = '_Temp_diff_' + 'LSTM'
    tp_py, tp_ty, tp_error, tp_std, tp_model = SP_Learner(Temp_diff, name, train_split, test_split, train_time, predict_time, 
                                                        predict_position, Stride, 0, epoches, run_model='LSTM',
                                                        batch_size=batch_size,  plot=False)

    # Human-readable dataset label taken from the selected file stem
    # ('ADCP', 'Oxygen' or 'Temp'), so logs are not labelled with the wrong dataset.
    dataset_label = ts_stem.split('_')[0]
    # Summary statistics of the raw loaded data (not of the model output).
    print(dataset_label + ' LSTM MEAN: ', data.mean(), 'STD: ',data.std(), 'Skew: ', data.skew())

    #record the errors
    error_file = error_folder + name + '_error.txt'
    MAE = []
    MAPE = []
    STD = []
    # Mode "w" truncates any previous report, so one open replaces the former
    # truncate-then-append pair.
    with open(error_file, "w") as text_file:
        for i in range(len(tp_py)):
            mae = mean_absolute_error(tp_ty[i], tp_py[i])
            MAE.append(mae)
            mape = mean_absolute_percentage_error(tp_ty[i], tp_py[i])
            MAPE.append(mape)
            # Spread of the absolute errors for this sensor.
            std = np.std(abs(tp_py[i] - tp_ty[i]))
            STD.append(std)
            info = '{} sensor {}:   mae: {:.4f},  std: {:.4f}, mape: {:.4f}'.format(dataset_label, i, mae, std, mape)
            text_file.write(info + "\n")
            print('mae: ', mae, 'std: ', std, 'mape: ', mape)
        # Averages over all sensors.
        info = '{}:   MAE: {:.4f},  STD: {:.4f}, MAPE: {:.4f}'.format(dataset_label, np.mean(MAE), np.mean(STD), np.mean(MAPE))
        text_file.write(info + "\n")

    #save predicted data
    # Skip the leading timestamps so that the dates line up with the predictions;
    # the offset was the literal 47+6, i.e. predict_position + train_time.
    test_time = test_Temp_time[predict_position + train_time:]
    print(len(test_time), len(tp_ty[0]))

    # The prediction folder is not created anywhere else, so make sure it
    # exists before writing into it.
    prediction_dir = root + '/prediction/'
    Path(prediction_dir).mkdir(parents=True, exist_ok=True)
    for i in range(len(tp_py)):
        pred_frame = pd.DataFrame({'date': test_time,
                                   'value': tp_py[i]})
        pred_frame.to_csv(prediction_dir + ts_stem +str(sensor_idx[i])+'_LSTM_prediction.csv', index=False)
    ytest_lstm = tp_ty
    ypred_lstm = tp_py



In [None]:
# Overlay the test series and each trained model's predictions for one sensor.
sel = 0  # position (in sensor_idx order) of the sensor to plot

# Only models listed in model_select have results; referencing the others
# would raise NameError. Order, markers and labels match the full three-model plot.
series_to_plot = []
if 'CNN' in model_select:
    series_to_plot.append((ytest_CNN, ypred_CNN, 'r.', 'CNN'))
if 'SPATIAL' in model_select:
    series_to_plot.append((ytest_spatial, ypred_spatial, 'g.', 'spatial'))
if 'LSTM' in model_select:
    series_to_plot.append((ytest_lstm, ypred_lstm, 'b.', 'LSTM'))

if series_to_plot:
    # Test targets are taken from the first available model (ytest_CNN when
    # CNN was run).
    # NOTE(review): assumes every model is evaluated on the same test targets — confirm.
    plt.plot(series_to_plot[0][0][sel], 'k.', label = 'test')
    for _, y_pred, marker, model_label in series_to_plot:
        plt.plot(y_pred[sel], marker, label = model_label)
    plt.legend(loc = 'upper left',fontsize='xx-small')
    plt.ylim([-1,1])
    #plt.xlim([250,750])
else:
    print('No model results to plot; model_select =', model_select)

In [None]:
# Per-sensor MAE of every trained model, printed side by side.
# Only models listed in model_select have results; referencing the others
# would raise NameError. Column order matches the full run: CNN, LSTM, SPATIAL.
evaluated = []
if 'CNN' in model_select:
    evaluated.append(('CNN', ytest_CNN, ypred_CNN))
if 'LSTM' in model_select:
    evaluated.append(('LSTM', ytest_lstm, ypred_lstm))
if 'SPATIAL' in model_select:
    evaluated.append(('SPATIAL', ytest_spatial, ypred_spatial))

if evaluated:
    # Test targets are taken from the first available model (ytest_CNN when
    # CNN was run).
    # NOTE(review): assumes every model is evaluated on the same test targets — confirm.
    y_true = evaluated[0][1]
    # Header naming the columns, since their number depends on model_select.
    print('sensor', *[model_name for model_name, _, _ in evaluated])
    for i in range(len(y_true)):
        maes = [mean_absolute_error(y_true[i], y_pred[i]) for _, _, y_pred in evaluated]
        print(i, *maes)
else:
    print('No model results to compare; model_select =', model_select)