# Import all libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import os
from  datetime import datetime, timedelta, time

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.layers import Conv3D,MaxPooling3D, Dense,Flatten, Concatenate, ConvLSTM2D, ConvLSTM3D
from tensorflow.keras.layers import Input
from tensorflow.keras.metrics import RootMeanSquaredError, MeanAbsoluteError
import wandb
from wandb.keras import WandbCallback
# Query TensorFlow for the name of a GPU device. The return value is neither
# stored nor printed here, so this line only has an effect through whatever
# TensorFlow does internally during the call.
# NOTE(review): presumably meant as a quick "is a GPU visible?" check -- wrap in
# print(...) if the device name should actually be shown.
tf.test.gpu_device_name()

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Define function

In [None]:
from sklearn.preprocessing import MinMaxScaler,StandardScaler
def split_to_each_step(df, fh_step, train_dates=None, test_dates=None):
  """Build standardized train/test frames for one forecast-horizon step.

  Selects the columns used by the ``fh_step``-step model, drops rows with
  missing values, splits the rows by calendar date, and standardizes the
  feature columns with a ``StandardScaler`` fitted on the training rows only.
  The target column ``I_lead_{fh_step}step`` is left unscaled.

  Args:
    df: DataFrame holding at least the columns listed in ``use_cols`` below.
    fh_step: Forecast-horizon step; used to build the step-specific column
      names (e.g. ``I_lead_{fh_step}step``).
    train_dates: Collection of dates matched against ``Datetime.dt.date`` to
      pick the training rows. Defaults to the module-level ``train_date_list``.
    test_dates: Same as ``train_dates`` but for the test rows. Defaults to the
      module-level ``test_date_list``.

  Returns:
    ``(df_train, df_test, scaler)`` where both frames are indexed by
    ``('site', 'Datetime')`` and ``scaler`` is the fitted ``StandardScaler``
    whose feature order equals the frames' feature-column order.
  """
  target_col = f'I_lead_{fh_step}step'
  use_cols = ['site', 'Datetime', target_col,
              'I', f'I_lead_{fh_step}step_back1D', f'hour_index_lead_{fh_step}step', f'iclr_lead_{fh_step}step',
              'I_lag_1step', 'ci_center', f'ci_est(t+{fh_step})']

  # Fall back to the notebook-level date lists when none are passed explicitly.
  if train_dates is None:
    train_dates = train_date_list
  if test_dates is None:
    test_dates = test_date_list

  # Work on an explicit copy so the Datetime conversion below never writes
  # into (or warns about) a frame derived from the caller's DataFrame.
  _df = df[use_cols].dropna().copy()
  _df['Datetime'] = pd.to_datetime(_df['Datetime'])

  date_index = _df['Datetime'].dt.date
  _df_train = _df[date_index.isin(train_dates)].set_index(['site', 'Datetime'])
  _df_test = _df[date_index.isin(test_dates)].set_index(['site', 'Datetime'])

  # Keep the feature columns in DataFrame order. Building this list from a
  # set (as before) yields an arbitrary order, so the scaled array could be
  # written back under the wrong column names.
  feature_cols = [col for col in _df_train.columns if col != target_col]

  # Fit on the training rows only; the test rows reuse the same statistics.
  scaler = StandardScaler()
  _df_train[feature_cols] = scaler.fit_transform(_df_train[feature_cols])
  _df_test[feature_cols] = scaler.transform(_df_test[feature_cols])

  return _df_train, _df_test, scaler


# Import data

In [None]:
import pickle

# Load the four objects stored in the pickle (by their names: train, test,
# validation and train+validation date lists). The ``with`` block closes the
# file handle as soon as the objects have been read.
# NOTE(review): pickle.load can execute arbitrary code while unpickling --
# only use this with a trusted 'train_test_date_listVar.pkl'.
with open('train_test_date_listVar.pkl', 'rb') as f:
  (train_date_list, test_date_list,
   val_date_list, train_date_val_list) = pickle.load(f)

# Table that split_to_each_step later slices into per-step train/test frames.
all_sites_df = pd.read_csv('processed_all_sites_HS1e0_df_not_imputed_R_channel.csv')

# Model training

## Split data

In [None]:
from joblib import dump

# Forecast-horizon step this notebook run trains a model for.
fh_step = 1

# Produce the standardized train/test frames and the fitted scaler for that
# step, then report how many rows ended up in each split.
df_train, df_test, scaler = split_to_each_step(df=all_sites_df, fh_step=fh_step)
print(f'model {fh_step} step, the data have {df_train.shape[0]} samples for training and {df_test.shape[0]} for testing')

In [None]:
# Log in to Weights & Biases so the experiment can be tracked.
# The API key is taken from the WANDB_API_KEY environment variable instead of
# being written into the notebook, so no secret has to be pasted into (and
# accidentally committed with) the source. An unset or empty variable is
# turned into None so that wandb.login receives "no key" rather than an
# empty-string key and can use its own credential lookup.
key = os.environ.get('WANDB_API_KEY') or None
wandb.login(key=key)

# Start the run and record the hyperparameters used by the training cell.
run = wandb.init(project='ANN-with-cloud',
                 config={
                     'learning_rate': 0.005,
                     'epochs': 30,
                     'batch_size': 32,
                     'loss_function': "mean_absolute_error",
                     'architecture': 'ANN',
                     'fh_step': fh_step,
                     },
                 name=f'ANN-{fh_step}step')
# Short alias for the run configuration read by later cells.
config = wandb.config

In [None]:
# Separate the model inputs from the target column for each split.
X_train = df_train.drop(columns=[f'I_lead_{fh_step}step'])
y_train = df_train[f'I_lead_{fh_step}step']

X_test = df_test.drop(columns=[f'I_lead_{fh_step}step'])
y_test = df_test[f'I_lead_{fh_step}step']


## Define model structure

In [None]:
# Use the backend that ships with tensorflow.keras, matching the layers and
# Model imported from tensorflow.keras elsewhere in this file.
from tensorflow.keras import backend as K

# Drop any Keras state left over from a previously built model.
K.clear_session()

# One input unit per feature column. ``shape`` is passed as a 1-tuple: the
# earlier ``shape=X_train.shape[1],`` handed Keras a bare int, because the
# trailing comma belonged to the call, not to a tuple.
# The tensor is named ``inputs`` so the builtin ``input`` is not shadowed.
inputs = Input(shape=(X_train.shape[1],))

# Three fully connected hidden layers of 64 ReLU units each.
dense1 = Dense(64, activation='relu')(inputs)
dense2 = Dense(64, activation='relu')(dense1)
dense3 = Dense(64, activation='relu')(dense2)

# Single-unit regression output.
# NOTE(review): the ReLU here clamps predictions to be >= 0 -- presumably
# intentional for this target; confirm.
output = Dense(1, activation='relu')(dense3)

model = Model(inputs=inputs, outputs=output)
model.summary()

## Fit model

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Seed the NumPy and TensorFlow random generators before training.
# NOTE(review): the model is constructed in an earlier cell, so these seeds
# likely do not cover weight initialization -- move them before the model
# definition if fully repeatable runs are required.
np.random.seed(42)
tf.random.set_seed(42)

# Loss, learning rate, epochs and batch size are all read from the W&B config
# so the hyperparameters logged for the run are the ones actually used.
model.compile(loss=config.loss_function,
              optimizer=tf.keras.optimizers.Adam(learning_rate=config.learning_rate),
              metrics=['mean_absolute_error', 'RootMeanSquaredError'])

# NOTE(review): the test split is also used as validation data, so early
# stopping / best-weight restoration is selected on the same rows that are
# scored below. Consider a separate validation split.
history = model.fit(X_train, y_train,
                    validation_data=(X_test, y_test),
                    epochs=config.epochs,
                    batch_size=config.batch_size,
                    callbacks=[EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True),
                               WandbCallback()])
model.save_weights(f'weights_ANN{fh_step}step.h5')

# Predict on the test split and store the predictions next to the targets.
y_pred = model.predict(X_test)
y_pred = y_pred.flatten()  # model.predict output -> flat 1-D array
y_true = df_test[f'I_lead_{fh_step}step'].to_numpy()
df_test[f'I_pred{fh_step}step'] = y_pred
df_test.to_csv(f'df{fh_step}step_test-ANN.csv')

# Test-set error metrics (RMSE is the square root of the MSE).
mae = mean_absolute_error(y_true, y_pred)
rmse = (mean_squared_error(y_true, y_pred))**0.5
print(mae, rmse)

# Attach the prediction table and the weights to the W&B run, then close it.
wandb.save(f'df{fh_step}step_test-ANN.csv')
wandb.save(f'weights_ANN{fh_step}step.h5')
wandb.finish()