# Import all libraries

In [None]:
import numpy as np 
import pandas as pd 
import random
import os
from  datetime import datetime, timedelta, time

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.layers import Conv3D,MaxPool3D, Dense,Flatten, Concatenate, ConvLSTM2D, ConvLSTM3D
from tensorflow.keras.layers import Input
from tensorflow.keras.metrics import RootMeanSquaredError, MeanAbsoluteError

# Install Weights & Biases into the current runtime before importing it (needed on a fresh Colab VM).
!pip install wandb
import wandb
from wandb.keras import WandbCallback
# Cell output: the GPU device name TensorFlow sees, as a quick check that an accelerator is attached.
tf.test.gpu_device_name()

# Define helper function and data generator class

In [None]:
from sklearn.preprocessing import MinMaxScaler,StandardScaler
def split_to_each_step(df, fh_step):
  """Build the scaled train/test tables for one forecast-horizon step.

  Keeps only the columns needed for `fh_step`, drops rows containing missing
  values, splits the rows by calendar date using the notebook-level globals
  `train_date_val_list` and `val_date_list`, and standardises every numerical
  feature with a `StandardScaler` that is fitted on the training rows only.

  Parameters
  ----------
  df : pandas.DataFrame
      Source table; must contain every column listed in `use_cols` below.
  fh_step :
      Forecast-horizon step exactly as it appears in the column names
      (`I_lead_{fh_step}step`, ...).

  Returns
  -------
  tuple
      `(train, test, scaler)`. `train` and `test` are indexed by
      `(site, Datetime)`; their feature columns are scaled while the target
      `I_lead_{fh_step}step` and `filename` are left untouched. `scaler` is the
      fitted `StandardScaler` (feature order = column order of `train` without
      the target and `filename`).
  """
  target_col = f'I_lead_{fh_step}step'
  use_cols = ['site', 'Datetime', target_col,
              'I', f'I_lead_{fh_step}step_back1D', f'hour_index_lead_{fh_step}step', f'iclr_lead_{fh_step}step',
              'I_lag_1step',
              'filename']

  # Explicit copy so the Datetime conversion below writes to our own frame, never to a view of `df`.
  _df = df[use_cols].dropna().copy()
  _df['Datetime'] = pd.to_datetime(_df['Datetime'])

  date_index = _df['Datetime'].dt.date

  # NOTE(review): the returned "test" rows are selected with `val_date_list`, not `test_date_list`;
  # kept as in the original notebook - confirm this is the intended hold-out set.
  _df_train = _df[date_index.isin(train_date_val_list)].set_index(['site', 'Datetime'])
  _df_test = _df[date_index.isin(val_date_list)].set_index(['site', 'Datetime'])

  # Ordered feature list. The previous `list(set(columns) - {...})` had an arbitrary order, so the
  # scaled array (which follows the DataFrame column order) could be written back into the wrong columns.
  feature_cols = [col for col in _df_train.columns if col not in ('filename', target_col)]

  scaler = StandardScaler()
  _df_train[feature_cols] = scaler.fit_transform(_df_train[feature_cols])
  # Test rows reuse the statistics learnt on the training rows.
  _df_test[feature_cols] = scaler.transform(_df_test[feature_cols])

  return _df_train, _df_test, scaler


In [None]:
class CustomDataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` yielding ``([images, numerical_features], labels)`` batches.

    Each row of ``df`` names one ``.npy`` file (column ``filename``) inside
    ``image_dir``. The array is loaded, its last axis is moved to the front and
    it is reshaped to ``image_size``. Every column other than ``filename`` and
    the label ``I_lead_{fh_step}step`` is served as a numerical feature.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``filename``, the label column and the feature columns.
    image_dir : str
        Directory containing the ``.npy`` files.
    image_size : tuple
        Shape of one sample after loading, e.g. ``(2, 35, 35, 1)``.
    batch_size : int
        Number of rows per batch (the last batch may be smaller).
    fh_step :
        Forecast-horizon step used in the label column name.
    """

    def __init__(self, df, image_dir, image_size, batch_size, fh_step):
        super().__init__()
        self.df = df
        self.image_dir = image_dir
        self.image_size = image_size
        self.batch_size = batch_size
        self.fh_step = fh_step
        self.label_col = f'I_lead_{fh_step}step'
        # Freeze the feature columns now: the notebook later adds a prediction column to the very
        # same DataFrame, which must not silently widen the numerical input of the model.
        self.feature_cols = [col for col in df.columns if col not in ('filename', self.label_col)]

    def __len__(self):
        """Number of batches per epoch (ceil so a final partial batch is included)."""
        return int(np.ceil(len(self.df) / float(self.batch_size)))

    def _load_image(self, filename):
        """Load one ``.npy`` file and return it with shape ``image_size``."""
        image = np.load(os.path.join(self.image_dir, filename))
        # Move the last axis to the front (presumably (H, W, frames) -> (frames, H, W) - TODO confirm).
        image = image.transpose((2, 0, 1))
        try:
            return image.reshape(self.image_size)
        except ValueError as err:
            # Name the offending file; a bare reshape error does not say which sample is malformed.
            raise ValueError(
                f'{filename}: array of shape {image.shape} cannot be reshaped to {self.image_size}'
            ) from err

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``([images, numerical_features], labels)``."""
        start = idx * self.batch_size
        # .iloc makes the positional intent explicit regardless of the frame's index type.
        batch = self.df.iloc[start:start + self.batch_size]

        images = np.array([self._load_image(name) for name in batch['filename']])
        numerical_features = batch[self.feature_cols].to_numpy()
        labels = batch[self.label_col].to_numpy()

        inputs = [images, numerical_features]
        return inputs, labels

    def get_labels(self):
        """Return all labels of the underlying frame, in row order."""
        return np.array(self.df[self.label_col])

# Import data
- Import the CNN table and the train/test date lists.
- If training in Google Colab, create an empty folder named `all_images` to store the sequence images (`.npy`).

In [None]:
# create blank folder and download zip file from gg drive to colab VM
!mkdir all_images
!gdown https://drive.google.com/uc?id=1YvN_Vjssq85-InJCFMx_Nzu6uwYXdkYh

In [None]:
# Extract the downloaded gzip-compressed tar archive into the all_images folder.
# Both paths are absolute under /content, i.e. this cell assumes the Colab working directory.
!tar -xzvf "/content/cutted_im_all_r.tar.gz" -C "/content/all_images"

In [None]:
# Input files: change both paths to the exact location of the files in your environment.
datelist_var_path = 'train_test_date_listVar.pkl'  # pickle holding the four date lists unpacked in the next cell
csv_path = 'df_cnn_r.csv'  # table read into `df` further below

In [None]:
import pickle
# NOTE: pickle.load can execute arbitrary code - only load a date-list file from a source you trust.
# The context manager closes the file handle, which the previous bare open() never did.
with open(datelist_var_path, 'rb') as f:
    train_date_list, test_date_list, val_date_list, train_date_val_list = pickle.load(f)

In [None]:
# Read the table and discard its first column in a single expression.
df = pd.read_csv(csv_path).iloc[:, 1:]

# CNN-LSTM

In [None]:
# Forecast-horizon step this experiment is trained for; it selects the *_lead_{fh_step}step columns.
fh_step=1
# Scaled train/test frames plus the scaler that was fitted on the training rows.
df_train, df_test,scaler = split_to_each_step(df, fh_step=fh_step)
print(f'model {fh_step} step, the data have {df_train.shape[0]} samples for training and {df_test.shape[0]} for testing')

In [None]:
# Log in to Weights & Biases and open a run that records this experiment.
# The API key is taken from the WANDB_API_KEY environment variable so it never has to be pasted
# into (and saved with) the notebook. When the variable is unset, `None` is passed so that wandb
# falls back to its own credential lookup / interactive prompt instead of receiving an empty key.
key = os.environ.get('WANDB_API_KEY') or None
wandb.login(key=key)
run = wandb.init(project='CNN_RGB',
                 config={
                     'learning_rate':0.01,
                     'epochs':30,
                     'batch_size':32,
                     'loss_function':"mean_absolute_error",
                     'architecture':'CNN_LSTM_5dense',
                     'fh_step':fh_step,
                     },
                 name=f'{fh_step}_step_only_r_prac')
# Shorthand used by the following cells to read the hyper-parameters back.
config = wandb.config

In [None]:
# Batch generators for the CNN-LSTM experiment; batch size comes from the W&B run config.
# NOTE(review): images are read from the relative folder 'stack-r-channel' here, while the download
# cells extract into '/content/all_images' and the 3D-CNN section reads from that folder - confirm
# which directory is intended.
train_generator = CustomDataGenerator(df=df_train, image_dir='stack-r-channel', image_size=(2,35, 35,1), batch_size=wandb.config.batch_size, fh_step=fh_step)
# val_generator = CustomDataGenerator(df=df_val, image_dir='/content/all_images', image_size=(35, 35,2), batch_size=config.batch_size, fh_step=fh_step)
test_generator = CustomDataGenerator(df=df_test, image_dir='stack-r-channel', image_size=(2,35, 35,1), batch_size=wandb.config.batch_size, fh_step=fh_step)

## Define model structure

In [None]:
# Reset Keras state through tf.keras (the same Keras implementation the layers below come from)
# so that layer names and graph memory do not pile up when this cell is re-run.
tf.keras.backend.clear_session()
# Seed BEFORE creating any layer: the weights are initialised while the graph below is built,
# so seeding only in the training cell leaves the initial weights unseeded.
np.random.seed(42)
tf.random.set_seed(42)

# Image branch: one sample is a sequence of 2 frames of 35x35 pixels with a single channel.
im_input_shape = (2, 35, 35, 1)

im_inputs = Input(shape=im_input_shape)

# return_sequences=True keeps the output of every frame; all of it is flattened below.
convlstm = ConvLSTM2D(filters=64, kernel_size=3, activation='relu',
                      padding='same', return_sequences=True, data_format='channels_last')(im_inputs)

im_flatten = Flatten()(convlstm)

# Measurement branch: df_train holds the label column and 'filename' besides the numerical
# features, hence the "- 2".
meas_input_shape = (df_train.shape[1]-2,)
meas_inputs = Input(shape=meas_input_shape)

concatenated = Concatenate(axis=1)([im_flatten, meas_inputs])

# Regression head: five 32-unit ReLU layers followed by a single ReLU output unit.
dense1 = Dense(units=32, activation='relu')(concatenated)
dense2 = Dense(units=32, activation='relu')(dense1)
dense3 = Dense(units=32, activation='relu')(dense2)
dense4 = Dense(units=32, activation='relu')(dense3)
dense5 = Dense(units=32, activation='relu')(dense4)
outputs = Dense(units=1, activation='relu')(dense5)

model = Model(inputs=[im_inputs, meas_inputs], outputs=outputs)
model.summary()

## Fit model

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
# Directory for the saved weights; the empty string means the current working directory.
model_path = ''
# Seed Python's RNG as well as NumPy/TensorFlow: Keras may rely on the `random` module when it
# shuffles the batch order of a Sequence, so seeding NumPy/TF alone does not fix the batch order.
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)
model.compile(loss="mean_absolute_error",
              optimizer=tf.keras.optimizers.Adam(learning_rate=config.learning_rate),
              metrics=['mean_absolute_error', 'RootMeanSquaredError'])
# `fit` accepts a keras Sequence directly; `fit_generator` is deprecated and absent from newer Keras.
# NOTE(review): early stopping monitors the same generator that is evaluated below, so the
# reported error is measured on data used for model selection - confirm this is intended.
history = model.fit(train_generator,
                    validation_data=test_generator,
                    epochs=wandb.config.epochs,
                    callbacks=[
                        EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True),
                        WandbCallback()])

model.save_weights(os.path.join(model_path, f'weights_CNN-LSTM{fh_step}step_RGB.h5'))

from sklearn.metrics import mean_absolute_error, mean_squared_error
# `predict` replaces the deprecated `predict_generator`.
y_pred = model.predict(test_generator)
y_pred = y_pred.flatten()
y_true = df_test[f'I_lead_{fh_step}step'].to_numpy()
# Note: this adds the prediction column to df_test in place.
df_test[f'I_pred{fh_step}step'] = y_pred
df_test.to_csv(f'df{fh_step}step_testCNN-LSTM_R.csv')
mae = mean_absolute_error(y_true, y_pred)
rmse = (mean_squared_error(y_true, y_pred))**0.5
wandb.save(f'df{fh_step}step_testCNN-LSTM_R.csv')
# Show the metrics, as the 3D-CNN section does; previously they were computed but never displayed.
print(mae,rmse)
wandb.finish()

# 3D-CNN

In [None]:
# Rebuild the scaled train/test frames for the 3D-CNN experiment (fresh frames from `df`).
fh_step=1
df_train, df_test,scaler = split_to_each_step(df, fh_step=fh_step)
print(f'model {fh_step} step, the data have {df_train.shape[0]} samples for training and {df_test.shape[0]} for testing')

In [None]:
# Log in to Weights & Biases and open a run that records the 3D-CNN experiment.
# The API key is taken from the WANDB_API_KEY environment variable so it never has to be pasted
# into (and saved with) the notebook. When the variable is unset, `None` is passed so that wandb
# falls back to its own credential lookup / interactive prompt instead of receiving an empty key.
key = os.environ.get('WANDB_API_KEY') or None
wandb.login(key=key)
run = wandb.init(project='CNN_RGB',
                 config={
                     'learning_rate':0.01,
                     'epochs':30,
                     'batch_size':32,
                     'loss_function':"mean_absolute_error",
                     # This section trains the 3D-CNN; the label copied from the CNN-LSTM section
                     # ('CNN_LSTM_5dense') would file the run under the wrong architecture.
                     'architecture':'3DCNN_5dense',
                     'fh_step':fh_step,
                     },
                 name=f'{fh_step}_step_only_r_prac')
# Shorthand used by the following cells to read the hyper-parameters back.
config = wandb.config

In [None]:
# Batch generators for the 3D-CNN experiment; batch size comes from the W&B run config.
# NOTE(review): this section reads images from '/content/all_images', whereas the CNN-LSTM section
# uses 'stack-r-channel' - confirm both experiments are meant to read different folders.
train_generator = CustomDataGenerator(df=df_train, image_dir='/content/all_images', image_size=(2,35, 35,1), batch_size=wandb.config.batch_size, fh_step=fh_step)
# val_generator = CustomDataGenerator(df=df_val, image_dir='/content/all_images', image_size=(35, 35,2), batch_size=config.batch_size, fh_step=fh_step)
test_generator = CustomDataGenerator(df=df_test, image_dir='/content/all_images', image_size=(2,35, 35,1), batch_size=wandb.config.batch_size, fh_step=fh_step)

## Define model structure

In [None]:
# Reset Keras state through tf.keras (the same Keras implementation the layers below come from)
# so that layer names and graph memory do not pile up when this cell is re-run.
tf.keras.backend.clear_session()
# Seed BEFORE creating any layer: the weights are initialised while the graph below is built,
# so seeding only in the training cell leaves the initial weights unseeded.
np.random.seed(42)
tf.random.set_seed(42)

# Image branch: one sample is a sequence of 2 frames of 35x35 pixels with a single channel.
im_input_shape = (2, 35, 35, 1)

im_inputs = Input(shape=im_input_shape)

# 'same' padding keeps both frames; the pooling layers only shrink the two spatial axes.
conv1 = Conv3D(filters=32, kernel_size=(2,3,3), activation='relu',
               padding='same', data_format='channels_last')(im_inputs)

maxpool1 = MaxPool3D(pool_size=(1,2,2), strides=(1,2,2), padding='valid')(conv1)

# 'valid' padding with a depth-2 kernel merges the two frames into one.
conv2 = Conv3D(filters=32, kernel_size=(2,3,3), activation='relu',
               padding='valid', data_format='channels_last')(maxpool1)

maxpool2 = MaxPool3D(pool_size=(1,2,2), strides=(1,2,2))(conv2)


im_flatten = Flatten()(maxpool2)

# Measurement branch: df_train holds the label column and 'filename' besides the numerical
# features, hence the "- 2".
meas_input_shape = (df_train.shape[1]-2,)
meas_inputs = Input(shape=meas_input_shape)

concatenated = Concatenate(axis=1)([im_flatten, meas_inputs])

# Regression head: five 32-unit ReLU layers followed by a single ReLU output unit.
dense1 = Dense(units=32, activation='relu')(concatenated)
dense2 = Dense(units=32, activation='relu')(dense1)
dense3 = Dense(units=32, activation='relu')(dense2)
dense4 = Dense(units=32, activation='relu')(dense3)
dense5 = Dense(units=32, activation='relu')(dense4)
outputs = Dense(units=1, activation='relu')(dense5)

model = Model(inputs=[im_inputs, meas_inputs], outputs=outputs)
model.summary()

## Fit model

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
# Directory for the saved weights; the empty string means the current working directory.
model_path = ''
# Seed Python's RNG as well as NumPy/TensorFlow: Keras may rely on the `random` module when it
# shuffles the batch order of a Sequence, so seeding NumPy/TF alone does not fix the batch order.
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)
model.compile(loss="mean_absolute_error",
              optimizer=tf.keras.optimizers.Adam(learning_rate=config.learning_rate),
              metrics=['mean_absolute_error', 'RootMeanSquaredError'])
# `fit` accepts a keras Sequence directly; `fit_generator` is deprecated and absent from newer Keras.
# NOTE(review): early stopping monitors the same generator that is evaluated below, so the
# reported error is measured on data used for model selection - confirm this is intended.
history = model.fit(train_generator,
                    validation_data=test_generator,
                    epochs=wandb.config.epochs,
                    callbacks=[
                        EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True),
                        WandbCallback()])


model.save_weights(os.path.join(model_path, f'weights_3DCNN{fh_step}step_RGB.h5'))

from sklearn.metrics import mean_absolute_error, mean_squared_error
# `predict` replaces the deprecated `predict_generator`.
y_pred = model.predict(test_generator)
y_pred = y_pred.flatten()
y_true = df_test[f'I_lead_{fh_step}step'].to_numpy()
# Note: this adds the prediction column to df_test in place.
df_test[f'I_pred{fh_step}step'] = y_pred
df_test.to_csv(f'df{fh_step}step_test3DCNN_R_test.csv')
mae = mean_absolute_error(y_true, y_pred)
rmse = (mean_squared_error(y_true, y_pred))**0.5
wandb.save(f'df{fh_step}step_test3DCNN_R_test.csv')
print(mae,rmse)
wandb.finish()