# Rajshahi dataset Model testing

#### Import libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf

from tensorflow.keras import Model, Sequential
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import MeanAbsoluteError

from tensorflow.keras.layers import Dense, Conv1D, LSTM, RNN

import warnings
warnings.filterwarnings('ignore')

### Load LSTM model file

In [2]:
rajshahi_model = keras.models.load_model("lstm_rajshahi_model.h5")

### Read test dataset

In [3]:
test_df = pd.read_csv("../data/test.csv", index_col='datetime',parse_dates=True)

In [4]:
print(test_df.index)
print(test_df.info())

DatetimeIndex(['2022-06-19', '2022-06-20', '2022-06-21', '2022-06-22',
               '2022-06-23', '2022-06-24', '2022-06-25', '2022-06-26',
               '2022-06-27', '2022-06-28',
               ...
               '2023-08-08', '2023-08-09', '2023-08-10', '2023-08-11',
               '2023-08-12', '2023-08-13', '2023-08-14', '2023-08-15',
               '2023-08-16', '2023-08-17'],
              dtype='datetime64[ns]', name='datetime', length=425, freq=None)
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 425 entries, 2022-06-19 to 2023-08-17
Data columns (total 25 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   tempmax                     425 non-null    float64
 1   tempmin                     425 non-null    float64
 2   temp                        425 non-null    float64
 3   feelslikemax                425 non-null    float64
 4   feelslikemin                425 non-null    float64
 5   feel

In [5]:
test_df.columns

Index(['tempmax', 'tempmin', 'temp', 'feelslikemax', 'feelslikemin',
       'feelslike', 'dew', 'humidity', 'precip', 'precipprob', 'precipcover',
       'windspeed', 'winddir', 'sealevelpressure', 'cloudcover', 'visibility',
       'solarradiation', 'solarenergy', 'uvindex', 'river_discharge',
       'apparent_temperature_mean', 'precipitation_sum ', 'rain_sum ',
       'precipitation_hours', 'et0_fao_evapotranspiration'],
      dtype='object')

### Define Timeseries data on test df

In [6]:
class DataWindow:
    def __init__(self, input_width, label_width, shift, test_df, label_columns=None):
        self.test_df = test_df
        self.label_columns = label_columns
        if label_columns is not None:
            self.label_columns_indices = {name: i for i, name in enumerate(label_columns)}
        self.column_indices = {name: i for i, name in enumerate(test_df.columns)}
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift
        self.total_window_size = input_width + shift
        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]
        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def split_to_inputs_labels(self, features):
        inputs = features[:, self.input_slice, :]
        labels = features[:, self.labels_slice, :]
        if self.label_columns is not None:
            labels = tf.stack(
                [labels[:, :, self.column_indices[name]] for name in self.label_columns],
                axis=-1
            )
        inputs.set_shape([None, self.input_width, None])
        labels.set_shape([None, self.label_width, None])

        return inputs, labels

    def make_dataset(self, data):
        data = np.array(data, dtype=np.float32)
        ds = tf.keras.preprocessing.timeseries_dataset_from_array(
            data=data,
            targets=None,
            sequence_length=self.total_window_size,
            sequence_stride=1,
            shuffle=True,
            batch_size=32
        )

        ds = ds.map(self.split_to_inputs_labels)
        return ds

    @property
    def test(self):
        return self.make_dataset(self.test_df)
   

#### Predictions

In [7]:
def predict(days: int):
    custom_mo_wide_window = DataWindow(input_width=days, label_width=days, shift=days, test_df=test_df,
                                       label_columns=['precip','rain_sum ','river_discharge'])

    predicted_results = rajshahi_model.predict(custom_mo_wide_window.test)
    predicted_array= predicted_results[0]

    predicted_numpy_array = np.array(predicted_array)

    df_scaled = pd.DataFrame(predicted_numpy_array)

    df = df_scaled.rename(columns={0: "river_discharge", 1: "rain_sum ",2:"precip"})
    
    RD_max_train = 64119.98 
    RD_min_train = 947.42
    RD_max_test =  41019.4
    RD_min_test =  77.7

    R_max_train = 150.6
    R_min_train = 0.0
    R_max_test = 49.9
    R_min_test = 0.0

    P_max_train = 68.6
    P_min_train = 0.0
    P_max_test = 4.297
    P_min_test = 0.0

   
    
    df['river_discharge'] = df['river_discharge'].apply(lambda x: x*(RD_max_train - RD_min_train) + RD_min_train)
    df['rain_sum '] = df['rain_sum '].apply(lambda x: x*(R_max_train - R_min_train) + R_min_train)
    df['precip'] = df['precip'].apply(lambda x: x*(P_max_train - P_min_train) + P_min_train)
    df['floods'] = df['precip'] >2;
                      
    return df


In [8]:
predict(14)



Unnamed: 0,river_discharge,rain_sum,precip,floods
0,2161.407401,2.590979,3.194046,True
1,1402.296724,0.746257,2.941358,True
2,2171.787019,1.356282,3.766638,True
3,1993.463659,-0.113032,2.939682,True
4,2006.134627,1.299187,2.251513,True
5,2109.867143,-1.081848,1.968287,False
6,2193.979804,-0.590925,2.064361,True
7,1560.751479,-2.425242,2.382535,True
8,1973.833214,0.187619,2.314656,True
9,2290.510093,-2.191603,2.531395,True
