In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import warnings
warnings.filterwarnings('ignore')

In [None]:
# `header = 1` takes the column names from the file's second row. The
# 'Date/Time' index is parsed into real timestamps so that ordering and date
# arithmetic further down do not depend on the textual format of the column.
df = pd.read_csv('data_6july.csv', header = 1, index_col = 'Date/Time', parse_dates = True)
df.head()

Unnamed: 0_level_0,avg,max,min,avg.1,min.1,avg.2,avg.3,min.2,avg.4,max.1,...,max.9,min.9,last.1,last.2,avg.23,max.10,min.10,time.1,Daily ET0 [mm],Unnamed: 54
Date/Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-07-06 18:00:00,24.98,25.12,24.87,20.8,20.8,71,0.68,0.67,78.11,78.67,...,26.8,26.7,6173.0,6733,3,3,3,0,,
2023-07-06 17:00:00,25.78,26.34,25.38,20.5,20.1,223,0.89,0.79,72.93,75.56,...,26.8,26.7,6810.0,6770,4,4,3,55,,
2023-07-06 16:00:00,24.81,25.27,24.38,21.4,20.7,178,0.56,0.38,81.86,87.53,...,26.8,26.7,9365.0,6801,2,3,2,55,,
2023-07-06 15:00:00,24.52,25.19,23.11,21.6,21.3,195,0.48,0.2,84.26,92.86,...,26.8,26.7,9188.0,6796,2,3,1,60,,
2023-07-06 14:00:00,25.54,27.49,22.95,22.8,21.6,278,0.48,0.19,85.54,92.92,...,26.8,26.6,8742.0,6804,2,3,1,50,,


## Renaming Columns:

In [None]:
# Replace the generic avg/max/min headers with descriptive names. The
# assignment is purely positional: the i-th name is given to the i-th column.
# NOTE(review): the preview above shows a trailing 'Unnamed: 54' column after
# 'Daily ET0 [mm]', while this list ends at 'Daily ET0 [mm]' -- verify that
# every name lines up with the intended source column.
df.columns = [ 'air_temp_avg', 'air_temp_max', 'air_temp_min', 'dew_avg',
       'dew_min', 'solar_rad_avg', 'vpd_avg', 'vpd_min', 'rel_humi_avg',
       'rel_humi_max', 'rel_humi_min', 'precp_sum', 'leaf_wetness_time(min)',
       'wind_speed_avg', 'wind_speed_max', 'wind_gust_max', 'wind_dir_last',
       'eag_sm1_avg', 'eag_sm2_avg', 'eag_sm3_avg', 'eag_sm4_avg',
       'eag_sm5_avg', 'eag_sm6_avg', 'VIC1_avg', 'VIC2_avg', 'VIC3_avg',
       'VIC4_avg', 'VIC5_avg', 'VIC6_avg', 'soil_temp1_avg', 'soil_temp1_max',
       'soil_temp1_min', 'soil_temp2_avg', 'soil_temp2_max', 'soil_temp2_min',
       'soil_temp3_avg', 'soil_temp3_max', 'soil_temp3_min', 'soil_temp4_avg',
       'soil_temp4_max', 'soil_temp4_min', 'soil_temp5_avg', 'soil_temp5_max',
       'soil_temp5_min', 'soil_temp6_avg', 'soil_temp6_max', 'soil_temp6_min',
       'sp_last', 'bat_last', 'del_avg', 'del_max', 'del_min', 'sun_time',
       'Daily ET0 [mm]']

## Reversing and handling null values:

In [None]:
# The file lists the newest reading first; flip it into chronological order.
# `.copy()` makes the result an independent frame rather than a slice of the
# loaded data.
# NOTE: re-running this cell flips the order again.
df = df.iloc[::-1].copy()

# Assign the filled column back instead of calling fillna(inplace=True) on a
# column selection: the in-place form is chained assignment and is not
# guaranteed to modify `df` itself.
df['Daily ET0 [mm]'] = df['Daily ET0 [mm]'].fillna(0)

## Selecting only Average Humidity feature:

In [None]:
df1 = df['rel_humi_avg']
df1.head()

Date/Time
2023-04-25 13:00:00    15.60
2023-04-25 14:00:00    15.85
2023-04-25 15:00:00    15.60
2023-04-25 16:00:00    15.22
2023-04-25 17:00:00    15.71
Name: rel_humi_avg, dtype: float64

## Scaling:

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale the humidity series to [0, 1]; the scaler expects a 2-D column.
# NOTE(review): the scaler is fitted on the whole series, before the
# train/test split further down, so the test range influences the scaled
# training inputs -- consider fitting on the training slice only.
scaler = MinMaxScaler(feature_range = (0, 1))
humi_column = np.asarray(df1).reshape(-1, 1)
df1 = scaler.fit_transform(humi_column)

In [None]:
df1[:5]

array([[0.00448219],
       [0.007431  ],
       [0.00448219],
       [0.        ],
       [0.00577967]])

## Train, Test splits:

In [None]:
# Chronological 90/10 split: the first 90% of rows train the model, the
# remaining rows are held back for testing.
n_rows = len(df1)
training_size = int(n_rows * 0.9)
test_size = n_rows - training_size
train_data = df1[:training_size, :]
test_data = df1[training_size:n_rows, :1]

## Function to create X and y data:

In [None]:
def create_dataset(dataset, time_step=1):
    """Turn a single-column series into supervised (window, next value) pairs.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_cols); only column 0 is used.
        time_step: number of consecutive rows in each input window (>= 1).

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n_rows - time_step, time_step)``
        and ``y`` has shape ``(n_rows - time_step,)``; ``y[i]`` is the value that
        immediately follows window ``X[i]``. When the series is too short to
        form a single pair, empty arrays of shape ``(0, time_step)`` and ``(0,)``
        are returned so that downstream reshapes keep working.

    Raises:
        ValueError: if ``time_step`` is smaller than 1.
    """
    if time_step < 1:
        raise ValueError("time_step must be a positive integer")

    series = np.asarray(dataset)[:, 0]
    n_samples = len(series) - time_step

    # Too few rows for even one window: keep the 2-D shape of X explicit,
    # np.array([]) would collapse it to shape (0,).
    if n_samples <= 0:
        return (np.empty((0, time_step), dtype=series.dtype),
                np.empty((0,), dtype=series.dtype))

    dataX = np.stack([series[i:i + time_step] for i in range(n_samples)])
    dataY = series[time_step:].copy()
    return dataX, dataY

In [None]:
time_step = 4
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

In [None]:
# Windows over the full scaled series. Reuse the shared `time_step` instead of
# a literal so the window length always matches the train/test windows.
X, y = create_dataset(df1, time_step = time_step)

In [None]:
X_train.shape, y_train.shape

((1554, 4), (1554,))

In [None]:
X_test.shape, y_test.shape

((170, 4), (170,))

## Reshaping the data:

In [None]:
# Append a trailing feature axis: (samples, time_step) -> (samples, time_step, 1),
# matching the `input_shape = (time_step, 1)` of the recurrent layers below.
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

## Model Fitting:

In [None]:
!pip install keras-tuner
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, GRU
from kerastuner.tuners import RandomSearch
from sklearn.metrics import mean_squared_error, mean_absolute_error

Collecting keras-tuner
  Downloading keras_tuner-1.3.5-py3-none-any.whl (176 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/176.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.1/176.1 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.3.5 kt-legacy-1.0.5


In [None]:
def build_model(hp):
  """Build and compile a tunable LSTM regressor for the tuner.

  Args:
    hp: hyperparameter container supplied by the tuner; it is queried for the
      LSTM width ('units1'), the hidden Dense width ('units_dense') and the
      activations of both Dense layers ('Dense1_act', 'Dense2_act').

  Returns:
    A compiled Sequential model taking windows of shape (time_step, 1), where
    `time_step` is the notebook-level setting, and emitting a single value.
  """
  # Hyperparameters are requested in the same order the layers are stacked.
  lstm_units = hp.Int('units1', min_value = 20, max_value = 144, step = 12)
  hidden_units = hp.Int('units_dense', min_value = 20, max_value = 144)
  hidden_act = hp.Choice(values = ['linear','relu'], name ='Dense1_act')
  output_act = hp.Choice(values = ['linear','relu'], name = 'Dense2_act')

  mod = Sequential([
      LSTM(units = lstm_units, input_shape = (time_step, 1)),
      Dense(units = hidden_units, activation = hidden_act),
      Dense(units = 1, activation = output_act),
  ])

  mod.compile(loss = 'mse', optimizer = 'adam', metrics = ['mse'])
  return mod

In [None]:
# Rank trials on validation MSE ('val_mse') rather than training MSE: the
# search below passes `validation_data`, and selecting on training error
# favours models that merely fit the training windows best.
# NOTE: results cached under `tuner_dir` from an earlier run with a different
# objective are reloaded as-is; delete that folder (or pass overwrite = True)
# after changing the objective.
tuner1 = RandomSearch(
    build_model,
    objective = 'val_mse',
    max_trials = 10,
    directory = 'tuner_dir',
    project_name = 'LSTM_humi_finetuning',
)

In [None]:
# Run the hyperparameter search: each trial trains for 25 epochs on the
# training windows and is evaluated on (X_test, y_test).
# NOTE(review): the test split doubles as validation data here; if validation
# metrics are used for model selection, the test errors reported later are
# optimistic -- consider carving a separate validation slice.
# NOTE(review): `search` presumably returns nothing, so `history1` is likely
# None -- confirm against the KerasTuner docs.
history1 = tuner1.search(X_train, y_train, validation_data = (X_test, y_test), epochs = 25)

Trial 10 Complete [00h 00m 07s]
mse: 0.0031367561314255

Best mse So Far: 0.0030678852926939726
Total elapsed time: 00h 01m 39s


In [None]:
best_lstm_model = tuner1.get_best_models(num_models = 1)[0]
best_lstm_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 80)                26240     
                                                                 
 dense (Dense)               (None, 129)               10449     
                                                                 
 dense_1 (Dense)             (None, 1)                 130       
                                                                 
Total params: 36,819
Trainable params: 36,819
Non-trainable params: 0
_________________________________________________________________


## Predictions:

In [None]:
train_predict = best_lstm_model.predict(X_train)
test_predict = best_lstm_model.predict(X_test)



## Training Error:

In [None]:
print("Training MSE:",mean_squared_error(y_train,train_predict))
print("Training MAE:",mean_absolute_error(y_train, train_predict))

Training MSE: 0.003096000327908092
Training MAE: 0.04051338468669496


## Testing Error:

In [None]:
print("Testing MSE:",mean_squared_error(y_test, test_predict))
print("Testing MAE:", mean_absolute_error(y_test, test_predict))

Testing MSE: 0.003770178799644138
Testing MAE: 0.05022772550938546


## Reverse Scaling:

In [None]:
min_humi = df['rel_humi_avg'].min()
max_humi = df['rel_humi_avg'].max()
print(f"Minimum Average humidity: {min_humi} and Maximum Average humidity: {max_humi}")

Minimum Average humidity: 15.22 and Maximum Average humidity: 100.0


In [None]:
def rev_min_max_func(scaled_val):
  """Map a min-max scaled value back onto the original humidity scale.

  Uses the notebook-level `min_humi` and `max_humi` computed from
  `df['rel_humi_avg']`.

  Args:
    scaled_val: scaled value (or array of values) to convert.

  Returns:
    The value(s) rescaled to the [min_humi, max_humi] range.
  """
  humi_span = max_humi - min_humi
  return (scaled_val * humi_span) + min_humi

## Training Predicted Real Values:

In [None]:
dct = {'Actual': y_train, 'Predictions': train_predict.ravel()}

# The first training target is the row right after the first window, i.e.
# `time_step` rows into the frame; one timestamp per training sample follows.
train_index = df.index[time_step:time_step + X_train.shape[0]]
train_pred_df = pd.DataFrame(dct).set_index(train_index)

# Reverse Scaling Applied:
train_pred_df = train_pred_df.applymap(rev_min_max_func)
train_pred_df.head()

Unnamed: 0_level_0,Actual,Predictions
Date/Time,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-04-25 17:00:00,15.71,19.468876
2023-04-25 18:00:00,26.42,20.098937
2023-04-25 19:00:00,43.52,33.635119
2023-04-25 20:00:00,60.15,50.300802
2023-04-25 21:00:00,70.0,63.001614


## Plotting Interactive Chart:

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
fig = go.Figure()

# One line+marker trace per column, added in the order Actual -> Predictions.
for series_name in ('Actual', 'Predictions'):
    fig.add_trace(go.Scatter(x = train_pred_df.index, y = train_pred_df[series_name],
                             mode = 'lines+markers', name = series_name))

fig.update_layout(title = "Actual Average Humidity V/S Predicted Average Humidity (Train Data)")
fig.update_xaxes(title_text = "Datetime")
fig.update_yaxes(title_text = "Average Humidity")
fig.show()

## Testing Data:

In [None]:
dct1 = {'Actual':y_test,'Predictions':test_predict.reshape(-1)}
test_pred_df = pd.DataFrame.from_dict(dct1)

# The test split starts at row `training_size`, and its first target lies
# `time_step` rows further on. Deriving the offset (instead of hard-coding 8,
# which is only right for time_step = 4) keeps timestamps aligned if the
# window length changes.
test_start = training_size + time_step
test_pred_df.set_index(df.index[test_start:test_start + len(test_pred_df)], inplace = True)

# Reverse Scaling:
test_pred_df = test_pred_df.applymap(rev_min_max_func)
test_pred_df.head()

Unnamed: 0_level_0,Actual,Predictions
Date/Time,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-06-29 15:00:00,80.69,79.32094
2023-06-29 16:00:00,80.54,76.89162
2023-06-29 17:00:00,88.15,78.401187
2023-06-29 18:00:00,88.85,87.81577
2023-06-29 19:00:00,86.67,86.913218


In [None]:
fig1 = go.Figure()

# One line+marker trace per column, added in the order Actual -> Predictions.
for series_name in ('Actual', 'Predictions'):
    fig1.add_trace(go.Scatter(x = test_pred_df.index, y = test_pred_df[series_name],
                              mode = 'lines+markers', name = series_name))

fig1.update_layout(title = "Actual Average Humidity V/S Predicted Average Humidity (Test Data)")
fig1.update_xaxes(title_text = "Datetime")
fig1.update_yaxes(title_text = "Average Humidity")
fig1.show()

# **Forecasts**

## Creating a Generalized Function for forecasts:

In [None]:
# Function to forecast future:
def future_forecasts(num_forecasts, mod):
  """Recursively forecast the next `num_forecasts` hourly humidity values.

  The model is seeded with the most recent `time_step` scaled observations
  from the notebook-level array `df1`; every prediction is fed back in as the
  newest element of the window to produce the following step.

  Args:
    num_forecasts: number of future hourly steps to predict.
    mod: fitted model whose `predict` accepts input of shape (1, time_step, 1).

  Returns:
    DataFrame with a single 'Forecasts' column in original humidity units,
    indexed by hourly timestamps starting one hour after the last index entry
    of `df` (assumed to be the latest reading, since `df` is put in
    chronological order earlier in the notebook).
  """
  # Seed with the latest observations. The last window produced by
  # create_dataset stops one row short of the end of the series (its target
  # is the final row), so using it would make the first "forecast" a
  # prediction of an hour that has already been observed.
  window = np.asarray(df1[-time_step:], dtype = float).reshape(1, time_step, 1)

  scaled_preds = []
  for _ in range(num_forecasts):
    next_val = float(np.ravel(mod.predict(window, verbose = 0))[0])
    scaled_preds.append(next_val)

    # Slide the window: drop the oldest value, append the new prediction.
    window = np.concatenate((window[:, 1:, :], np.full((1, 1, 1), next_val)), axis = 1)

  # applying reverse scaling by using rev_min_max_func
  real_values = rev_min_max_func(np.asarray(scaled_preds, dtype = float))

  # Forecast timestamps begin one hour AFTER the last observation.
  one_hour = pd.Timedelta(hours = 1)
  first_stamp = pd.Timestamp(df.index[-1]) + one_hour
  new_index = pd.date_range(first_stamp, periods = num_forecasts, freq = one_hour)

  return pd.DataFrame({'Forecasts': real_values}, index = new_index)

In [None]:
num = 24 # next day prediction
lstm_forecast_df = future_forecasts(num, mod = best_lstm_model)



## Plotting Train, Test and Forecasts:

In [None]:
fig = make_subplots(rows = 1, cols = 3, subplot_titles = ("Training Plot", "Testing Plot", "Forecast Plot"))

# Panels 1 and 2: actual vs. predicted for the train and test frames.
for frame, panel in ((train_pred_df, 1), (test_pred_df, 2)):
    for series_name in ('Actual', 'Predictions'):
        fig.add_trace(go.Scatter(x = frame.index, y = frame[series_name],
                                 mode = 'markers+lines', name = series_name),
                      row = 1, col = panel)

# Panel 3: the recursive LSTM forecasts.
fig.add_trace(go.Scatter(x = lstm_forecast_df.index, y = lstm_forecast_df['Forecasts'],
                         mode = 'markers+lines', name = 'Forecasts'),
              row = 1, col = 3)

fig.update_layout(height = 400, width = 2800, title_text = "Training, Testing and Future Forecasts")

fig.update_xaxes(title_text = 'Datetime')
fig.update_yaxes(title_text = 'Average Humidity')

fig.show()

## GRU Model:

In [None]:
def build_model(hp):
  """Build and compile a tunable GRU regressor for the tuner.

  NOTE: this reuses the name `build_model` of the LSTM builder defined earlier
  in the notebook, so it replaces that function from this cell onwards.

  Args:
    hp: hyperparameter container supplied by the tuner; it is queried for the
      GRU width ('units1'), the hidden Dense width ('dense_units') and the
      activations of both Dense layers ('gru_dense1', 'gru_dense2').

  Returns:
    A compiled Sequential model taking windows of shape (time_step, 1), where
    `time_step` is the notebook-level setting, and emitting a single value.
  """
  # Hyperparameters are requested in the same order the layers are stacked.
  gru_units = hp.Int('units1', min_value = 20, max_value = 144, step = 12)
  hidden_units = hp.Int('dense_units', min_value = 10, max_value = 100)
  hidden_act = hp.Choice(values = ['linear','relu'], name = 'gru_dense1')
  output_act = hp.Choice(values = ['linear','relu'], name = 'gru_dense2')

  mod = Sequential([
      GRU(units = gru_units, input_shape = (time_step, 1)),
      Dense(units = hidden_units, activation = hidden_act),
      Dense(units = 1, activation = output_act),
  ])

  mod.compile(loss = 'mse', optimizer = 'adam', metrics = ['mse'])
  return mod

In [None]:
# Rank trials on validation MSE ('val_mse') rather than training MSE: the
# search below passes `validation_data`, and selecting on training error
# favours models that merely fit the training windows best.
# NOTE: results cached under `tuner_dir` from an earlier run with a different
# objective are reloaded as-is; delete that folder (or pass overwrite = True)
# after changing the objective.
tuner2 = RandomSearch(
    build_model,
    objective = 'val_mse',
    max_trials = 10,
    directory = 'tuner_dir',
    project_name = 'Avg_humi_gru_finetuning',
)

In [None]:
# Run the GRU hyperparameter search: each trial trains for 25 epochs on the
# training windows and is evaluated on (X_test, y_test).
# NOTE(review): the test split doubles as validation data here; if validation
# metrics are used for model selection, the test errors reported later are
# optimistic -- consider carving a separate validation slice.
tuner2.search(X_train, y_train, validation_data = (X_test, y_test), epochs = 25)

Trial 10 Complete [00h 00m 12s]
mse: 0.0033494255039840937

Best mse So Far: 0.00307649839669466
Total elapsed time: 00h 01m 44s


In [None]:
best_gru_model = tuner2.get_best_models(num_models = 1)[0]
best_gru_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, 44)                6204      
                                                                 
 dense (Dense)               (None, 77)                3465      
                                                                 
 dense_1 (Dense)             (None, 1)                 78        
                                                                 
Total params: 9,747
Trainable params: 9,747
Non-trainable params: 0
_________________________________________________________________


## Predictions:

In [None]:
gru_train_pred = best_gru_model.predict(X_train)
gru_test_pred = best_gru_model.predict(X_test)



## Training Error:

In [None]:
print("Training MSE:", mean_squared_error(y_train, gru_train_pred))
print("Training MAE:", mean_absolute_error(y_train, gru_train_pred))

Training MSE: 0.003561144336243956
Training MAE: 0.04293709527879221


## Testing Error:

In [None]:
print("Testing MSE:", mean_squared_error(y_test, gru_test_pred))
print("Testing MAE:", mean_absolute_error(y_test, gru_test_pred))

Testing MSE: 0.002082689974665077
Testing MAE: 0.03298893987362429


## Reverse Scaling:

## Training Data:

In [None]:
dct = {'Actual': y_train, 'Predictions': gru_train_pred.ravel()}

# The first training target is the row right after the first window, i.e.
# `time_step` rows into the frame; one timestamp per training sample follows.
train_index = df.index[time_step:time_step + X_train.shape[0]]
train_pred_gru = pd.DataFrame(dct).set_index(train_index)

# Applying Reverse Scaling:
train_pred_gru = train_pred_gru.applymap(rev_min_max_func)
train_pred_gru.head()

Unnamed: 0_level_0,Actual,Predictions
Date/Time,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-04-25 17:00:00,15.71,18.752734
2023-04-25 18:00:00,26.42,19.457913
2023-04-25 19:00:00,43.52,33.968967
2023-04-25 20:00:00,60.15,51.694487
2023-04-25 21:00:00,70.0,65.009692


In [None]:
fig = go.Figure()

# One line+marker trace per column, added in the order Actual -> Predictions.
for series_name in ('Actual', 'Predictions'):
    fig.add_trace(go.Scatter(x = train_pred_gru.index, y = train_pred_gru[series_name],
                             mode = 'lines+markers', name = series_name))

fig.update_layout(title = "Actual Average Humidity V/S Predicted Average Humidity (Train Data using GRU)")
fig.update_xaxes(title_text = 'Datetime')
fig.update_yaxes(title_text = 'Average Humidity')
fig.show()

## Testing Data:

In [None]:
dct = {'Actual':y_test,'Predictions':gru_test_pred.reshape(-1)}
test_pred_gru = pd.DataFrame.from_dict(dct)

# The test split starts at row `training_size`, and its first target lies
# `time_step` rows further on. Deriving the offset (instead of hard-coding 8,
# which is only right for time_step = 4) keeps timestamps aligned if the
# window length changes.
test_start = training_size + time_step
test_pred_gru.set_index(df.index[test_start:test_start + len(test_pred_gru)], inplace = True)

# Applying Reverse Scaling:
test_pred_gru = test_pred_gru.applymap(rev_min_max_func)
test_pred_gru.head()

Unnamed: 0_level_0,Actual,Predictions
Date/Time,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-06-29 15:00:00,80.69,83.368103
2023-06-29 16:00:00,80.54,80.905902
2023-06-29 17:00:00,88.15,82.507716
2023-06-29 18:00:00,88.85,90.828481
2023-06-29 19:00:00,86.67,90.020001


In [None]:
fig = go.Figure()

# One line+marker trace per column, added in the order Actual -> Predictions.
for series_name in ('Actual', 'Predictions'):
    fig.add_trace(go.Scatter(x = test_pred_gru.index, y = test_pred_gru[series_name],
                             mode = 'lines+markers', name = series_name))

fig.update_layout(title = "Actual Average Humidity V/S Predicted Average Humidity (Test Data using GRU)")
fig.update_xaxes(title_text = 'Datetime')
fig.update_yaxes(title_text = 'Average Humidity')
fig.show()

# **Forecasts:**

In [None]:
num = 24 #next day forecast
gru_forecasts = future_forecasts(num, mod = best_gru_model)



## Plotting Training, Testing and Forecasts:

In [None]:
fig = make_subplots(rows = 1, cols = 3, subplot_titles = ("Training Plot", "Testing Plot", "Forecast Plot"))

# Panels 1 and 2 must show the GRU predictions (train_pred_gru / test_pred_gru);
# plotting train_pred_df / test_pred_df here would repeat the LSTM results
# next to the GRU forecasts.
for frame, panel in ((train_pred_gru, 1), (test_pred_gru, 2)):
    for series_name in ('Actual', 'Predictions'):
        fig.add_trace(go.Scatter(x = frame.index, y = frame[series_name],
                                 mode = 'markers+lines', name = series_name),
                      row = 1, col = panel)

# Panel 3: the recursive GRU forecasts.
fig.add_trace(go.Scatter(x = gru_forecasts.index, y = gru_forecasts['Forecasts'],
                         mode = 'markers+lines', name = 'Forecasts'),
              row = 1, col = 3)

fig.update_layout(height = 400, width = 2800, title_text = "Training, Testing and Future Forecasts")

fig.update_xaxes(title_text = 'Datetime')
fig.update_yaxes(title_text = 'Average Humidity')

fig.show()

## Error Check for both models:

In [None]:
print("LSTM MSE for test data:", mean_squared_error(y_test, test_predict))
print("LSTM MAE for test data", mean_absolute_error(y_test, test_predict))
print("GRU MSE for test data:", mean_squared_error(y_test, gru_test_pred))
print("GRU MAE for test data:", mean_absolute_error(y_test, gru_test_pred))

LSTM MSE for test data: 0.003770178799644138
LSTM MAE for test data 0.05022772550938546
GRU MSE for test data: 0.002082689974665077
GRU MAE for test data: 0.03298893987362429


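## The model with the lowest test MSE and MAE is considered the best. Here the GRU (test MSE ≈ 0.0021, MAE ≈ 0.033) outperforms the LSTM (test MSE ≈ 0.0038, MAE ≈ 0.050).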