<a href="https://colab.research.google.com/github/Chiebukar/Deep-Learning/blob/main/regression/temperature_forcasting_with_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Temperature Forecasting with the Jena climate dataset

In [None]:
from google.colab import files
# Upload kaggle.json through the Colab file picker.
# The result is bound to a name on purpose: files.upload() returns the
# uploaded files' contents, and leaving the bare call as the cell's last
# expression would echo the Kaggle API credentials into the saved notebook
# output.
uploaded = files.upload()

In [None]:
# Copy the uploaded kaggle.json into ~/.kaggle, the directory the Kaggle CLI
# reads its credentials from (created first if it does not exist).
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the credentials file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the kusuri/jena-climate dataset with the Kaggle CLI.
!kaggle datasets download -d kusuri/jena-climate

In [None]:
# List the working directory with full paths to confirm the download.
!ls -d $PWD/*

In [None]:
# Extract every zip archive in the working directory (the escaped wildcard is
# expanded by unzip itself), then delete the archives only if extraction
# succeeded.
!unzip \*.zip && rm *.zip

In [None]:
# List the working directory again to check the extracted files.
!ls -d $PWD/*

In [None]:
# Path of the Jena climate CSV that is read into a DataFrame below.
file_dir = "/content/jena_climate_2009_2016.csv"

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the CSV into a DataFrame and preview its first five rows.
jena_df = pd.read_csv(filepath_or_buffer=file_dir)
jena_df.head(n=5)

In [None]:
# (number of rows, number of columns) of the loaded frame.
jena_df.shape

In [None]:
# Column labels of the loaded frame.
jena_df.columns

In [None]:
# Keep every column except the first one and convert the result to a NumPy
# array; the first two rows are displayed as a quick sanity check.
jena_arr = np.array(jena_df.iloc[:, slice(1, None)])
jena_arr[0:2]

In [None]:
# standardize data
# The statistics are computed on the first `len_train` rows only and then
# applied to the whole array.
# The array is rebuilt from `jena_df` on every run so that re-executing this
# cell is idempotent: `mean` and `std` always describe the raw data, which the
# baseline cell relies on when it rescales the MAE with `std[1]`.
len_train = 200000
raw_arr = np.array(jena_df.iloc[:, 1:])
mean = raw_arr[:len_train].mean(axis=0)
std = raw_arr[:len_train].std(axis=0)
jena_arr = (raw_arr - mean) / std

In [None]:
# generator to yield batches of data from the recent past and future target
def generator(data, min_index, max_index, lookback=1440, delay=144, step=6, batch_size=18, shuffle=False):
  """
  Endlessly yield (samples, targets) batches built from a 2D array.

  For every chosen row r, the sample is data[r - lookback : r : step] and the
  target is column 1 of data[r + delay].

  data = 2D array (timesteps, features) to draw from
  min_index = lowest index of data a sample window may start at
  max_index = exclusive upper bound for the chosen rows; None means
              len(data) - delay - 1 so that the target index stays in range
  lookback = number of timesteps to look back for each sample
  delay = number of timesteps ahead of the chosen row to take the target from
  step = sampling period, in timesteps, inside the lookback window
  batch_size = number of samples per batch
  shuffle = draw rows at random (True) or walk through them in order (False)

  Yields a tuple (samples, targets) where samples has shape
  (batch, ceil(lookback / step), features) and targets has shape (batch,).
  In ordered mode the batch before a wrap-around may be shorter than
  batch_size.

  Raises ValueError if the index range leaves no row to sample from.
  """

  if max_index is None:
    max_index = len(data) - delay - 1

  first_row = min_index + lookback
  if first_row >= max_index:
    # Without this guard the ordered mode would yield empty batches forever.
    raise ValueError(
        'no rows to sample: min_index + lookback must be smaller than max_index')

  # Offsets of the sampled timesteps relative to the chosen row. Using their
  # count for the buffer size keeps the shapes consistent even when lookback
  # is not a multiple of step.
  offsets = np.arange(-lookback, 0, step)
  i = first_row

  while True:
    if shuffle:
      rows = np.random.randint(first_row, max_index, size=batch_size)
    else:
      # Wrap around once a full batch no longer fits before max_index.
      if i + batch_size >= max_index:
        i = first_row
      rows = np.arange(i, min(i + batch_size, max_index))
      i += len(rows)

    samples = np.zeros((len(rows), len(offsets), data.shape[-1]))
    targets = np.zeros((len(rows),))

    for j, row in enumerate(rows):
      samples[j] = data[row + offsets]
      targets[j] = data[row + delay][1]
    yield samples, targets

In [None]:
# Three generators over different row ranges of jena_arr: rows up to 200000
# for training, 200001-300000 for validation, and everything from 300001
# onwards for testing (max_index=None lets the generator pick the end).
# All three draw their rows at random (shuffle=True).
train_gen = generator(jena_arr, 0, 200000, shuffle=True)
valid_gen = generator(jena_arr, 200001, 300000, shuffle=True)
test_gen = generator(jena_arr, 300001, None, shuffle=True)


In [None]:
# get validation and test steps
# Every generator call yields a whole batch, so the number of steps needed to
# draw about one pass worth of samples is the sample count divided by the
# batch size, not the sample count itself.
lookback = 1440
batch_size = 18  # must match the batch_size the generators use (generator()'s default)
val_steps = (300000 - 200001 - lookback) // batch_size
test_steps = (len(jena_arr) - 300001 - lookback) // batch_size

In [None]:
# establish baseline
def evaluate_naive_method(data_gen=None, steps=None):
  """
  Mean absolute error of the "no change" baseline.

  The prediction for each sample is the value of feature 1 at the sample's
  last timestep; it is compared with the target yielded alongside the sample.

  data_gen = iterator yielding (samples, targets) batches; defaults to the
             notebook-level valid_gen
  steps = number of batches to draw; defaults to the notebook-level val_steps

  Returns the mean of the per-batch MAEs, in the units of the generator's
  data.
  """
  # Resolve the defaults at call time so a bare call keeps using the
  # notebook's globals.
  if data_gen is None:
    data_gen = valid_gen
  if steps is None:
    steps = val_steps

  batch_maes = []
  for _ in range(steps):
    samples, targets = next(data_gen)
    preds = samples[:, -1, 1]
    batch_maes.append(np.mean(np.abs(preds - targets)))
  return np.mean(batch_maes)

In [None]:
# get baseline evaluation
# The baseline MAE is measured on standardized data; multiplying by the
# standard deviation of column 1 converts it back to that column's original
# units.
mae = evaluate_naive_method()
celsius_mae = std[1] * mae
celsius_mae

In [None]:

from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.callbacks import ModelCheckpoint

In [None]:
# build model
def build_model():
  """
  Build and compile the stacked-LSTM regression network.

  Architecture: LSTM(32, returning sequences) -> LSTM(64) -> Dense(8, relu)
  -> Dropout(0.1) -> Dense(1). The input accepts sequences of any length
  whose feature count equals the last dimension of jena_arr.

  Returns the model compiled with MAE loss and the RMSprop optimizer.
  """
  n_features = jena_arr.shape[-1]
  layers = [
      LSTM(32, dropout=0.1, recurrent_dropout=0.25,
           return_sequences=True, input_shape=(None, n_features)),
      LSTM(64, activation='tanh', dropout=0.5),
      Dense(8, activation='relu'),
      Dropout(0.1),
      Dense(1),
  ]
  model = Sequential(layers)
  model.compile(optimizer='rmsprop', loss='mae')
  return model

In [None]:
# Write a checkpoint to file_path whenever the validation loss reaches a new
# minimum.
file_path = 'a_weights.best.hdf5'
checkpoint = ModelCheckpoint(
    filepath=file_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

In [None]:
model = build_model()
# `callbacks` is documented as a list of Callback instances, so the checkpoint
# is wrapped in a list instead of being passed bare.
history = model.fit(train_gen, steps_per_epoch=500, epochs=25,
                    validation_data=valid_gen, validation_steps=500,
                    callbacks=[checkpoint])

In [None]:
history_df = pd.DataFrame(history.history)
# The model is compiled with loss='mae' and no extra metrics, so the history
# only holds 'loss' and 'val_loss' (both are the MAE); selecting
# 'mae'/'val_mae' would raise a KeyError.
ax = history_df[['loss', 'val_loss']].plot(title='Training vs. validation loss (MAE)')
ax.set_xlabel('epoch')
ax.set_ylabel('MAE (standardized units)');