### Time Series Forecasting using LSTM

In [None]:
# Importing libraries
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import numpy as np
import os
from convert_columns_to_floats import *

# Notebook-wide matplotlib defaults: 8x6 inch figures, grid switched off.
mpl.rcParams.update({
    'figure.figsize': (8, 6),
    'axes.grid': False,
})



In [None]:
# Load the OpenWeather forecast export; `orig_df` keeps the raw frame and `df`
# is the working copy that later cells modify.
# NOTE(review): the path is absolute and machine-specific — consider a
# configurable data directory so the notebook runs elsewhere.
orig_df = pd.read_csv('/Users/faymajidelhassan/Downloads/Master project /Data/Weather/forecasts/open_weather.csv') 
df = orig_df.copy() 
print(f'Size of the dataset: {df.shape} \n')  
print() 
# `display` is the IPython rich-display helper available inside notebook kernels.
display(df.head(5))

In [None]:
# Convert specific columns to lists of floats
# columns_to_convert1 = [
#     'temperature', 'humidity', 'pressure',	'cloud_cover',	'wind_speed',
#         	'wind_direction',	'weather_code'	,'poprecipitation'
# ]

# Columns whose cells are parsed into numeric values and then averaged.
columns_to_convert2 = [
    'temperature',
    'humidity',
    'pressure',
    'cloud_cover',
    'wind_speed',
    'wind_direction',
    'poprecipitation',
    'weather_code',
]
def convert_columns_to_floats2(df, columns_to_convert):
    """Parse the listed columns of ``df`` in place with ``parse_complex_string``.

    Each cell is first cast to ``str`` and then handed to
    ``parse_complex_string`` (star-imported from ``convert_columns_to_floats``).
    Progress is printed before and after every column.

    Returns the same (mutated) DataFrame.
    """
    for column_name in columns_to_convert:
        print(f"Processing column: {column_name}")
        as_text = df[column_name].astype(str)
        df[column_name] = as_text.apply(parse_complex_string)
        print(f"Processed column: {column_name}")
    return df

def flatten_columns(df, columns_to_flatten):
    """Collapse list-valued cells of the given columns to their mean, in place.

    Cells that are not ``list`` instances are left unchanged.
    Returns the same (mutated) DataFrame.
    """
    def _collapse(cell):
        if isinstance(cell, list):
            return np.mean(cell)
        return cell

    for column_name in columns_to_flatten:
        df[column_name] = df[column_name].apply(_collapse)
    return df
def reduce_cells_for_all_columns(df, columns_to_reduce, n=96):
    '''
    Drop the last ``n`` values from every sequence cell of the given columns.

    Parameters
    ----------
    df : DataFrame whose columns are modified in place.
    columns_to_reduce : iterable of column names to trim.
    n : number of trailing values to remove from each cell (default 96).
        ``n=0`` leaves the cells unchanged.

    Returns the same (mutated) DataFrame.

    Raises ValueError if ``n`` is negative.
    '''
    if n < 0:
        raise ValueError("n must be non-negative")

    def _trim(cell):
        # Cells without a length (e.g. NaN scalars) cannot be sliced; keep them as-is.
        if not hasattr(cell, '__len__'):
            return cell
        # Explicit end index: `cell[:-0]` would wrongly return an empty sequence.
        return cell[:max(len(cell) - n, 0)]

    for cols in columns_to_reduce:
        df[cols] = df[cols].apply(_trim)
    return df
# df = convert_columns_to_floats2(df, columns_to_convert1)
# Parse the raw cells of the selected columns into numeric values.
df= convert_columns_to_floats2(df, columns_to_convert2)
# Flatten the columns
# df= reduce_cells_for_all_columns(df,columns_to_convert2,n=72)
# df = flatten_columns(df, columns_to_convert1)
# Replace every list-valued cell by the mean of its values.
df = flatten_columns(df, columns_to_convert2)

In [None]:
# Preview the frame after parsing and flattening.
df.head()

In [47]:
# Convert timestamp to datetime and set as index
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.set_index('timestamp')

# Fill missing values using forward fill
# (`DataFrame.ffill()` replaces the deprecated `fillna(method='ffill')`).
df = df.ffill()

# Plot univariate data (temperature)
# NOTE(review): `uni_data` is taken BEFORE resampling, so it keeps the original
# sampling interval rather than the 10-minute grid produced below.
uni_data = df['temperature']
uni_data.plot()

# Resample and aggregate the data onto a 10-minute grid
# ('10min' replaces the deprecated '10T' offset alias).
# NOTE(review): 'sum' over `weather_code` adds up category codes — confirm this is intended.
df = df.resample('10min').agg({
    'temperature': 'mean',
    'humidity': 'mean',
    'pressure': 'mean',

    'cloud_cover': 'mean',
    'wind_speed': 'mean',
    'wind_direction': 'mean',
    'poprecipitation': 'sum',

    'weather_code': 'sum'
})


In [None]:
# Summary statistics of the working frame.
df.describe()

Observations:
1) One reading every 10 minutes (from the time difference between consecutive records in the datetime column)
2) 1 day = 6*24 = 144 readings
Task: Forecast the temperature (in degrees) in the future




In [None]:
# Convert the temperature Series to a plain NumPy array.
# `np.asarray` also accepts an ndarray, so re-running this cell no longer fails
# with "'numpy.ndarray' object has no attribute 'values'".
uni_data = np.asarray(uni_data)



In [None]:
# Number of temperature samples available for the univariate experiments.
uni_data.shape

In [None]:
# uni_data = np.array(uni_data)

In [None]:
## train test split for simple time series moving window average
# The first 80% of the samples are used for training; the rest for validation.
train_split = int(len(uni_data) * 0.8)#4234
# Fix TensorFlow's global random seed.
tf.random.set_seed(13)

# ### standardize data
# uni_data_mean = uni_data[:train_split].mean()
# uni_data_std = uni_data[:train_split].std()
# uni_data  = (uni_data - uni_data_mean)/ uni_data_std

# Sanity check on the container type of the series.
print(type(uni_data))


Moving Window Average


1.   Given the last 20 observations (temperature), predict the next observation
2.   MWA: prediction = AVG(last 20 values)




In [None]:
## utility functions

## funtion to create data for univariate forecasting

def univariate_data(dataset, start_idx , end_idx , history_size, target_size):
  """Build sliding-window samples from a 1-D series.

  For every position ``p`` in ``[start_idx + history_size, end_idx)`` the
  sample is the ``history_size`` values preceding ``p`` reshaped to
  ``(history_size, 1)``, and the label is ``dataset[p + target_size]``.
  When ``end_idx`` is None it defaults to ``len(dataset) - target_size``.

  Returns ``(samples, labels)`` as NumPy arrays.
  """
  first = start_idx + history_size
  last = len(dataset) - target_size if end_idx is None else end_idx
  positions = range(first, last)

  windows = [
      np.reshape(dataset[np.arange(p - history_size, p)], (history_size, 1))
      for p in positions
  ]
  targets = [dataset[p + target_size] for p in positions]
  return np.array(windows), np.array(targets)

uni_data_history = 20   ## each input window holds the last 20 values
uni_data_future = 0     ## label offset past the window; 0 = the value right after it

# Training windows start in the first `train_split` samples.
x_train_uni , y_train_uni = univariate_data(uni_data , 0 , train_split , uni_data_history , uni_data_future)

# Validation windows cover the remainder of the series.
x_val_uni , y_val_uni = univariate_data(uni_data , train_split , None ,uni_data_history , uni_data_future)

In [None]:
# Inspect the training windows.
x_train_uni

In [None]:
# Shapes of the training and validation windows and their labels.
print(x_train_uni.shape , y_train_uni.shape)
print(x_val_uni.shape , y_val_uni.shape)

In [None]:
# First training window and the value the model should predict for it.
print('Single window of history data' , x_train_uni[0])

print('Target Temperature to predict ' , y_train_uni[0])


In [None]:
### fucntion to create time steps
def create_time_steps(length):
  """Return the relative time indices ``[-length, ..., -1]`` for a history window."""
  return [-offset for offset in range(length, 0, -1)]

### function to plot time series data

def plot_time_series(plot_data, delta , title):
  """Plot a history window with its true future value and, optionally, a prediction.

  plot_data : sequence of up to three items — history array, true future
              value, model prediction (in that order).
  delta     : x position at which the future / predicted markers are drawn;
              a falsy value places them at 0.
  title     : figure title.

  Returns the ``matplotlib.pyplot`` module so callers can call ``.show()``.
  """
  labels = ["History" , 'True Future' , 'Model Predicted']
  marker = ['.-' , 'rx' , 'go']
  time_steps = create_time_steps(plot_data[0].shape[0])

  future = delta if delta else 0
  plt.title(title)
  for i , series in enumerate(plot_data):
    if i :
      # Future value / prediction: a single marker at x = future.
      plt.plot(future , series , marker[i], markersize = 10 , label = labels[i])
    else:
      # History: a line over the negative time steps.
      plt.plot(time_steps, series.flatten(), marker[i], label = labels[i])
  plt.legend()
  plt.xlim([time_steps[0], (future+5) *2])

  plt.xlabel('Time_Step')
  return plt
## function to plot time series data



# First training window together with its target value.
plot_time_series([x_train_uni[0] , y_train_uni[0]] , 0 , 'Sample Example')

In [None]:
# Same plot for the training window at index 20.
i = 20
plot_time_series([x_train_uni[i], y_train_uni[i]] , 0 , 'Sample Example')

In [None]:
### Moving window average

def MWA(history):
  """Moving-window-average baseline: the forecast is the mean of the history window."""
  window = np.asarray(history)
  return window.mean()




In [None]:
# Compare the moving-window-average baseline with the true value for window 20.
i = 20
plot_time_series([x_train_uni[i] , y_train_uni[i] , MWA(x_train_uni[i])] , 0 , 'MWA predicted')

Univariate time-series forecasting


*   Only single feature as temperature(historical data)
*   Task:  Given last 20 observations(history) , predict next temperature value 



In [None]:
## prepare tensorflow dataset
# batch_size / buffer_size are reused by the later dataset cells as well.
batch_size = 256
buffer_size = 10000

# Training pipeline: cache, shuffle, batch, then repeat without end.
train_uni = tf.data.Dataset.from_tensor_slices((x_train_uni , y_train_uni))
train_uni = train_uni.cache().shuffle(buffer_size).batch(batch_size).repeat()

# NOTE(review): the validation set is shuffled too, so the samples plotted
# from it later differ between runs.
val_uni = tf.data.Dataset.from_tensor_slices((x_val_uni , y_val_uni))
val_uni = val_uni.cache().shuffle(buffer_size).batch(batch_size).repeat()

print(train_uni)
print(val_uni)

In [None]:
## Define LSTM model 

# One 16-unit LSTM layer followed by a single-unit dense output; the input
# shape is taken from the last two axes of x_train_uni.
lstm_model = tf.keras.models.Sequential([tf.keras.layers.LSTM(16 , input_shape = x_train_uni.shape[-2:]), 
                                         tf.keras.layers.Dense(1)])

lstm_model.compile(optimizer = 'adam', loss = 'mae')

# The datasets repeat without end, so each epoch is bounded by `steps`
# batches (and validation by `validation_steps`).
steps = 200

EPOCHS =10

lstm_model.fit(train_uni , epochs = EPOCHS, steps_per_epoch = steps ,
               validation_data = val_uni, validation_steps = 50)




In [None]:
# For five validation batches, plot the first window, its true value and the
# LSTM prediction for it.
for window_batch, target_batch in val_uni.take(5):
  predicted = lstm_model.predict(window_batch)[0]
  plot = plot_time_series(
      [window_batch[0].numpy(), target_batch[0].numpy(), predicted],
      0,
      'LSTM UNIVARIATE')
  plot.show()

Multivariate  and Single step Forecasting


*   Task: Given several features at each time step (the code below uses temperature, humidity, pressure, cloud cover, wind speed, wind direction and precipitation), can we predict the temperature at a single future time step?




In [None]:
## features 

# features_6 = ['temperature', 'humidity', 'pressure', 'global_irradiance', 'direct_irradiance', 'diffuse_irradiance']
# Columns used as model inputs. Despite the name, `features14` lists 7 columns.
# The order fixes the column indices of `dataset` later on (0 = temperature, 1 = humidity, ...).
features14 = [
    'temperature', 'humidity', 'pressure',
    'cloud_cover', 'wind_speed', 'wind_direction', 'poprecipitation'
]
features = df[features14]
features.head()



In [None]:
# Show the per-column count of missing values. As a bare expression in the
# middle of the cell the result was silently discarded, so display it explicitly.
display(features.isnull().sum())
# Impute the remaining gaps with each column's mean.
# NOTE(review): the mean is taken over all rows, including the later validation part.
features = features.fillna(features.mean())

In [None]:
# One subplot per feature column.
features.plot(subplots=True)

In [None]:
# ### standardize data
dataset = features.values
# dataset = np.array(features)

# Fit the normalisation statistics on the training rows of THIS dataset only.
# The previous code reused `train_split` from the univariate section, which was
# derived from the length of the un-resampled series (80%) and therefore did not
# match the 70% split applied to `dataset` when the windows are generated.
# Keep this fraction in sync with `train_split` in the window-generation cell.
norm_split = int(len(dataset) * 0.7)
data_mean = dataset[:norm_split].mean(axis =0)

data_std = dataset[:norm_split].std(axis = 0)
# A constant column has zero standard deviation; divide by 1 instead to avoid NaN/inf.
data_std = np.where(data_std == 0, 1.0, data_std)

dataset = (dataset - data_mean)/data_std



In [None]:
# # ### create mutlivariate data

# def multivariate_data(dataset, target, start_idx, end_idx, history_size, target_size, step, single_step=False):
#     data, labels = [], []
#     start_idx += history_size
#     if end_idx is None:
#         end_idx = len(dataset) - target_size
#     for i in range(start_idx, end_idx):
#         indices = range(i-history_size, i, step)
#         data.append(dataset[indices])
#         if single_step:
#             labels.append(target[i+target_size])
#         else:
#             labels.append(target[i:i+target_size])
#     return np.array(data), np.array(labels)
def multivariate_data(dataset, target, start_index, end_index, history_size,
                      target_size, step, single_step=False):
    """Build sliding-window samples from a multivariate array.

    For every position ``p`` in ``[start_index + history_size, end_index)``
    the sample consists of the rows ``p - history_size, ..., p - 1`` of
    ``dataset`` taken every ``step`` rows. The label is
    ``target[p + target_size]`` when ``single_step`` is true, otherwise the
    slice ``target[p:p + target_size]``.  When ``end_index`` is None it
    defaults to ``len(dataset) - target_size``.

    Returns ``(samples, labels)`` as NumPy arrays.
    """
    stop = len(dataset) - target_size if end_index is None else end_index
    first = start_index + history_size

    windows, targets = [], []
    for pos in range(first, stop):
        row_ids = np.arange(pos - history_size, pos, step)
        windows.append(dataset[row_ids])
        targets.append(
            target[pos + target_size] if single_step
            else target[pos:pos + target_size]
        )

    return np.array(windows), np.array(targets)


In [None]:
# ### generate multivariate data
from sklearn.preprocessing import StandardScaler
# history = 720
# future_target = 72
# STEP = 6

# x_train_ss, y_train_ss = multivariate_data(dataset, dataset[:, 1], 0, train_split, history,
#                                            future_target, STEP, single_step=True)


# x_val_ss , y_val_ss = multivariate_data(dataset , dataset[:,1] , train_split , None , history,
#                                         future_target, STEP, single_step = True)


# print(x_train_ss.shape , y_train_ss.shape)
# Define your parameters
# Define your parameters
# Each sample looks back `history` rows, keeps every `STEP`-th row, and its
# label lies `future_target` rows after the end of the window.
history = 720
future_target = 72
STEP = 6
# Training portion: the first 70% of the rows of `dataset`.
train_split = int(len(dataset) * 0.7)
# scaler = StandardScaler()
# scaler.fit(dataset[:train_split])

# # Transform the entire dataset
# dataset = scaler.transform(dataset)
# Get training data
# NOTE(review): the target passed here is dataset[:, 1]. With the feature order
# defined earlier, column 1 is 'humidity' and column 0 is 'temperature' —
# confirm which column is the intended forecasting target.
x_train_ss, y_train_ss = multivariate_data(dataset, dataset[:, 1], 0, train_split, history, future_target, STEP, single_step=True)

# Get validation data
x_val_ss, y_val_ss = multivariate_data(dataset, dataset[:, 1], train_split, None, history, future_target, STEP, single_step=True)

# Check shapes
print(x_train_ss.shape, y_train_ss.shape)
print(x_val_ss.shape, y_val_ss.shape)



In [None]:
# Inspect the multivariate training windows.
x_train_ss

In [None]:
## tensorflow dataset

# Same pipeline as the univariate case: cache, shuffle, batch, repeat without end.
train_ss = tf.data.Dataset.from_tensor_slices((x_train_ss, y_train_ss))
train_ss = train_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()

# NOTE(review): the validation set is shuffled as well, so plotted samples vary between runs.
val_ss = tf.data.Dataset.from_tensor_slices((x_val_ss, y_val_ss))
val_ss = val_ss.cache().shuffle(buffer_size).batch(batch_size).repeat()

print(train_ss)
print(val_ss)



In [None]:
### Modelling using LSTM
# Early stopping on the validation loss; the best weights are restored when it triggers.
# Built through `tf.keras` like the rest of the notebook, rather than the
# standalone `keras` package, so both always refer to the same Keras build.
callbacks = tf.keras.callbacks.EarlyStopping(
    patience = 10 , 
    restore_best_weights = True , 
    monitor = 'val_loss'
)
single_step_model = tf.keras.models.Sequential()

# return_sequences=False: the labels are one scalar per window, so the LSTM must
# emit only its final state. With return_sequences=True the Dense layer produced
# one output per time step, which does not match the scalar labels.
single_step_model.add(tf.keras.layers.LSTM(16, return_sequences=False,input_shape = x_train_ss.shape[-2:]))
# single_step_model.add(tf.keras.layers.LSTM(16,return_sequences=False))
# single_step_model.add(tf.keras.layers.Dense(4, activation="relu"))
single_step_model.add(tf.keras.layers.Dense(1))
single_step_model.compile(optimizer = tf.keras.optimizers.Adam(clipvalue=1.0,weight_decay=1e-6), loss = 'mae')
single_step_model.summary()


# The early-stopping callback was defined but never passed to fit(); wire it in.
single_step_model_history = single_step_model.fit(train_ss, epochs = EPOCHS ,
                                                  steps_per_epoch =steps,verbose=1, validation_data = val_ss,
                                                  validation_steps = 50, callbacks = [callbacks])


In [None]:
## plot train test loss 

def plot_loss(history , title):
  """Plot training and validation loss per epoch from a Keras ``History`` object.

  Reads ``history.history['loss']`` and ``history.history['val_loss']`` and
  draws both curves in a new figure with the given title.
  """
  curves = history.history
  train_curve, val_curve = curves['loss'], curves['val_loss']
  epoch_axis = range(len(train_curve))

  plt.figure()
  for values, fmt, name in ((train_curve, 'b', 'Train Loss'),
                            (val_curve, 'r', 'Validation Loss')):
    plt.plot(epoch_axis, values, fmt, label = name)
  plt.title(title)
  plt.legend()
  plt.grid()
  plt.show()

# Loss curves of the single-step model.
plot_loss(single_step_model_history , 'Single Step Training and validation loss')

In [None]:
# plot time series and predicted values

# Column 1 of the first window of each batch is drawn as history. The markers
# are placed at x = 12, which equals future_target / STEP (72 / 6) for the
# values set earlier in this notebook.
for x, y in val_ss.take(5):
  plot = plot_time_series([x[0][:, 1].numpy(), y[0].numpy(),
                    single_step_model.predict(x)[0]], 12,
                   'Single Step Prediction')
  plot.show()

Multi-variate & multi-step forecasting
-> Generate multiple future values of temperature

In [None]:
future_target = 72 # 72 future values
# single_step is left at its default (False), so each label is a slice of
# `future_target` consecutive target values.
# NOTE(review): as in the single-step cell, the target is column 1 of `dataset`.
x_train_multi, y_train_multi = multivariate_data(dataset, dataset[:, 1], 0,
                                                 train_split, history,
                                                 future_target, STEP)
x_val_multi, y_val_multi = multivariate_data(dataset, dataset[:, 1],
                                             train_split, None, history,
                                             future_target, STEP)

print(x_train_multi.shape)
print(y_train_multi.shape)

In [None]:
# TF DATASET

# Training pipeline: cache, shuffle, batch, repeat without end.
train_data_multi = tf.data.Dataset.from_tensor_slices((x_train_multi, y_train_multi))
train_data_multi = train_data_multi.cache().shuffle(buffer_size).batch(batch_size).repeat()

# Validation pipeline: batched and repeated, but (unlike the earlier
# validation sets) neither cached nor shuffled.
val_data_multi = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
val_data_multi = val_data_multi.batch(batch_size).repeat()

In [None]:
#plotting function
def multi_step_plot(history, true_future, prediction):
  """Plot a history window, the true future values and, optionally, a prediction.

  Column 1 of ``history`` is drawn over negative time steps; future values are
  drawn over ``arange(len(true_future)) / STEP``. The prediction is drawn only
  when ``prediction.any()`` is true, so an all-zero array acts as a
  "no prediction" placeholder.
  """
  plt.figure(figsize=(12, 6))
  past_axis = create_time_steps(len(history))
  future_axis = np.arange(len(true_future)) / STEP
  plt.grid()
  plt.plot(past_axis, np.array(history[:, 1]), label='History')
  plt.plot(future_axis, np.array(true_future), 'bo', label='True Future')
  if prediction.any():
    plt.plot(future_axis, np.array(prediction), 'ro', label='Predicted Future')
  plt.legend(loc='upper left')
  plt.show()
  


# Plot one training sample; np.array([0]) is the "no prediction" placeholder.
for x, y in train_data_multi.take(1):
  multi_step_plot(x[0], y[0], np.array([0]))

In [None]:
# multi_step_model = tf.keras.models.Sequential()
# multi_step_model.add(tf.keras.layers.LSTM(16,
#                                           return_sequences=True,
#                                           input_shape=x_train_multi.shape[-2:]))
# multi_step_model.add(tf.keras.layers.LSTM(32,return_sequences=False, activation='relu'))
# multi_step_model.add(tf.keras.layers.Dense(4))
# multi_step_model.add(tf.keras.layers.Dense(72)) # for 72 outputs

# multi_step_model.compile(optimizer=tf.keras.optimizers.SGD(clipvalue=1.0,weight_decay=1e-6), loss='mae')
# multi_step_model.summary()
# multi_step_history = multi_step_model.fit(train_data_multi, epochs=EPOCHS,
#                                           steps_per_epoch=steps,
#                                           validation_data=val_data_multi,
#                                           validation_steps=50,callbacks = [callbacks])



# Define the model
# Two stacked LSTM layers followed by a small dense bottleneck and one output
# unit per forecast step.
multi_step_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(64, return_sequences=True, input_shape=x_train_multi.shape[-2:]),
    # tf.keras.layers.Dropout(0.2),  # Dropout layer for regularization
    tf.keras.layers.LSTM(16, return_sequences=False, activation='relu'),
    # tf.keras.layers.Dropout(0.2),  # Dropout layer for regularization
    # tf.keras.layers.LSTM(2, return_sequences=False, activation='relu'),
    tf.keras.layers.Dense(4, activation='relu'),  # Additional dense layer
    # Output width follows `future_target` (72 here) instead of a hard-coded 72,
    # so it stays in sync with the label length if the horizon is changed.
    tf.keras.layers.Dense(future_target)
])

# Compile the model
multi_step_model.compile(optimizer=tf.keras.optimizers.Adam(clipvalue=1.0,weight_decay=1e-6), loss='mae')

# Print the model summary
multi_step_model.summary()

# Fit the model
enhanced_history = multi_step_model.fit(train_data_multi, 
                                      epochs=EPOCHS,
                                      steps_per_epoch=steps,
                                      validation_data=val_data_multi,
                                      validation_steps=50
                                    )

In [None]:
# Loss curves of the multi-step model.
plot_loss(enhanced_history, 'Multi-Step Training and validation loss')


In [None]:
# For five validation batches, plot the first window with its true and predicted future.
for x, y in val_data_multi.take(5):
  multi_step_plot(x[0], y[0], multi_step_model.predict(x)[0])

In [None]:
# Validation loss (MAE, the compiled loss) of both models over 100 batches.
# The validation datasets repeat without end, hence the explicit `steps`.
mae_lstm_single = single_step_model.evaluate(val_ss, steps=100)
mae_lstm_multi=multi_step_model.evaluate(val_data_multi, steps=100)

In [None]:
from tensorflow.keras.models import load_model

# Load the previously saved multi-step and single-step models.
# NOTE(review): absolute, machine-specific paths — consider a configurable model directory.
loaded_model = load_model('/Users/faymajidelhassan/Downloads/Master project /CODE/EDA/Saved_models/Lstm_multi_step_model_measure+precip.h5')
loaded_model2 = load_model('/Users/faymajidelhassan/Downloads/Master project /CODE/EDA/Saved_models/Lstm_single_step_model_measure+precip.h5')

# Optionally, you can verify the model by making predictions
for x, y in val_data_multi.take(5):
    # Predict once per batch and reuse the result (previously predict() ran
    # twice and the first result was discarded).
    predictions = loaded_model.predict(x)
    multi_step_plot(x[0], y[0], predictions[0])


In [None]:
from tensorflow.keras.models import load_model
from tensorflow.keras.losses import MeanAbsoluteError
import numpy as np
import matplotlib.pyplot as plt

# Define a dictionary with custom objects if needed
# Maps the loss name 'mae' stored in the saved files to a concrete loss object.
# NOTE(review): presumably needed because the .h5 files cannot resolve the
# string 'mae' on load — confirm against the installed Keras version.
custom_objects = {'mae': MeanAbsoluteError()}

# Load the models
# NOTE(review): absolute, machine-specific paths — consider a configurable model directory.
loaded_model = load_model('/Users/faymajidelhassan/Downloads/Master project /CODE/EDA/Saved_models/Lstm_multi_step_model_measure+precip.h5', custom_objects=custom_objects)
loaded_model2 = load_model('/Users/faymajidelhassan/Downloads/Master project /CODE/EDA/Saved_models/Lstm_single_step_model_measure+precip.h5', custom_objects=custom_objects)

print("Models loaded successfully")

# Define the multi-step plot function
def multi_step_plot(history, true_future, prediction):
    """Plot a history window, the true future values and, optionally, a prediction.

    history     : 2-D window; column 1 is drawn over the negative time steps.
    true_future : sequence of future target values.
    prediction  : sequence of predicted values; an all-zero array (e.g.
                  ``np.array([0])``) is treated as "no prediction" and skipped,
                  matching the earlier definition of this function.

    The history window is sub-sampled every ``STEP`` rows while the future
    values are consecutive rows, so the future x positions are divided by
    ``STEP`` to put both curves on the same time scale.
    """
    plt.figure(figsize=(12, 6))
    num_in = list(range(-len(history), 0))
    num_out = np.arange(len(true_future)) / STEP

    plt.plot(num_in, np.array(history[:, 1]), label='History')
    plt.plot(num_out, np.array(true_future), 'bo-', label='True Future')
    prediction = np.asarray(prediction)
    if prediction.any():
        plt.plot(num_out, prediction, 'ro-', label='Predicted Future')

    plt.legend(loc='upper left')
    plt.xlabel('Time Steps')
    plt.ylabel('Value')
    plt.title('Multi-Step Forecasting')
    plt.grid(True)
    plt.show()

# Use the loaded model to make predictions and plot them
# (first window of each of five validation batches).
for x, y in val_data_multi.take(5):
    prediction = loaded_model.predict(x)[0]
    multi_step_plot(x[0], y[0], prediction)


In [None]:
# Validation loss of the loaded multi-step model over 100 batches.
# Note: this overwrites the `mae_lstm_multi` value computed for the freshly trained model.
mae_lstm_multi=loaded_model.evaluate(val_data_multi, steps=100)

In [None]:
# NOTE(review): `transformer_model` is not defined anywhere in this notebook —
# it must be created or loaded before this cell can run.
for x, y in val_ss.take(5):
    prediction = transformer_model.predict(x)
    # plot_time_series takes (plot_data, delta, title); the delta argument was
    # missing, which raised a TypeError. 12 matches the single-step LSTM plot
    # (future_target / STEP), and only column 1 of the window is drawn as
    # history, as in that plot, so its length matches the time axis.
    plot = plot_time_series([x[0][:, 1].numpy(), y[0].numpy(), prediction[0]], 12, 'Transformer UNIVARIATE')
    plot.show()
