<a href="https://colab.research.google.com/github/RifatMuhtasim/Data_Science/blob/main/Time_Series_Forecast_Model/LSTM_Univariate_Time_Series_Forecasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, LSTM

In [None]:
# Bangladesh CO2 emissions: one value per calendar year, 2000 through 2020 inclusive.
bangladesh_co2_df = pd.DataFrame(
    data={
        "year": list(range(2000, 2021)),
        "co2_emissions": [
            0.160122047, 0.194686761, 0.203401188, 0.209737198, 0.219955044,
            0.232135397, 0.251721898, 0.263586595, 0.285933318, 0.305030148,
            0.340233253, 0.361557397, 0.387829235, 0.408788179, 0.425189457,
            0.463517075, 0.507739270, 0.541787825, 0.586157624, 0.559733655,
            0.510647559,
        ],
    }
)

bangladesh_co2_df

Unnamed: 0,year,co2_emissions
0,2000,0.160122
1,2001,0.194687
2,2002,0.203401
3,2003,0.209737
4,2004,0.219955
5,2005,0.232135
6,2006,0.251722
7,2007,0.263587
8,2008,0.285933
9,2009,0.30503


# Data Preprocessing

In [None]:
# Chronological hold-out: years up to and including 2015 are used for fitting,
# every later year is kept aside for evaluating the forecast.
train_dataset = bangladesh_co2_df.loc[bangladesh_co2_df["year"].le(2015)]
val_dataset = bangladesh_co2_df.loc[bangladesh_co2_df["year"].gt(2015)]

In [None]:
def Prepare_data(timeseries_data, n_features):
    """Turn a univariate series into sliding-window samples for supervised learning.

    Every sample pairs ``n_features`` consecutive observations (the input
    window) with the single observation that immediately follows them (the
    target).

    Args:
        timeseries_data: 1-D sequence of observations (list, NumPy array or
            pandas Series). It is converted to a NumPy array first so that
            both the window and its target are selected by *position*; a
            Series is therefore handled correctly even when its index labels
            do not start at 0.
        n_features: Number of consecutive observations in each input window.
            Despite the name, this is the window length (time steps), not a
            feature count.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. With at least one window, ``X`` has
        shape ``(len(timeseries_data) - n_features, n_features)`` and ``y``
        holds one target per window. When the series is too short to form a
        window, both arrays are empty.

    Raises:
        ValueError: If ``n_features`` is smaller than 1.
    """
    if n_features < 1:
        raise ValueError(f"n_features must be at least 1, got {n_features}")

    # Positional view of the data: avoids label-based lookups on a Series.
    values = np.asarray(timeseries_data)

    # The last usable window must leave one observation after it for the target.
    n_windows = max(len(values) - n_features, 0)

    X = [values[start:start + n_features] for start in range(n_windows)]
    y = [values[start + n_features] for start in range(n_windows)]

    return np.array(X), np.array(y)

In [None]:
# Window length: how many consecutive yearly values feed each prediction.
n_steps = 5

# Only the emissions column is modelled (univariate series).
timeseries_data = train_dataset.loc[:, "co2_emissions"]

X_train, y_train = Prepare_data(timeseries_data, n_steps)

In [None]:
# Inspect the input windows: each row holds n_steps consecutive emission values.
print(X_train)

[[0.16012205 0.19468676 0.20340119 0.2097372  0.21995504]
 [0.19468676 0.20340119 0.2097372  0.21995504 0.2321354 ]
 [0.20340119 0.2097372  0.21995504 0.2321354  0.2517219 ]
 [0.2097372  0.21995504 0.2321354  0.2517219  0.26358659]
 [0.21995504 0.2321354  0.2517219  0.26358659 0.28593332]
 [0.2321354  0.2517219  0.26358659 0.28593332 0.30503015]
 [0.2517219  0.26358659 0.28593332 0.30503015 0.34023325]
 [0.26358659 0.28593332 0.30503015 0.34023325 0.3615574 ]
 [0.28593332 0.30503015 0.34023325 0.3615574  0.38782923]
 [0.30503015 0.34023325 0.3615574  0.38782923 0.40878818]
 [0.34023325 0.3615574  0.38782923 0.40878818 0.42518946]]


In [None]:
# Inspect the targets: the value that immediately follows each input window.
print(y_train)

[0.2321354  0.2517219  0.26358659 0.28593332 0.30503015 0.34023325
 0.3615574  0.38782923 0.40878818 0.42518946 0.46351708]


In [None]:
# Shape at this point is (samples, timesteps); the feature axis is added in the next step.
X_train.shape

(11, 5)

In [None]:
# Add a trailing feature axis: [samples, timesteps] -> [samples, timesteps, features].
# The series is univariate, so there is exactly one feature per time step.
n_features = 1
X_train = np.reshape(X_train, X_train.shape[:2] + (n_features,))
X_train.shape

(11, 5, 1)

# Building LSTM Model

In [None]:
# Seed Python, NumPy and TensorFlow so the randomly initialised weights are the
# same on every "Restart & Run All". (Bit-exact training on GPU may additionally
# require TensorFlow's op-determinism switch.)
tf.keras.utils.set_random_seed(42)

# Two stacked LSTM layers followed by a single linear unit that emits the
# next value of the series. The first LSTM returns the full sequence so the
# second LSTM receives one vector per time step.
lstm_model = Sequential([
    # Explicit Input object instead of the `input_shape=` layer argument.
    tf.keras.Input(shape=(n_steps, n_features)),
    LSTM(64, activation="relu", return_sequences=True),
    LSTM(32, activation="relu"),
    Dense(1),
])

lstm_model.compile(optimizer="adam", loss="mse")

# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
lstm_model.summary()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 5, 64)             16896     
                                                                 
 lstm_1 (LSTM)               (None, 32)                12416     
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 29345 (114.63 KB)
Trainable params: 29345 (114.63 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [None]:
# Fit on the windowed training samples; verbose=0 suppresses the per-epoch log.
history = lstm_model.fit(x=X_train, y=y_train, epochs=1000, verbose=0)

# Only the loss is tracked (no extra metrics were compiled), so report the
# loss recorded for the final epoch.
training_losses = history.history["loss"]
last_epoch_loss = training_losses[-1]
print("Last Epoch Loss:", last_epoch_loss)

Last Epoch Loss: 0.00016035708540584892


# Predict the Next 10 Years of CO2 Emissions

In [None]:
def LSTM_Prediction(lstm_model, timeseries_data, n_steps, n_features, num_of_predictions, start_year=2016):
    """Forecast future values one step at a time, feeding each prediction back in.

    For every step the last ``n_steps`` values of the (growing) series are
    reshaped to ``(1, n_steps, n_features)``, passed to ``lstm_model.predict``
    and the scalar at ``[0][0]`` of the result is appended to the series, so
    later predictions are based on earlier ones. The input window and the
    predicted value are printed for each step.

    Args:
        lstm_model: Object exposing ``predict(x, verbose=0)`` whose result can
            be indexed as ``[0][0]`` to obtain the predicted scalar.
        timeseries_data: 1-D sequence of past observations (list, NumPy array
            or pandas Series); it is copied into a NumPy array and not modified.
        n_steps: Number of most recent values used as the model input.
        n_features: Size of the trailing feature axis in the model input. The
            window of ``n_steps`` scalars must reshape to
            ``(1, n_steps, n_features)``, which for a 1-D series means 1.
        num_of_predictions: How many consecutive steps to forecast.
        start_year: Year used to label the first forecast in the printed log;
            each further step adds one. Defaults to 2016 (the original
            hard-coded label).

    Returns:
        List with ``num_of_predictions`` predicted scalars, in forecast order.

    Raises:
        ValueError: If the series holds fewer than ``n_steps`` observations.
    """
    timeseries_array = np.array(timeseries_data)

    # Fail early with a clear message instead of an opaque reshape error.
    if len(timeseries_array) < n_steps:
        raise ValueError(
            f"Need at least n_steps={n_steps} observations, got {len(timeseries_array)}"
        )

    lstm_output = []
    for offset in range(num_of_predictions):
        year = start_year + offset

        # Most recent window, including any values predicted in earlier iterations.
        x_input = np.array(timeseries_array[-n_steps:])
        print(f"{year} year input: {x_input}")

        x_input = x_input.reshape((1, n_steps, n_features))
        y_pred = lstm_model.predict(x_input, verbose=0)
        next_value = y_pred[0][0]
        print(f"{year} year output: {next_value}")

        lstm_output.append(next_value)
        # Extend the series so the next window slides forward by one step.
        timeseries_array = np.append(timeseries_array, next_value)

    return lstm_output

In [None]:
# Roll the model forward 10 steps beyond the end of the training series.
lstm_output = LSTM_Prediction(
    lstm_model=lstm_model,
    timeseries_data=timeseries_data,
    n_steps=n_steps,
    n_features=n_features,
    num_of_predictions=10,
)

2016 year input: [0.3615574  0.38782923 0.40878818 0.42518946 0.46351708]
2016 year output: 0.5118192434310913
2017 year input: [0.38782923 0.40878818 0.42518946 0.46351708 0.51181924]
2017 year output: 0.5544326305389404
2018 year input: [0.40878818 0.42518946 0.46351708 0.51181924 0.55443263]
2018 year output: 0.6038224697113037
2019 year input: [0.42518946 0.46351708 0.51181924 0.55443263 0.60382247]
2019 year output: 0.6677917242050171
2020 year input: [0.46351708 0.51181924 0.55443263 0.60382247 0.66779172]
2020 year output: 0.7579560279846191
2021 year input: [0.51181924 0.55443263 0.60382247 0.66779172 0.75795603]
2021 year output: 0.8729643821716309
2022 year input: [0.55443263 0.60382247 0.66779172 0.75795603 0.87296438]
2022 year output: 1.022853136062622
2023 year input: [0.60382247 0.66779172 0.75795603 0.87296438 1.02285314]
2023 year output: 1.244324803352356
2024 year input: [0.66779172 0.75795603 0.87296438 1.02285314 1.2443248 ]
2024 year output: 1.5991847515106201
202

In [None]:
# Build the forecast table in one step instead of concatenating row by row
# onto an empty frame (which is quadratic and leaves `year` as object dtype).
# Forecast years continue directly after the last training year (2015).
pred_result_df = pd.DataFrame({
    "year": range(2016, 2016 + len(lstm_output)),
    "co2_emissions": np.asarray(lstm_output, dtype=float),
})

# Append the forecasts to the observed training rows. Filtering to
# year <= 2015 first drops any forecasts added by an earlier run of this
# cell, so re-running it does not duplicate rows.
train_dataset = pd.concat(
    [train_dataset[train_dataset["year"] <= 2015], pred_result_df],
    ignore_index=True,
)

In [None]:
# After the concat above, train_dataset holds the observed training rows followed by the forecast rows.
train_dataset

Unnamed: 0,year,co2_emissions
0,2000,0.160122
1,2001,0.194687
2,2002,0.203401
3,2003,0.209737
4,2004,0.219955
5,2005,0.232135
6,2006,0.251722
7,2007,0.263587
8,2008,0.285933
9,2009,0.30503


In [None]:
# Keep only the forecast rows for 2016-2020, the years that also exist in val_dataset.
train_dataset_for_evaluate = train_dataset.loc[
    train_dataset["year"].gt(2015) & train_dataset["year"].le(2020)
]
train_dataset_for_evaluate

Unnamed: 0,year,co2_emissions
16,2016,0.511819
17,2017,0.554433
18,2018,0.603822
19,2019,0.667792
20,2020,0.757956


In [None]:
# Held-out observations (year > 2015) that the forecasts are compared against.
val_dataset

Unnamed: 0,year,co2_emissions
16,2016,0.507739
17,2017,0.541788
18,2018,0.586158
19,2019,0.559734
20,2020,0.510648


In [None]:
from sklearn.metrics import r2_score, mean_squared_error
import math

# Root-mean-squared error between the held-out observations and the
# forecasts for the same years (rows are compared in order).
rmse = math.sqrt(
    mean_squared_error(
        y_true=val_dataset["co2_emissions"],
        y_pred=train_dataset_for_evaluate["co2_emissions"],
    )
)
rmse

0.12110046177959254