# Packages

In [1]:
import pandas as pd 
import numpy as np

In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

In [3]:
from keras.layers import LSTM

In [4]:
from keras.models import Model

In [5]:
from keras.layers import Dense, Input, Dropout
from keras.models import Sequential

# Dataset

In [7]:
df = pd.read_csv('DataSets /WORLD-OWID-Features')

In [8]:
#df['Date'] = pd.to_datetime(df['date'], format="%Y-%m-%d")

In [9]:
df = df.set_index('year')

In [10]:
df.head()

Unnamed: 0_level_0,cement_co2,co2,coal_co2,flaring_co2,gas_co2,land_use_change_co2,oil_co2,other_industry_co2
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1750,317.372711,9.351,9.351,256.309556,1814.698707,4316.805465,3625.609168,82.069576
1751,317.372711,9.351,9.351,256.309556,1814.698707,4316.805465,3625.609168,82.069576
1752,317.372711,9.354,9.354,256.309556,1814.698707,4316.805465,3625.609168,82.069576
1753,317.372711,9.354,9.354,256.309556,1814.698707,4316.805465,3625.609168,82.069576
1754,317.372711,9.358,9.358,256.309556,1814.698707,4316.805465,3625.609168,82.069576


# Creating our lookback window 

In [6]:
def LookBackData(data, window):
    LookBackData = []
    for point in range(window,int(data.shape[0])):
        LookBackData.append(list(data.loc[point-window:point-1]))
    LookBackData = np.array(LookBackData)
    return LookBackData 

# defining our X and Y 

In [13]:
window = 5 # Making predictions based on the previous week 

In [16]:
df_ = df.reset_index()

In [13]:
#df_

In [14]:
y = df['co2']
y_window = y[window:].copy()
y_window.shape

(267,)

In [17]:
X = LookBackData(df_['co2'], window)
X.shape

(267, 5)

# Splitting our data 

In [22]:
len_test = len(df[df.index > 2000])

In [23]:
X_train = X[len_test:]
X_train.shape

(246, 5)

In [24]:
X_test = X[:len_test]
X_test.shape

(21, 5)

In [25]:
y_train = y_window[len_test:]
y_train.shape

(246,)

In [26]:
y_test = y_window[:len_test]
y_test.shape

(21,)

# Scaling our variables 

In [11]:
'''scaler = MinMaxScaler(feature_range=(0,1))

X_train_Scaled = scaler.fit_transform(X_train)
X_test_Scaled = scaler.transform(X_test)
y_train_Scaled = scaler.fit_transform(np.array(y_train).reshape(-1, 1))
y_test = scaler.transform(np.array(y_test).reshape(-1, 1))'''

'scaler = MinMaxScaler(feature_range=(0,1))\n\nX_train_Scaled = scaler.fit_transform(X_train)\nX_test_Scaled = scaler.transform(X_test)\ny_train_Scaled = scaler.fit_transform(np.array(y_train).reshape(-1, 1))\ny_test = scaler.transform(np.array(y_test).reshape(-1, 1))'

# Evaluation & Visualization Helper Functions

In [27]:
def eval_metrics(y_test, y_pred):
  R2 = r2_score(y_test, y_pred)
  Max_error = max_error(y_test, y_pred)
  MAE = mean_absolute_error(y_test, y_pred)
  MAPE = mean_absolute_percentage_error(y_test, y_pred)
  return R2, Max_error, MAE, MAPE

In [28]:
def print_model_results(model_name,y_test,y_pred):
    R2, ME, MAE, MAPE = eval_metrics(y_test,y_pred)
    print(f'{model_name}:  R^2= {R2:.4f},  ME = {ME:.4f},  MAE = {MAE:.4f},  MAPE = {MAPE:.4f}')
    return None

In [29]:
def plot_results(model_name,y_test,y_pred):
    R2, ME, MAE, MAPE = eval_metrics(y_test,y_pred)
    df_plot = pd.DataFrame({'actual':y_test, 'predicted':y_pred})
    df_plot = df_plot.reset_index()
    df_plot = df_plot.drop(columns=['Year'])
    date = pd.date_range(start='2000-01-01', end='2021-01-01')
    df_plot = df_plot.set_index(date)
    df_plot.plot()
    plt.ylabel('MtCO2 Value')
    plt.title(f' {model_name}:  R^2= {R2:.4f},  ME = {ME:.4f},  MAE = {MAE:.4f},  MAPE = {MAPE:.4f}')
    plt.ylim(84,111)

    #file_name =  f'{model_name:}.png'
    #plt.savefig(file_name)
    return None

# LSTM Autoregressive

In [23]:
input_layer = Input(shape=(7,1), dtype='float32')
lstm_layer = LSTM(64, input_shape=(7,1), return_sequences=False)(input_layer)
dropout_layer = Dropout(0.2)(lstm_layer)
output_layer = Dense(1, activation='linear')(dropout_layer)

In [24]:
lstm = Model(inputs=input_layer, outputs=output_layer)
lstm.compile(loss='mae', optimizer='adam')
lstm.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 7, 1)]            0         
                                                                 
 lstm (LSTM)                 (None, 64)                16896     
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
Total params: 16961 (66.25 KB)
Trainable params: 16961 (66.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [25]:
lstm.fit(X_train_Scaled, y_train_Scaled, epochs=20, batch_size=16, verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x2a9dea390>

In [27]:
pred = lstm.predict(X_test_Scaled)

