In [32]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv1D, Flatten, MaxPooling1D
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau

In [33]:
data = pd.read_csv("../data.csv")
data.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Australia,Europe,Brazil,Canada,China,Denmark,Hong Kong,India,Japan,Malaysia
0,0,2010-01-04,0.9133,1.4419,1.72,1.0377,6.8273,5.1597,7.7555,46.27,92.55,3.396
1,1,2010-01-05,0.9143,1.4402,1.7296,1.0371,6.8258,5.1668,7.7564,46.13,91.48,3.385
2,2,2010-01-06,0.9189,1.4404,1.7292,1.0333,6.8272,5.1638,7.7546,45.72,92.53,3.379
3,3,2010-01-07,0.9168,1.4314,1.7409,1.0351,6.828,5.1981,7.7539,45.67,93.31,3.368
4,4,2010-01-08,0.9218,1.4357,1.7342,1.0345,6.8274,5.1827,7.7553,45.5,92.7,3.375


In [34]:
data.drop("Unnamed: 0.1", axis = 1, inplace = True)
data.rename(columns={"Unnamed: 0": "Date"}, inplace=True)
data["Date"] = pd.to_datetime(data["Date"])
data.set_index("Date", inplace=True)
data.replace(0, np.nan, inplace=True)

In [35]:
data

Unnamed: 0_level_0,Australia,Europe,Brazil,Canada,China,Denmark,Hong Kong,India,Japan,Malaysia
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2010-01-04,0.9133,1.4419,1.7200,1.0377,6.8273,5.1597,7.7555,46.27,92.55,3.3960
2010-01-05,0.9143,1.4402,1.7296,1.0371,6.8258,5.1668,7.7564,46.13,91.48,3.3850
2010-01-06,0.9189,1.4404,1.7292,1.0333,6.8272,5.1638,7.7546,45.72,92.53,3.3790
2010-01-07,0.9168,1.4314,1.7409,1.0351,6.8280,5.1981,7.7539,45.67,93.31,3.3680
2010-01-08,0.9218,1.4357,1.7342,1.0345,6.8274,5.1827,7.7553,45.50,92.70,3.3750
...,...,...,...,...,...,...,...,...,...,...
2019-12-27,0.6978,1.1174,4.0507,1.3073,6.9954,6.6829,7.7874,71.45,109.47,4.1260
2019-12-28,,,,,,,,,,
2019-12-29,,,,,,,,,,
2019-12-30,0.7004,1.1217,4.0152,1.3058,6.9864,6.6589,7.7857,71.30,108.85,4.1053


In [36]:
data.interpolate(method='linear', limit_direction='forward')

Unnamed: 0_level_0,Australia,Europe,Brazil,Canada,China,Denmark,Hong Kong,India,Japan,Malaysia
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2010-01-04,0.913300,1.441900,1.720000,1.0377,6.8273,5.1597,7.755500,46.27,92.550000,3.3960
2010-01-05,0.914300,1.440200,1.729600,1.0371,6.8258,5.1668,7.756400,46.13,91.480000,3.3850
2010-01-06,0.918900,1.440400,1.729200,1.0333,6.8272,5.1638,7.754600,45.72,92.530000,3.3790
2010-01-07,0.916800,1.431400,1.740900,1.0351,6.8280,5.1981,7.753900,45.67,93.310000,3.3680
2010-01-08,0.921800,1.435700,1.734200,1.0345,6.8274,5.1827,7.755300,45.50,92.700000,3.3750
...,...,...,...,...,...,...,...,...,...,...
2019-12-27,0.697800,1.117400,4.050700,1.3073,6.9954,6.6829,7.787400,71.45,109.470000,4.1260
2019-12-28,0.698667,1.118833,4.038867,1.3068,6.9924,6.6749,7.786833,71.40,109.263333,4.1191
2019-12-29,0.699533,1.120267,4.027033,1.3063,6.9894,6.6669,7.786267,71.35,109.056667,4.1122
2019-12-30,0.700400,1.121700,4.015200,1.3058,6.9864,6.6589,7.785700,71.30,108.850000,4.1053


In [37]:
data.interpolate(method='linear', limit_direction='forward', inplace=True)
LOOK_BACK = 30
PREDICT_DAY = 1
SPLIT_RATIO = 0.8

In [38]:
def Create_Data(
    data, lookback=LOOK_BACK, pred_len=PREDICT_DAY, split_ratio=SPLIT_RATIO
):
    if lookback < 2:
        print("ERROR: Lookback too small")
        return -1

    # declarations

    x = {}
    y = {}
    xtr = {}
    xt = {}
    ytr = {}
    yt = {}
    scalers = {}

    # Creating stepped data

    for i in data.columns:
        xtemp = pd.DataFrame(data[i])
        for j in range(1, lookback + 1):
            xtemp[i + str(j)] = data[i].shift(-1 * j)
        x[i] = xtemp.dropna()

    # Splitting data into x and y

    for i in x.keys():
        y[i] = pd.DataFrame(x[i].iloc[:, -pred_len])
        x[i] = x[i].iloc[:, :-pred_len]

    # Normalizing x and y values

    for i in x.keys():
        scalers[i + "_x"] = MinMaxScaler(feature_range=(0, 1))
        x[i] = scalers[i + "_x"].fit_transform(x[i])
        scalers[i + "_y"] = MinMaxScaler(feature_range=(0, 1))
        y[i] = scalers[i + "_y"].fit_transform(y[i])

    # setting train and test sizes

    tr_len = int(split_ratio * y["India"].shape[0])
    t_len = y["India"].shape[0] - tr_len

    # creating training and testing data

    for i in x.keys():
        xtr[i] = x[i][:tr_len]
        ytr[i] = y[i][:tr_len]
        xt[i] = x[i][-t_len:]
        yt[i] = y[i][-t_len:]

    # returning pertinent data

    return x, y, xtr, xt, ytr, yt, scalers

In [39]:
x,y,xtr,xt,ytr,yt,scalers = Create_Data(data)

In [40]:
def Create_model(x, lookback=LOOK_BACK):
    models = {}
    for key in x.keys():
        input_dim = x[key].shape[
            1
        ]  # Assuming x[key] is a 2D numpy array, where the second dimension is the feature size

        model = Sequential()

        # Convolutional Layers
        model.add(Conv1D(filters=64, kernel_size=7, activation='relu', input_shape=(input_dim, 1)))
        model.add(Conv1D(filters=64, kernel_size=7, activation='relu'))
        model.add(MaxPooling1D(pool_size=1))
        model.add(Conv1D(filters=32, kernel_size=5, activation='relu'))
        model.add(Conv1D(filters=32, kernel_size=5, activation='relu'))
        model.add(MaxPooling1D(pool_size=1))
        model.add(Conv1D(filters=16, kernel_size=3, activation='relu'))
        model.add(Conv1D(filters=16, kernel_size=3, activation='relu'))
        model.add(MaxPooling1D(pool_size=1))
        model.add(Flatten())

        # Dense Layers
        model.add(Dense(256, activation="relu"))
        model.add(Dense(128, activation="relu"))
        model.add(Dropout(0.2))
        model.add(Dense(64, activation="relu"))
        model.add(Dense(32, activation="relu"))

        # Output Layer
        model.add(Dense(1, activation="linear"))

        model.compile(optimizer="adam", loss="mean_squared_error")

        models[key] = model

    return models

In [41]:
m = Create_model(x,y)

In [42]:
for key in xtr:
    print(f"Shape of xtr[{key}] = {xtr[key].shape}")
    print(f"Shape of xt[{key}] = {xt[key].shape}")

Shape of xtr[Australia] = (2894, 30)
Shape of xt[Australia] = (724, 30)
Shape of xtr[Europe] = (2894, 30)
Shape of xt[Europe] = (724, 30)
Shape of xtr[Brazil] = (2894, 30)
Shape of xt[Brazil] = (724, 30)
Shape of xtr[Canada] = (2894, 30)
Shape of xt[Canada] = (724, 30)
Shape of xtr[China] = (2894, 30)
Shape of xt[China] = (724, 30)
Shape of xtr[Denmark] = (2894, 30)
Shape of xt[Denmark] = (724, 30)
Shape of xtr[Hong Kong] = (2894, 30)
Shape of xt[Hong Kong] = (724, 30)
Shape of xtr[India] = (2894, 30)
Shape of xt[India] = (724, 30)
Shape of xtr[Japan] = (2894, 30)
Shape of xt[Japan] = (724, 30)
Shape of xtr[Malaysia] = (2894, 30)
Shape of xt[Malaysia] = (724, 30)


In [43]:
def Execute_model(model, xtr, ytr, xt, yt, scaler):
    MAPE = {}
    MAE = {}
    MSE = {}
    for i in model.keys():
        print(i)
        # Creating EarlyStopping and ReduceLROnPlateau callbacks
        es = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)
        reduce_lr = ReduceLROnPlateau(
            monitor="val_loss", factor=0.2, patience=5, min_lr=0.0001
        )

        # Reshaping data for Conv1D
        xtr_reshaped = np.reshape(xtr[i], (xtr[i].shape[0], xtr[i].shape[1], 1))
        xt_reshaped = np.reshape(xt[i], (xt[i].shape[0], xt[i].shape[1], 1))

        # Training the model with EarlyStopping and ReduceLROnPlateau callbacks
        model[i].fit(
            xtr_reshaped,
            ytr[i],
            epochs=100,
            batch_size=64,
            verbose=1,
            validation_split=0.2,
            callbacks=[es, reduce_lr],
        )

        # collecting predicted and actual values
        temp = model[i].predict(xt_reshaped)
        pred = scaler[i + "_y"].inverse_transform(temp)
        act = scaler[i + "_y"].inverse_transform(yt[i])

        # calculating Mean Square Error, Mean Absolute Error, and Mean Absolute Error
        MSE[i] = mean_squared_error(act, pred)
        MAE[i] = mean_absolute_error(act, pred)
        MAPE[i] = mean_absolute_percentage_error(act, pred)

    # Tabulating Data
    results = pd.DataFrame([MSE, MAE, MAPE])
    results["Metric"] = ["MSE", "MAE", "MAPE"]
    results.set_index("Metric", inplace=True)

    return results

In [44]:
result = Execute_model(m,xtr,ytr,xt,yt,scalers)

Australia
Epoch 1/100


2023-10-11 10:18:32.386611: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8902
2023-10-11 10:18:33.111958: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55bed68d2ec0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-10-11 10:18:33.112003: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce GTX 1660 Ti, Compute Capability 7.5
2023-10-11 10:18:33.119956: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-10-11 10:18:33.314666: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 12: early stopping
Europe
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 32: early stopping
Brazil
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 17: early stopping
Canada
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 11: early 

In [45]:
result

Unnamed: 0_level_0,Australia,Europe,Brazil,Canada,China,Denmark,Hong Kong,India,Japan,Malaysia
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
MSE,0.00033,0.000143,0.108426,0.001396,0.012734,0.030133,0.00023,6.920421,21.101518,0.013783
MAE,0.014583,0.009042,0.314011,0.035867,0.098758,0.166692,0.014499,2.535091,4.490945,0.115425
MAPE,0.02051,0.00767,0.081822,0.027267,0.014425,0.02549,0.001849,0.03638,0.040855,0.028164
