In [1]:
import torch
from torch import nn, optim
from Functions.SSA import SSA

import os
import arch
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
import numpy as np

import matplotlib.pyplot as plt

# воспроизводимость
torch.manual_seed(42)
torch.backends.cudnn.deterministic = True
np.random.seed(42)
import random
random.seed(42)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.enabled = False

In [2]:
def wape(y, y_hat):
    return np.sum(np.abs(y - y_hat)) / np.sum(np.abs(y)) * 100

def mape(y, y_hat, eps= 1e-7):
    return np.mean(np.abs((y - y_hat) / (y + eps))) * 100

def series_to_X_y(series, windows_size= 30):
    X = []
    y = []

    for i in range(len(series) - windows_size):
        row = series[i:i + windows_size]
        X.append(row)
        column = [series[i + windows_size]]
        y.append(column)

    return np.asmatrix(X), np.asarray(y)

class MLP(nn.Module):
    def __init__(self, neurons, use_dropout= False, use_batchnorm= False):
        super(MLP, self).__init__()

        layers = []
        for i in range(1, len(neurons)):
            if use_batchnorm:
                layers.append(nn.BatchNorm1d(neurons[i - 1]))
            layers.append(nn.Linear(in_features= neurons[i - 1], out_features= neurons[i]))
            layers.append(nn.ReLU())
            if use_dropout:
                layers.append(nn.Dropout(0.2))

        self.fc = nn.Sequential(*(layers[:-2] if use_dropout else layers[:-1]))

    def forward(self, X):
        return self.fc(X)

def get_wapes(path, country, prices_not_returns, windows_size, output_size, neurons, metrics, use_dropout, use_batchnorm, epochs, val_size, test_size, device, use_denoising, use_error_modeling):

    neurons = [windows_size, *neurons, output_size]

    result_dict = dict()

    entries = os.listdir(path)
    entries.remove(".DS_Store")

    pbar_company = tqdm(entries)
    for file in pbar_company:
        pbar_company.set_description(f"{country}: Epoch")
        df = pd.read_csv(path + file if path[-1] == r"/" else path + r"/" + file).dropna()
        series = np.array(df.Open) if prices_not_returns else np.array(df.Open.pct_change())[1:]
        series_initial = series[:]

        if use_denoising:
            _, series = SSA.multiple_stage_denoising(series, max_iter= 50)

        X, _ = series_to_X_y(series, windows_size)
        _, y = series_to_X_y(series_initial, windows_size)

        X = torch.from_numpy(X).to(device).to(torch.float32)
        y = torch.from_numpy(y).to(device).to(torch.float32)

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= test_size, shuffle= False)
        # X_train_initial, X_test_initial, y_train_initial, y_test_initial = train_test_split(X_initial, y_initial, test_size= test_size, shuffle= False)
        X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size= val_size, shuffle= False)

        X_train = X_train.to(device)
        X_val = X_val.to(device)
        X_test = X_test.to(device)

        y_train = y_train.to(device)
        y_val = y_val.to(device)
        y_test = y_test.to(device)

        model = MLP(neurons, use_dropout= use_dropout, use_batchnorm= use_batchnorm).to(device)
        criterion = nn.MSELoss()
        optimizer = optim.LBFGS(model.parameters())

        best_model = None
        best_val_loss = torch.inf

        pbar_train = tqdm(range(epochs), desc= "Epoch")
        for epoch in pbar_train:
            def closure():
                model.train()
                optimizer.zero_grad()

                train_pred = model(X_train)

                global train_loss
                train_loss = criterion(y_train, train_pred)

                train_loss.backward()
                return train_loss

            optimizer.step(closure)

            model.eval()
            with torch.inference_mode():
                val_preds = model(X_val)
                val_loss = criterion(y_val, val_preds)

            if val_loss.item() < best_val_loss:
                best_val_loss = val_loss.item()
                best_model = model.state_dict()
                torch.save(best_model, "best_model.pt")

            pbar_train.set_description(f"{file[:-14]}: Epoch: {epoch + 1}/{epochs}. Train loss: {train_loss.item():.5f} Val loss: {val_loss.item():.5f}")

        model = MLP(neurons, use_dropout= use_dropout, use_batchnorm= use_batchnorm).to(device)
        model.load_state_dict(torch.load("best_model.pt"))
        model.eval()

        # train_residuals = None
        # test_resudials = None
        #
        # with torch.inference_mode():
        #     train_preds = model(torch.concat([X_train, X_val], dim= 0)).detach().cpu().numpy()
        #     test_preds = model(X_test).detach().cpu().numpy()
        #
        #     train_residuals = y_train_initial - train_preds
        #     test_residuals = y_test_initial - test_preds
        #
        #
        # print(train_residuals.shape, X_train_initial.shape, train_preds.shape)
        # arch_model = arch.arch_model(train_residuals, mean= "constant", vol= "FIGARCH", p= 0, q= 1, dist= "normal")
        # arch_model_fitted = arch_model.fit()
        # # forecast =
        # print(arch_model_fitted.summary())
        #
        #
        #
        # break
        #
        # if use_error_modeling:
        #     plt.plot(residuals)
        #     plt.show()

        with torch.inference_mode():
            y_hat = model(X_test).detach().cpu().numpy()
            y_test = y_test.detach().cpu().numpy()
            metrics_val = metrics(y_test, y_hat)


        # break

        result_dict[file[:-14]] = metrics_val

        print(f"{file[:-14]}: Test error: {result_dict[file[:-14]]:.2f}%")
        os.remove("best_model.pt")


    return result_dict


In [3]:
params_us_prices = {
    "path": "../Data/American Companies",
    "country": "US",
    "prices_not_returns": True,

    "windows_size": 100,
    "output_size": 1,
    "neurons": [6],

    "metrics": wape,

    "use_dropout": False,
    "use_batchnorm": False,

    "epochs": 60,
    "val_size": 0.2,
    "test_size": 0.1,
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "use_denoising": True,
    "use_error_modeling": True
}

params_us_returns = {
    "path": "../Data/American Companies",
    "country": "US",
    "prices_not_returns": False,

    "windows_size": 100,
    "output_size": 1,
    "neurons": [6],

    "metrics": wape,

    "use_dropout": False,
    "use_batchnorm": False,

    "epochs": 60,
    "val_size": 0.2,
    "test_size": 0.1,
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "use_denoising": True,
    "use_error_modeling": True
}

params_ch_prices = {
    "path": "../Data/Chinese Companies",
    "country": "CH",
    "prices_not_returns": True,

    "windows_size": 100,
    "output_size": 1,
    "neurons": [6],

    "metrics": wape,

    "use_dropout": False,
    "use_batchnorm": False,

    "epochs": 60,
    "val_size": 0.2,
    "test_size": 0.1,
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "use_denoising": True,
    "use_error_modeling": True
}

params_ch_returns = {
    "path": "../Data/Chinese Companies",
    "country": "CH",
    "prices_not_returns": False,

    "windows_size": 100,
    "output_size": 1,
    "neurons": [6],

    "metrics": wape,

    "use_dropout": False,
    "use_batchnorm": False,

    "epochs": 60,
    "val_size": 0.2,
    "test_size": 0.1,
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "use_denoising": True,
    "use_error_modeling": True
}

In [None]:
result_dict_prices_us = get_wapes(**params_us_prices)

In [15]:
result_dict_prices_us

{'AMZN': 0.9444243274629116,
 'MSFT': 0.9202255867421627,
 'GM': 0.9673252701759338,
 'UBER': 1.8104953691363335,
 'Coca Cola': 0.7999161258339882,
 'EBAY': 0.9316788055002689,
 'DIS': 0.5630054976791143,
 'FORD': 1.0555028915405273,
 'NFLX': 0.7649381645023823,
 'NKE': 0.7966938428580761,
 'GE': 1.1473819613456726,
 'WMT': 0.5874163471162319,
 'INTC': 0.6662479601800442,
 'GOOG': 0.8493444882333279,
 'AAPL': 1.0934440419077873}

In [16]:
result_dict_returns_us = get_wapes(**params_us_returns)

  0%|          | 0/15 [00:00<?, ?it/s]

Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

AMZN: Test error: 95.01%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

MSFT: Test error: 99.96%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

GM: Test error: 100.92%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

UBER: Test error: 97.99%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Coca Cola: Test error: 99.80%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

EBAY: Test error: 91.26%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

DIS: Test error: 95.92%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

FORD: Test error: 96.08%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

NFLX: Test error: 96.55%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

NKE: Test error: 96.67%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

GE: Test error: 100.53%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

WMT: Test error: 100.20%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

INTC: Test error: 93.04%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

GOOG: Test error: 93.75%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

AAPL: Test error: 94.38%


In [17]:
result_dict_prices_ch = get_wapes(**params_ch_prices)

  0%|          | 0/15 [00:00<?, ?it/s]

Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Shenhua: Test error: 1.28%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Agricultural Bank of China: Test error: 0.24%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Wanhua Chemical Group: Test error: 1.12%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Anhui Coonch Cement: Test error: 0.94%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

CITIC Securities: Test error: 0.86%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Kweichow Moutai: Test error: 0.98%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Duty Free Group: Test error: 1.52%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Ping An: Test error: 0.64%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Pacific Insurance: Test error: 0.89%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

PetroChina: Test error: 0.76%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

SAIC Motor: Test error: 2.87%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Merchants Bank: Test error: 0.86%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Sinopec: Test error: 1.31%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Hengrui Medicine: Test error: 0.98%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Life Insurance Company Limited: Test error: 0.95%


In [18]:
result_dict_returns_ch = get_wapes(**params_ch_returns)

  0%|          | 0/15 [00:00<?, ?it/s]

Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Shenhua: Test error: 92.98%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Agricultural Bank of China: Test error: 102.48%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Wanhua Chemical Group: Test error: 100.60%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Anhui Coonch Cement: Test error: 97.00%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

CITIC Securities: Test error: 90.73%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Kweichow Moutai: Test error: 100.69%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Duty Free Group: Test error: 92.57%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Ping An: Test error: 100.56%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Pacific Insurance: Test error: 100.25%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

PetroChina: Test error: 99.90%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

SAIC Motor: Test error: 93.50%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Merchants Bank: Test error: 99.45%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Sinopec: Test error: 100.51%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Hengrui Medicine: Test error: 95.72%


Dividing loop:   0%|          | 0/50 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Life Insurance Company Limited: Test error: 100.03%


In [19]:
us_result = pd.DataFrame({
    "Company": result_dict_prices_us.keys(),
    "WAPE (price)": result_dict_prices_us.values(),
    "WAPE (return)": result_dict_returns_us.values()
})

ch_result = pd.DataFrame({
    "Company": result_dict_prices_ch.keys(),
    "WAPE (price)": result_dict_prices_ch.values(),
    "WAPE (return)": result_dict_returns_ch.values()
})

In [20]:
us_result

Unnamed: 0,Company,WAPE (price),WAPE (return)
0,AMZN,0.944424,95.010746
1,MSFT,0.920226,99.959499
2,GM,0.967325,100.923097
3,UBER,1.810495,97.985947
4,Coca Cola,0.799916,99.804169
5,EBAY,0.931679,91.264945
6,DIS,0.563005,95.923382
7,FORD,1.055503,96.08447
8,NFLX,0.764938,96.550882
9,NKE,0.796694,96.6748


In [21]:
round(us_result.describe(), 2)

Unnamed: 0,WAPE (price),WAPE (return)
count,15.0,15.0
mean,0.93,96.81
std,0.3,3.02
min,0.56,91.26
25%,0.78,94.7
50%,0.92,96.55
75%,1.01,99.88
max,1.81,100.92


In [22]:
ch_result

Unnamed: 0,Company,WAPE (price),WAPE (return)
0,China Shenhua,1.279408,92.984003
1,Agricultural Bank of China,0.239972,102.47606
2,Wanhua Chemical Group,1.120149,100.603592
3,Anhui Coonch Cement,0.943795,96.995205
4,CITIC Securities,0.855246,90.731359
5,Kweichow Moutai,0.975364,100.687981
6,China Duty Free Group,1.516019,92.571014
7,Ping An,0.639151,100.557494
8,China Pacific Insurance,0.886562,100.251222
9,PetroChina,0.755956,99.897146


In [23]:
round(ch_result.describe(), 2)

Unnamed: 0,WAPE (price),WAPE (return)
count,15.0,15.0
mean,1.08,97.8
std,0.58,3.73
min,0.24,90.73
25%,0.86,94.61
50%,0.95,99.9
75%,1.2,100.53
max,2.87,102.48


In [25]:
us_result.to_csv("mssa_mlp_us_price_return.csv", index= False)
ch_result.to_csv("mssa_mlp_ch_price_return.csv", index= False)