In [1]:
import torch
from torch import nn, optim

import os
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
import numpy as np

# Reproducibility: seed every random number generator used in this notebook
# (Python's `random`, NumPy and PyTorch) with the same fixed value.
import random

random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

# cuDNN backend flags: disable the backend and its benchmark mode and
# request deterministic behaviour.
torch.backends.cudnn.enabled = False
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [2]:
def wape(y, y_hat):
    """Weighted absolute percentage error, expressed in percent.

    Computes ``100 * sum(|y - y_hat|) / sum(|y|)``.

    Args:
        y: Actual values (any array-like, e.g. ndarray or list).
        y_hat: Predicted values, broadcastable against ``y``.

    Returns:
        The WAPE as a NumPy scalar. No guard is applied when ``sum(|y|)``
        is zero; the division then yields ``nan``/``inf``.
    """
    # Coerce to arrays so plain Python sequences are paired element-wise
    # instead of failing on ``list - list``.
    y = np.asarray(y)
    y_hat = np.asarray(y_hat)
    return np.sum(np.abs(y - y_hat)) / np.sum(np.abs(y)) * 100

def mape(y, y_hat, eps= 1e-7):
    """Mean absolute percentage error, expressed in percent.

    Computes ``100 * mean(|y - y_hat| / (|y| + eps))``.

    Args:
        y: Actual values (any array-like).
        y_hat: Predicted values, broadcastable against ``y``.
        eps: Small positive constant added to the denominator so that
            actual values equal to zero do not cause a division by zero.

    Returns:
        The MAPE as a NumPy scalar.
    """
    y = np.asarray(y)
    y_hat = np.asarray(y_hat)
    # eps is added to the magnitude of y, not to the signed value: with
    # ``y + eps`` a negative actual close to -eps cancels the guard and the
    # denominator becomes zero. For y >= 0 both forms are identical.
    return np.mean(np.abs(y - y_hat) / (np.abs(y) + eps)) * 100

def series_to_X_y(series, windows_size= 30):
    """Turn a 1-D series into sliding-window inputs and next-step targets.

    Sample ``k`` consists of ``series[k:k + windows_size]`` as the input row
    and ``series[k + windows_size]`` (wrapped in a one-element list) as the
    target.

    Args:
        series: Indexable, sliceable sequence of observations.
        windows_size: Number of consecutive observations per input row.

    Returns:
        A pair ``(X, y)`` where ``X`` is the ``np.asmatrix`` of all input
        rows and ``y`` is the ``np.asarray`` of all one-element targets.
    """
    n_samples = len(series) - windows_size

    windows = [series[start:start + windows_size] for start in range(n_samples)]
    targets = [[series[start + windows_size]] for start in range(n_samples)]

    return np.asmatrix(windows), np.asarray(targets)

class MLP(nn.Module):
    """Fully connected network built from a list of layer widths.

    ``neurons[k]`` is the input width and ``neurons[k + 1]`` the output width
    of the k-th ``nn.Linear``. Every linear layer except the last one is
    followed by ``nn.ReLU`` and, when ``use_dropout`` is set, by
    ``nn.Dropout(0.2)``. When ``use_batchnorm`` is set, each linear layer
    (including the last) is preceded by ``nn.BatchNorm1d`` over its input
    width.
    """

    def __init__(self, neurons, use_dropout= False, use_batchnorm= False):
        super().__init__()

        widths = list(zip(neurons[:-1], neurons[1:]))
        last_index = len(widths) - 1

        modules = []
        for index, (n_in, n_out) in enumerate(widths):
            if use_batchnorm:
                modules.append(nn.BatchNorm1d(n_in))
            modules.append(nn.Linear(in_features= n_in, out_features= n_out))

            # The output layer stays linear: no activation or dropout after it.
            if index == last_index:
                continue

            modules.append(nn.ReLU())
            if use_dropout:
                modules.append(nn.Dropout(0.2))

        self.fc = nn.Sequential(*modules)

    def forward(self, X):
        """Apply the stacked layers to ``X`` and return the result."""
        return self.fc(X)

def get_wapes(path, country, prices_not_returns, windows_size, output_size, neurons, metrics, use_dropout, use_batchnorm, epochs, val_size, test_size, device):
    """Train one MLP per CSV file in ``path`` and score it on a held-out tail.

    For every file the ``Open`` column is read (either as is or as
    ``pct_change``), converted to sliding windows with ``series_to_X_y`` and
    split without shuffling into train / validation / test parts. An ``MLP``
    is fitted with L-BFGS on the full training part; the weights with the
    lowest validation MSE are kept and used for the test evaluation.

    Args:
        path: Directory that contains the CSV files.
        country: Label used only in the outer progress-bar description.
        prices_not_returns: If true, model ``Open`` directly; otherwise model
            ``Open.pct_change()`` with its first element dropped.
        windows_size: Number of past observations per input row; also the
            input width of the network.
        output_size: Width of the network's output layer.
        neurons: Widths of the hidden layers.
        metrics: Callable ``metrics(y_true, y_pred)`` applied to NumPy arrays;
            its result is stored and printed with ``:.2f`` formatting.
        use_dropout: Forwarded to ``MLP``.
        use_batchnorm: Forwarded to ``MLP``.
        epochs: Number of ``optimizer.step`` calls per file.
        val_size: ``test_size`` argument of the second ``train_test_split``
            (validation share of the non-test samples).
        test_size: ``test_size`` argument of the first ``train_test_split``.
        device: Device the tensors and the model are moved to.

    Returns:
        dict mapping ``file[:-14]`` (the file name without its last 14
        characters; presumably a fixed-length suffix, confirm against the
        data directory) to the metric value on the test part.
    """
    layer_sizes = [windows_size, *neurons, output_size]

    result_dict = dict()

    # Filter instead of list.remove(): the macOS metadata file is skipped when
    # present, and its absence no longer raises ValueError.
    entries = [entry for entry in os.listdir(path) if entry != ".DS_Store"]

    pbar_company = tqdm(entries)
    for file in pbar_company:
        pbar_company.set_description(f"{country}: Epoch")
        company = file[:-14]

        df = pd.read_csv(os.path.join(path, file)).dropna()
        series = np.array(df.Open) if prices_not_returns else np.array(df.Open.pct_change())[1:]
        X, y = series_to_X_y(series, windows_size)

        X = torch.from_numpy(X).to(device).to(torch.float32)
        y = torch.from_numpy(y).to(device).to(torch.float32)

        # shuffle=False keeps the samples in their original order, so the
        # validation and test parts are the last rows of the series. The
        # splits are already on `device` because X and y are.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= test_size, shuffle= False)
        X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size= val_size, shuffle= False)

        model = MLP(layer_sizes, use_dropout= use_dropout, use_batchnorm= use_batchnorm).to(device)
        criterion = nn.MSELoss()
        optimizer = optim.LBFGS(model.parameters())

        best_state = None
        best_val_loss = torch.inf
        # Written by the closure below; kept function-local instead of being
        # published as a module-level global.
        train_loss = None

        pbar_train = tqdm(range(epochs), desc= "Epoch")
        for epoch in pbar_train:
            def closure():
                # L-BFGS may evaluate the loss several times per step, so the
                # forward/backward pass is handed over as a callable.
                nonlocal train_loss

                model.train()
                optimizer.zero_grad()

                train_loss = criterion(model(X_train), y_train)
                train_loss.backward()
                return train_loss

            optimizer.step(closure)

            model.eval()
            with torch.inference_mode():
                val_loss = criterion(model(X_val), y_val).item()

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                # state_dict() refers to the live parameters, so clone them to
                # get a snapshot. Keeping it in memory avoids the shared
                # "best_model.pt" file, which could be missing or stale.
                best_state = {key: value.detach().clone() for key, value in model.state_dict().items()}

            pbar_train.set_description(f"{company}: Epoch: {epoch + 1}/{epochs}. Train loss: {train_loss.item():.5f} Val loss: {val_loss:.5f}")

        # If no epoch ever improved on the initial bound (zero epochs or a
        # non-finite validation loss), fall back to the current weights.
        final_state = best_state if best_state is not None else model.state_dict()

        # Evaluate on a freshly constructed network, as before, restored to
        # the selected weights.
        model = MLP(layer_sizes, use_dropout= use_dropout, use_batchnorm= use_batchnorm).to(device)
        model.load_state_dict(final_state)
        model.eval()
        with torch.inference_mode():
            y_hat = model(X_test).detach().cpu().numpy()
        y_true = y_test.detach().cpu().numpy()

        metrics_val = metrics(y_true, y_hat)
        result_dict[company] = metrics_val

        print(f"{company}: Test error: {metrics_val:.2f}%")

    return result_dict


In [3]:
def _make_params(path, country, prices_not_returns):
    """Build the keyword arguments for one ``get_wapes`` experiment.

    Only the data directory, the country label and the prices/returns switch
    differ between the experiments; every other setting is shared. A new
    dict (with its own ``neurons`` list) is returned on every call.
    """
    return {
        "path": path,
        "country": country,
        "prices_not_returns": prices_not_returns,

        "windows_size": 100,
        "output_size": 1,
        "neurons": [6],

        "metrics": wape,

        "use_dropout": False,
        "use_batchnorm": False,

        "epochs": 60,
        "val_size": 0.2,
        "test_size": 0.1,
        "device": "cuda" if torch.cuda.is_available() else "cpu"
    }


# US companies: raw prices, then returns.
params_us_prices = _make_params("../Data/American Companies", "US", True)
params_us_returns = _make_params("../Data/American Companies", "US", False)

# Chinese companies: raw prices, then returns.
params_ch_prices = _make_params("../Data/Chinese Companies", "CH", True)
params_ch_returns = _make_params("../Data/Chinese Companies", "CH", False)

In [4]:
# US companies, model fitted on the raw `Open` values (prices_not_returns=True).
result_dict_prices_us = get_wapes(**params_us_prices)

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

AMZN: Test error: 1.92%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

MSFT: Test error: 1.40%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

GM: Test error: 2.77%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

UBER: Test error: 3.59%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Coca Cola: Test error: 0.86%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

EBAY: Test error: 1.70%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

DIS: Test error: 1.29%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

FORD: Test error: 1.89%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

NFLX: Test error: 1.92%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

NKE: Test error: 1.45%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

GE: Test error: 1.61%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

WMT: Test error: 1.02%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

INTC: Test error: 1.55%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

GOOG: Test error: 1.47%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

AAPL: Test error: 1.69%


In [5]:
# US companies, model fitted on `Open.pct_change()` (prices_not_returns=False).
# NOTE(review): in the recorded run every company scores a WAPE of about 100%.
# Since WAPE = 100 * sum|y - y_hat| / sum|y|, that is the value an all-zero
# forecast would get, so the metric shows no gain over predicting 0 here.
result_dict_returns_us = get_wapes(**params_us_returns)

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

AMZN: Test error: 101.21%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

MSFT: Test error: 99.55%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

GM: Test error: 101.60%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

UBER: Test error: 100.84%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Coca Cola: Test error: 100.16%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

EBAY: Test error: 100.49%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

DIS: Test error: 100.35%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

FORD: Test error: 101.16%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

NFLX: Test error: 103.28%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

NKE: Test error: 99.79%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

GE: Test error: 100.44%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

WMT: Test error: 100.20%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

INTC: Test error: 100.42%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

GOOG: Test error: 100.52%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

AAPL: Test error: 100.07%


In [6]:
# Chinese companies, model fitted on the raw `Open` values (prices_not_returns=True).
result_dict_prices_ch = get_wapes(**params_ch_prices)

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Shenhua: Test error: 2.04%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Agricultural Bank of China: Test error: 0.51%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Wanhua Chemical Group: Test error: 1.96%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Anhui Coonch Cement: Test error: 1.49%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

CITIC Securities: Test error: 1.39%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Kweichow Moutai: Test error: 1.58%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Duty Free Group: Test error: 2.37%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Ping An: Test error: 1.24%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Pacific Insurance: Test error: 1.41%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

PetroChina: Test error: 1.57%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

SAIC Motor: Test error: 1.49%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Merchants Bank: Test error: 1.53%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Sinopec: Test error: 1.70%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Hengrui Medicine: Test error: 1.92%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Life Insurance Company Limited: Test error: 1.56%


In [7]:
# Chinese companies, model fitted on `Open.pct_change()` (prices_not_returns=False).
# NOTE(review): in the recorded run every company scores a WAPE of about 100%
# or more. Since WAPE = 100 * sum|y - y_hat| / sum|y|, an all-zero forecast
# would score exactly 100%, so the metric shows no gain over predicting 0 here.
result_dict_returns_ch = get_wapes(**params_ch_returns)

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Shenhua: Test error: 101.01%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Agricultural Bank of China: Test error: 108.68%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Wanhua Chemical Group: Test error: 100.25%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Anhui Coonch Cement: Test error: 101.50%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

CITIC Securities: Test error: 99.90%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Kweichow Moutai: Test error: 100.70%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Duty Free Group: Test error: 101.27%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Ping An: Test error: 100.26%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Pacific Insurance: Test error: 100.25%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

PetroChina: Test error: 101.31%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

SAIC Motor: Test error: 101.41%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Merchants Bank: Test error: 100.57%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Sinopec: Test error: 101.03%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

Hengrui Medicine: Test error: 100.35%


Epoch:   0%|          | 0/60 [00:00<?, ?it/s]

China Life Insurance Company Limited: Test error: 100.35%


In [13]:
def _results_frame(prices, returns):
    """Combine the price and return metrics of one market into a table.

    Args:
        prices: dict mapping company name to the metric of the price model.
        returns: dict mapping company name to the metric of the return model.

    Returns:
        DataFrame with the columns ``Company``, ``WAPE (price)`` and
        ``WAPE (return)``, one row per key of ``prices`` in its iteration
        order.

    Raises:
        KeyError: If a company of ``prices`` is missing from ``returns``.
    """
    companies = list(prices)
    # Look the return metric up by company name. Pairing the two dicts by
    # position would silently mislabel rows if their orders ever differed.
    return pd.DataFrame({
        "Company": companies,
        "WAPE (price)": [prices[company] for company in companies],
        "WAPE (return)": [returns[company] for company in companies]
    })


us_result = _results_frame(result_dict_prices_us, result_dict_returns_us)

ch_result = _results_frame(result_dict_prices_ch, result_dict_returns_ch)

In [14]:
# Per-company test metric for the US models (price input vs. return input).
us_result

Unnamed: 0,Company,WAPE (price),WAPE (return)
0,AMZN,1.915032,101.205707
1,MSFT,1.403056,99.547803
2,GM,2.765831,101.6011
3,UBER,3.593759,100.837088
4,Coca Cola,0.855535,100.159466
5,EBAY,1.695849,100.49243
6,DIS,1.292816,100.353003
7,FORD,1.890453,101.164329
8,NFLX,1.922355,103.281105
9,NKE,1.454048,99.793148


In [15]:
# Summary statistics of the US metrics, rounded to two decimals.
us_result.describe().round(2)

Unnamed: 0,WAPE (price),WAPE (return)
count,15.0,15.0
mean,1.74,100.67
std,0.68,0.9
min,0.86,99.55
25%,1.43,100.18
50%,1.61,100.44
75%,1.9,101.0
max,3.59,103.28


In [17]:
# Per-company test metric for the Chinese models (price input vs. return input).
ch_result

Unnamed: 0,Company,WAPE (price),WAPE (return)
0,China Shenhua,2.037898,101.014626
1,Agricultural Bank of China,0.512811,108.675766
2,Wanhua Chemical Group,1.960803,100.249338
3,Anhui Coonch Cement,1.494392,101.504469
4,CITIC Securities,1.391494,99.904495
5,Kweichow Moutai,1.579353,100.703442
6,China Duty Free Group,2.37421,101.2725
7,Ping An,1.243216,100.259042
8,China Pacific Insurance,1.410298,100.253689
9,PetroChina,1.57457,101.309848


In [18]:
# Summary statistics of the Chinese metrics, rounded to two decimals.
ch_result.describe().round(2)

Unnamed: 0,WAPE (price),WAPE (return)
count,15.0,15.0
mean,1.59,101.26
std,0.42,2.11
min,0.51,99.9
25%,1.45,100.3
50%,1.56,100.7
75%,1.81,101.29
max,2.37,108.68


In [20]:
# Write both summary tables to CSV files, leaving out the index column.
for summary, csv_name in ((us_result, "mlp_us_price.csv"), (ch_result, "mlp_ch_price.csv")):
    summary.to_csv(csv_name, index= False)