In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import optuna
import os
from tqdm.notebook import tqdm

# Raise Optuna's log threshold to WARNING so that the studies run below do not
# print lower-severity messages into the notebook output.
optuna.logging.set_verbosity(optuna.logging.WARNING)

In [2]:
def get_all_ewma(y, alpha= 2, n= 5):
    """Return the bias-corrected EWMA of every prefix of ``y``.

    With ``beta = 1 - alpha / (n + 1)``, element ``i - 1`` of the result is

        (1 - beta) * sum_{j=0}^{i-1} beta**j * y[i-1-j] / (1 - beta**i)

    i.e. the exponentially weighted average of ``y[:i]`` in which the newest
    observation has the largest weight.

    The weighted sum is updated recursively
    (``S_i = beta * S_{i-1} + y[i-1]``), so the whole series costs O(len(y))
    instead of rebuilding the weight vector and dot product for every prefix.

    Parameters
    ----------
    y : sequence or array
        Observations, oldest first. Indexed positionally.
    alpha, n : numbers
        Define the smoothing factor ``beta = 1 - alpha / (n + 1)``.

    Returns
    -------
    numpy.ndarray
        One averaged value per prefix of ``y`` (empty for empty input).

    Raises
    ------
    ZeroDivisionError
        If ``beta`` is a Python float equal to 1 (e.g. ``alpha == 0``),
        because the correction term ``1 - beta**i`` is then zero.
    """
    beta = 1 - alpha / (n + 1)
    # Positional view of the data, matching the positional slicing semantics
    # of the prefix-by-prefix formulation.
    values = np.asarray(y)

    v = []
    weighted_sum = 0.0
    for i in range(1, len(values) + 1):
        # Discount everything seen so far by beta and add the newest point.
        weighted_sum = beta * weighted_sum + values[i - 1]
        # Normalises the truncated geometric weights so they sum to one.
        correction = (1 - beta ** i) ** (-1)
        v.append((1 - beta) * weighted_sum * correction)

    return np.array(v)

def get_exact_ewma(y, alpha= 2, n= 5, val= 1):
    """Bias-corrected EWMA of the first ``val`` observations of ``y``.

    The smoothing factor is ``beta = 1 - alpha / (n + 1)``. The newest point
    in the window, ``y[val - 1]``, is weighted by ``beta ** 0`` and each older
    point by one more power of ``beta``. The weighted sum is scaled by
    ``(1 - beta)`` and by the correction ``1 / (1 - beta ** val)``.

    Returns a one-element NumPy array holding that average.
    """
    decay = 1 - alpha / (n + 1)

    # Weights decay**0, decay**1, ..., decay**(val - 1).
    weights = (decay * np.ones(val)) ** np.arange(val)
    # Reverse the window so the newest observation meets the largest weight.
    newest_first = y[:val][::-1]
    scaled_sum = (1 - decay) * np.dot(weights, newest_first)
    bias_fix = (1 - decay ** (val)) ** (-1)

    return np.array([scaled_sum * bias_fix])

def wape(y, y_hat):
    """Weighted absolute percentage error as a fraction.

    Computed as the sum of absolute errors ``|y - y_hat|`` divided by the sum
    of absolute actual values ``|y|``.
    """
    total_abs_error = np.sum(np.abs(y - y_hat))
    total_abs_actual = np.sum(np.abs(y))
    return total_abs_error / total_abs_actual

def mape(y, y_hat):
    """Mean absolute percentage error as a fraction.

    Each error ``y - y_hat`` is divided by the matching actual value ``y``;
    the result is the mean of the absolute ratios.
    """
    relative_errors = (y - y_hat) / y
    return np.mean(np.abs(relative_errors))

In [3]:
# entries_us = os.listdir("../Data/American Companies")
# entries_us.remove(".DS_Store")
# entries_ch = os.listdir("../Data/Chinese Companies")
# entries_ch.remove(".DS_Store")
#
# result_dict_us = dict()
# result_dict_ch = dict()

In [4]:
def _blend_components(prices, start, end):
    """Collect the two ingredients of the blended forecast for ``prices[start:end]``.

    For every target index ``i`` in ``range(start, end)`` this gathers the
    previous observation ``prices[i - 1]`` and the value returned by
    ``get_exact_ewma(prices, alpha=2, n=20, val=i - 1)``. Neither depends on
    the blend weight ``k``, so both can be computed once and reused.

    Returns a tuple ``(last_values, ewma_values)`` of 1-D arrays.
    """
    steps = range(start, end)
    last_values = np.array([prices[i - 1] for i in steps])
    # get_exact_ewma returns one-element arrays; flatten them to 1-D.
    ewma_values = np.array(
        [get_exact_ewma(prices, alpha= 2, n= 20, val= i - 1) for i in steps]
    ).reshape(-1)
    return last_values, ewma_values


def get_wapes(path, country, metrics, prices_not_returns, valid_size, test_size):
    """Tune and evaluate a lag/EWMA blend forecast for every file in ``path``.

    For each file the ``Open`` column is read (rows with missing values are
    dropped). The forecast of observation ``i`` is
    ``k * series[i - 1] + (1 - k) * ewma``, where ``ewma`` comes from
    ``get_exact_ewma`` with ``alpha=2, n=20, val=i - 1``. The weight ``k`` in
    [0, 1] is chosen by an Optuna study (200 trials) that minimises ``metrics``
    on the validation window, and the tuned ``k`` is then scored on the test
    window.

    Parameters
    ----------
    path : str
        Directory containing the CSV files. A ``.DS_Store`` entry is ignored.
    country : str
        Label used only in the progress-bar text.
    metrics : callable
        ``metrics(y, y_hat)`` returning a number; it is multiplied by 100.
    prices_not_returns : bool
        If true, model ``Open`` directly; otherwise model its ``pct_change()``
        with the leading NaN removed.
    valid_size, test_size : int
        Number of trailing observations used for validation and testing; the
        test window is the last ``test_size`` points and the validation
        window immediately precedes it.

    Returns
    -------
    dict
        Maps ``file_name[:-14]`` to the test-window metric times 100.

    Raises
    ------
    ValueError
        If a series is too short to leave at least one observation for the
        EWMA before the validation window.
    """
    result_dict = dict()
    # Drop the macOS ".DS_Store" entry when it exists; unlike list.remove this
    # does not fail on directories that never contained one.
    entries = [name for name in os.listdir(path) if name != ".DS_Store"]

    pbar = tqdm(entries, desc= f"{country} companies")
    for file_name in pbar:

        df = pd.read_csv(os.path.join(path, file_name)).dropna()
        prices = np.array(df.Open) if prices_not_returns else np.array(df.Open.pct_change())[1:]

        # Explicit window boundaries; unlike negative slices these stay correct
        # when test_size is 0.
        valid_start = len(prices) - valid_size - test_size
        test_start = len(prices) - test_size
        if valid_start < 2:
            # The first forecast needs an EWMA over valid_start - 1 >= 1 points.
            raise ValueError(
                f"{file_name}: {len(prices)} observations are too few for "
                f"valid_size={valid_size} and test_size={test_size}"
            )

        # Selecting k: the lagged values and EWMAs are independent of k, so
        # they are computed once instead of inside each of the 200 trials.
        valid_last, valid_ewma = _blend_components(prices, valid_start, test_start)
        y_valid = prices[valid_start:test_start]

        def objective(trial):
            k = trial.suggest_float("k", 0, 1)
            y_hat = k * valid_last + (1 - k) * valid_ewma
            return metrics(y_valid, y_hat) * 100

        # NOTE(review): no sampler seed is passed, so the tuned k is not
        # guaranteed to be identical between runs.
        study = optuna.create_study()
        study.optimize(objective, n_trials=200, n_jobs= -1, show_progress_bar= False)

        # Testing: score the tuned weight on the held-out tail of the series.
        k_opt = study.best_params["k"]
        test_last, test_ewma = _blend_components(prices, test_start, len(prices))
        y_test = prices[test_start:]
        y_hat_test = k_opt * test_last + (1 - k_opt) * test_ewma

        metrics_val = metrics(y_test, y_hat_test) * 100

        # The key is the file name minus its last 14 characters
        # (presumably a fixed file-name suffix — confirm against the data files).
        result_dict[file_name[:-14]] = metrics_val

        pbar.set_description(f"{country}: {file_name[:-14]}. Valid: {study.best_value:.2f}%. Test: {metrics_val:.2f}%")

    return result_dict

In [5]:
# US companies, modelling raw Open prices: 400 validation / 300 test points.
us_prices_wapes = get_wapes(
    path="../Data/American Companies",
    country="US",
    metrics=wape,
    prices_not_returns=True,
    valid_size=400,
    test_size=300,
)

US companies:   0%|          | 0/15 [00:00<?, ?it/s]

In [12]:
# US companies, modelling Open-price returns: 400 validation / 300 test points.
us_returns_wapes = get_wapes(
    path="../Data/American Companies",
    country="US",
    metrics=wape,
    prices_not_returns=False,
    valid_size=400,
    test_size=300,
)

US companies:   0%|          | 0/15 [00:00<?, ?it/s]

In [13]:
# Chinese companies, modelling raw Open prices: 400 validation / 300 test points.
ch_prices_wapes = get_wapes(
    path="../Data/Chinese Companies",
    country="CH",
    metrics=wape,
    prices_not_returns=True,
    valid_size=400,
    test_size=300,
)

CH companies:   0%|          | 0/15 [00:00<?, ?it/s]

In [14]:
# Chinese companies, modelling Open-price returns: 400 validation / 300 test points.
ch_returns_wapes = get_wapes(
    path="../Data/Chinese Companies",
    country="CH",
    metrics=wape,
    prices_not_returns=False,
    valid_size=400,
    test_size=300,
)

CH companies:   0%|          | 0/15 [00:00<?, ?it/s]

In [15]:
# One row per company with the test metric for the price and return series.
# The value columns are paired with the company names purely by position, so
# both dictionaries of a country must list the companies in the same order.
us_returns = pd.DataFrame(
    {
        "Company": list(us_prices_wapes.keys()),
        "WAPE (price)": list(us_prices_wapes.values()),
        "WAPE (return)": list(us_returns_wapes.values()),
    }
)

ch_returns = pd.DataFrame(
    {
        "Company": list(ch_prices_wapes.keys()),
        "WAPE (price)": list(ch_prices_wapes.values()),
        "WAPE (return)": list(ch_returns_wapes.values()),
    }
)

In [16]:
# Show the US summary table (WAPE columns are in percent).
us_returns

Unnamed: 0,Company,WAPE (price),WAPE (return)
0,AMZN,2.131505,103.784531
1,MSFT,1.518125,103.496534
2,GM,2.139221,102.013013
3,UBER,2.849135,103.29654
4,Coca Cola,0.883731,102.113451
5,EBAY,1.809397,100.408324
6,DIS,1.639376,102.632827
7,FORD,2.417377,102.743608
8,NFLX,2.321462,104.273285
9,NKE,1.704946,103.029594


In [17]:
# Show the Chinese summary table (WAPE columns are in percent).
ch_returns

Unnamed: 0,Company,WAPE (price),WAPE (return)
0,China Shenhua,1.982063,105.340244
1,Agricultural Bank of China,0.468983,104.81972
2,Wanhua Chemical Group,1.509416,102.58776
3,Anhui Coonch Cement,1.458439,104.211555
4,CITIC Securities,1.248193,102.434935
5,Kweichow Moutai,1.36266,103.804991
6,China Duty Free Group,2.276677,103.41959
7,Ping An,1.197741,102.123825
8,China Pacific Insurance,1.316325,101.983185
9,PetroChina,1.513595,104.211693


In [19]:
# Persist both summary tables as CSV files (relative paths), leaving out the
# DataFrame row index.
us_returns.to_csv("ewma_us_price_return.csv", index= False)
ch_returns.to_csv("ewma_ch_price_return.csv", index= False)