In [2]:
import pandas as pd
from pmdarima.arima import auto_arima
import arch
import pmdarima
from tqdm.notebook import tqdm
import os
import numpy as np

In [3]:
def wape(y, y_hat):
    return np.sum(np.abs(y - y_hat)) / np.sum(np.abs(y)) * 100

def get_wapes(path, country, prices_not_returns, metrics, test_size):
    result_dict = dict()

    entries = os.listdir(path)
    entries.remove(".DS_Store")
    pbar_companies = tqdm(entries, desc= f"{country} companies")
    for file in pbar_companies:
        df = pd.read_csv(path + file if path[-1] == r"/" else path + r"/" + file).dropna()
        open_prices = np.diff(np.array(df.Open)) if prices_not_returns else np.array(df.Open.pct_change())[1:]

        test_size = test_size
        start_h = len(open_prices) - test_size
        end_h = len(open_prices)

        forecast = []
        arima_model = auto_arima(
            open_prices[:start_h],
            start_p= 0,
            start_q= 0,
            d= 0,
            max_p= 10,
            max_d= 2,
            max_q= 10,
            max_order= 7,
            seasonal= False,
            information_criterion= "bic",
            alpha= 0.01,
            test= "kpss",
            stepwise= True,
            n_fits= 72,
            trace= False)
        order = arima_model.get_params()["order"]

        pbar = tqdm(range(start_h, end_h), desc= "Forecast iter")
        for i in pbar:
            # arima_model = pmdarima.arima.ARIMA(order)
            # arima_model = arima_model.fit(open_prices[:i])
            # f1 = arima_model.predict(n_periods= 1)
            #
            # residuals = arima_model.resid()

            figarch_model = arch.arch_model(open_prices[:i], mean= "AR", lags = order[0], vol= "GARCH", p= 0, q= 0, dist= "normal")
            result = figarch_model.fit(disp= "off")

            f2 = result.forecast(horizon= 1, reindex= True, simulations= 5)
            f2 = 2 * f2.variance[-1:] ** 0.5 + f2.mean[-1:]
            f2 = np.array(f2)[0][0]

            forecast.append(f2)
        forecast = np.array(forecast)

        y = forecast
        y_hat = open_prices[-test_size:]
        metrics_val = metrics(y, y_hat)

        result_dict[file[:-14]] = metrics_val
        pbar_companies.set_description(f"{country}: {file[:-14]}: Test val: {metrics_val:.2f}%")

    return result_dict

In [5]:
path_us = "../Data/American Companies/"
path_ch = "../Data/Chinese Companies/"

forecast_horizon = 150

In [6]:
us_prices_wapes = get_wapes(path_us, "US", True, wape, forecast_horizon)
us_returns_wapes = get_wapes(path_us, "US", False, wape, forecast_horizon)

us_data = pd.DataFrame({
    "Company": us_prices_wapes.keys(),
    "WAPE (price)": us_prices_wapes.values(),
    "WAPE (return)": us_returns_wapes.values()
})
us_data.to_csv("arima_figarch_us.csv", index= False)

US companies:   0%|          | 0/15 [00:00<?, ?it/s]

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

<class 'ValueError'>: One of p or o must be strictly positive

In [5]:
ch_prices_wapes = get_wapes(path_ch, "CH", True, wape, forecast_horizon)
ch_returns_wapes = get_wapes(path_ch, "CH", False, wape, forecast_horizon)

ch_data = pd.DataFrame({
    "Company": ch_prices_wapes.keys(),
    "WAPE (price)": ch_prices_wapes.values(),
    "WAPE (return)": ch_returns_wapes.values()
})
ch_data.to_csv("arima_figarch_ch.csv", index= False)

CH companies:   0%|          | 0/15 [00:00<?, ?it/s]

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

estimating the model parameters. The scale of y is 0.001679. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.001679. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.001678. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.001678. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.001678. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

m

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

estimating the model parameters. The scale of y is 0.01349. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.01348. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.01348. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.01348. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.01348. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model 

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

estimating the model parameters. The scale of y is 0.04626. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.04625. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.04624. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.04622. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.04621. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10 * y.

model 

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

CH companies:   0%|          | 0/15 [00:00<?, ?it/s]

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

estimating the model parameters. The scale of y is 0.0005471. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0005473. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0005472. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0005472. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0005471. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 1

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

estimating the model parameters. The scale of y is 0.0006171. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.000617. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0006169. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0006167. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0006167. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

estimating the model parameters. The scale of y is 0.00076. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0007599. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0007599. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0007598. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0007597. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

estimating the model parameters. The scale of y is 0.0006801. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0006802. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.00068. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0006799. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0006798. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

estimating the model parameters. The scale of y is 0.0005662. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0005661. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.000566. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0005658. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0005657. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 10

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

estimating the model parameters. The scale of y is 0.0001533. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0001533. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0001532. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0001532. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0001532. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 1

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

estimating the model parameters. The scale of y is 0.0008119. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0008117. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0008116. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0008119. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0008118. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 1

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

estimating the model parameters. The scale of y is 0.0006568. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0006567. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0006565. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0006564. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0006562. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 1

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

estimating the model parameters. The scale of y is 0.0008516. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0008514. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0008513. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0008512. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0008511. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 1

Forecast iter:   0%|          | 0/150 [00:00<?, ?it/s]

estimating the model parameters. The scale of y is 0.0008437. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0008436. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0008475. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0008473. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

estimating the model parameters. The scale of y is 0.0008471. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 1