In [None]:
import pandas as pd
from pmdarima.arima import auto_arima
import pmdarima
from tqdm.notebook import tqdm
import os
import numpy as np

In [None]:
def wape(y, y_hat):
    """Weighted absolute percentage error, returned as a fraction (not percent).

    Computes ``sum(|y - y_hat|) / sum(|y|)``: the total absolute deviation
    between the two inputs, normalised by the total absolute magnitude of
    the first argument ``y``.
    """
    total_abs_error = np.sum(np.abs(y - y_hat))
    total_abs_level = np.sum(np.abs(y))
    return total_abs_error / total_abs_level

def get_wapes(path, country, prices_not_returns, metrics, test_size):
    """Walk-forward ARIMA evaluation of every company CSV found in ``path``.

    For each file the ARIMA order is selected once with ``auto_arima`` on the
    training part of the series (everything except the last ``test_size``
    observations). Every held-out observation is then forecast one step ahead
    by an ARIMA model of that order, re-fitted on all observations that
    precede it.

    Parameters
    ----------
    path : str
        Directory with one CSV per company; each CSV must have an ``Open``
        column. A trailing separator is optional.
    country : str
        Label used only in the progress-bar descriptions.
    prices_not_returns : bool
        ``True`` models the raw ``Open`` series (differencing order fixed to
        1); ``False`` models its ``pct_change()`` with the leading NaN
        dropped (differencing order fixed to 0).
    metrics : callable
        Called as ``metrics(actual, forecast)`` with two 1-D arrays of length
        ``test_size``; its result is multiplied by 100.
    test_size : int
        Number of trailing observations that are forecast and scored.

    Returns
    -------
    dict
        Maps the file name with its last 14 characters removed (e.g.
        ``"AMZN 1997 2022.csv"`` -> ``"AMZN"``) to the metric in percent.

    Raises
    ------
    ValueError
        If ``test_size`` is not positive, or a series is too short to leave
        any training data.
    """
    if test_size <= 0:
        raise ValueError(f"test_size must be positive, got {test_size}")

    result_dict = dict()

    # Skip hidden entries (".DS_Store", ".ipynb_checkpoints", ...) rather than
    # requiring ".DS_Store" to exist, and sort so that repeated calls on the
    # same directory always visit companies in the same order.
    entries = sorted(name for name in os.listdir(path) if not name.startswith("."))
    pbar_companies = tqdm(entries, desc= f"{country} companies")
    for file in pbar_companies:
        df = pd.read_csv(os.path.join(path, file))
        open_prices = np.array(df.Open) if prices_not_returns else np.array(df.Open.pct_change())[1:]

        end_h = len(open_prices)
        start_h = end_h - test_size
        if start_h <= 0:
            raise ValueError(
                f"{file}: series of length {end_h} is too short for test_size={test_size}"
            )

        # Order selection uses the training window only, so the held-out
        # observations never influence the chosen (p, d, q).
        arima_model = auto_arima(
            open_prices[:start_h],
            start_p= 0,
            start_q= 0,
            d= 1 if prices_not_returns else 0,
            max_p= 10,
            max_d= 2,
            max_q= 10,
            #max_order= 4,
            seasonal= False,
            information_criterion= "bic",
            alpha= 0.01,
            test= "kpss",
            stepwise= True,
            # n_jobs= -1,
            n_fits= 70,
            trace= False)
        order = arima_model.get_params()["order"]

        forecast = []
        # One forecast per held-out observation: the model fitted on
        # open_prices[:i] predicts open_prices[i].
        pbar = tqdm(range(start_h, end_h), desc= "Forecast iter")
        for i in pbar:
            step_model = pmdarima.arima.ARIMA(order)
            step_model.fit(open_prices[:i])
            f = step_model.predict(n_periods= 1)
            # predict() returns a length-1 container; store the scalar so the
            # forecasts form a 1-D array aligned element-wise with the actuals.
            forecast.append(np.ravel(f)[0])
        forecast = np.array(forecast)

        # Actuals go first: metrics such as WAPE normalise by their first argument.
        actual = open_prices[start_h:end_h]
        metrics_val = metrics(actual, forecast) * 100

        result_dict[file[:-14]] = metrics_val
        pbar_companies.set_description(f"{country}: {file[:-14]}: Test val: {metrics_val:.2f}%")

    return result_dict

In [None]:
# Data directories, one per market; get_wapes reads every entry in them as a CSV.
path_us = "../Data/American Companies/"
path_ch = "../Data/Chinese Companies/"

# Number of trailing observations held out and forecast one step at a time.
forecast_horizon = 100

# US companies, modelling price levels.
us_prices_wapes = get_wapes(
    path=path_us,
    country="US",
    prices_not_returns=True,
    metrics=wape,
    test_size=forecast_horizon,
)


In [None]:
# Load the Amazon history and keep only the Open column as a NumPy array.
# Note that `df` is therefore an ndarray, not a DataFrame.
amzn_history = pd.read_csv("../Data/American Companies/AMZN 1997 2022.csv")
df = np.array(amzn_history.Open)

In [None]:
# One-step-ahead ARIMA(1, 1, 0) forecasts for the last 300 observations of `df`.
start_h = len(df) - 300
end_h = len(df)
forecast = []

for i in range(start_h, end_h):
    model = pmdarima.arima.ARIMA((1, 1, 0))
    # Fit only on what was observed before step i. Fitting on df[start_h:]
    # (the test window itself) ignored i, so every iteration produced the same
    # forecast from a model trained on the very data being predicted.
    model = model.fit(df[:i])
    f = model.predict(1)
    # Each predict(1) result is appended as returned, one entry per step.
    forecast.append(f)

forecast = np.array(forecast)

In [None]:
# Display the US price-level results: company -> metric in percent, as returned by get_wapes.
us_prices_wapes

In [None]:

# US companies, modelling returns instead of price levels.
us_returns_wapes = get_wapes(
    path=path_us,
    country="US",
    prices_not_returns=False,
    metrics=wape,
    test_size=forecast_horizon,
)

# Chinese companies: price levels first, then returns.
ch_prices_wapes = get_wapes(
    path=path_ch,
    country="CH",
    prices_not_returns=True,
    metrics=wape,
    test_size=forecast_horizon,
)
ch_returns_wapes = get_wapes(
    path=path_ch,
    country="CH",
    prices_not_returns=False,
    metrics=wape,
    test_size=forecast_horizon,
)

In [None]:


def _wape_table(prices_wapes, returns_wapes):
    """Combine price- and return-based WAPE dicts into one table, matched by company.

    Both arguments map a company name to a WAPE value. Rows follow the key
    order of ``prices_wapes``, and each return value is looked up by company
    name rather than by position, so a different insertion order in the two
    dicts cannot attach a value to the wrong company.

    Raises
    ------
    ValueError
        If the two dicts do not cover exactly the same companies.
    """
    if set(prices_wapes) != set(returns_wapes):
        unmatched = sorted(set(prices_wapes) ^ set(returns_wapes))
        raise ValueError(f"Companies missing from one of the WAPE dicts: {unmatched}")

    companies = list(prices_wapes)
    return pd.DataFrame({
        "Company": companies,
        "WAPE (price)": [prices_wapes[name] for name in companies],
        "WAPE (return)": [returns_wapes[name] for name in companies],
    })


us_data = _wape_table(us_prices_wapes, us_returns_wapes)
ch_data = _wape_table(ch_prices_wapes, ch_returns_wapes)