In [7]:
import os
import sys
# Put the parent directory of the working directory on the import path so the
# `src` package imported in the next cell can be found.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [8]:
import pandas as pd
from src.structs import Indicator, Country
from src.utils import plot_time_series, PlotOptions, pacf_plot, acf_plot, Matrix, Float
from src.data import serialize_country_data
from src.statistics import correlation, differencing, stationarity, autocorrelation
import matplotlib.pyplot as plt
from statsmodels.api import OLS
from typing import Literal
import numpy as np

In [None]:
# Country used by the analysis cells further down.
country = Country.UNITED_STATES
# Cleaned dataset, read relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="../data/cleaned/dataset.csv")

In [None]:
def plot_country(
    country: Country,
    diff: int = 0,
    data_path: str = "../data/cleaned/dataset.csv",
) -> None:
    """Plot and print time-series diagnostics for every indicator of a country.

    The dataset is read from ``data_path`` and split into per-indicator series
    with ``serialize_country_data(..., pct=False)``. For each member of
    ``Indicator`` the series is passed through ``differencing(series, diff)``
    and then handed to ``plot_time_series``, ``pacf_plot`` and ``acf_plot``;
    the results of ``stationarity`` and ``autocorrelation`` are printed.
    Finally all differenced series are passed to ``correlation``, its result
    is printed, and ``plt.show()`` is called.

    Args:
        country: Country whose data is extracted from the dataset.
        diff: Order passed to ``differencing``. The first ``diff`` entries of
            the year axis are dropped as well (presumably so the axis lines up
            with the differenced series). Must be non-negative.
        data_path: Path of the CSV file to load. Defaults to the cleaned
            dataset location used throughout this notebook.

    Raises:
        ValueError: If ``diff`` is negative.
    """
    if diff < 0:
        # A negative value would make ``years[diff:]`` select the tail of the
        # year axis instead of trimming its head.
        raise ValueError(f"diff must be non-negative, got {diff}")

    dataset = pd.read_csv(data_path)
    features, years = serialize_country_data(dataset, country, pct=False)

    # Both values depend only on ``diff``, so compute them once for all indicators.
    years_diff = years[diff:]
    # NOTE(review): statsmodels' pacf only accepts lags below half the sample
    # size; confirm that ``pacf_plot`` tolerates ``len // 2``.
    max_lag = len(years_diff) // 2

    features_diff: list[Matrix[Literal["N"], Float]] = []
    for indicator in Indicator:
        ts_diff = differencing(features[indicator], diff)
        features_diff.append(ts_diff)

        plot_time_series(
            years_diff,
            ts_diff,
            13,  # presumably the averaging window for the "Average" line; TODO confirm
            PlotOptions(
                "",
                f"{country.name} {indicator.name}",
                "Date",
                "val",
                [indicator.name, f"{indicator.name} Average"],
                False,
            ),
        )

        pacf_plot(
            years_diff,
            ts_diff,
            max_lag,
            PlotOptions(
                "",
                f"PACF of {indicator.name}",
                "Lag",
                "val",
                [f"PACF of {indicator.name}"],
                False,
            ),
        )

        acf_plot(
            years_diff,
            ts_diff,
            max_lag,
            PlotOptions(
                "",
                f"ACF of {indicator.name}",
                "Lag",
                "val",
                [f"ACF of {indicator.name}"],
                False,
            ),
        )

        adf, kpss = stationarity(ts_diff)
        print("Stationarity test for", indicator.name)
        print(adf)
        print("")
        print(kpss)
        print()

        acf, pacf = autocorrelation(ts_diff)
        print("Autocorrelation for", indicator.name)
        print(acf)
        print("")
        print(pacf)
        print("--------------------")

    corr = correlation(
        features_diff,
        PlotOptions(
            "",
            f"{country.name} Correlation",
            "Indicator",
            "Indicator",
            [indicator.name for indicator in Indicator],
            False,
        ),
    )
    print("Correlation matrices for Pearson, Spearman and Kendall tau:")
    print(corr)
    plt.show()

In [None]:
# Diagnostics for the selected country, passing 1 as the differencing order.
plot_country(country, diff=1)

In [None]:
# Regress GDP on IR and CPI for the selected country, using the series that
# serialize_country_data returns with pct=True.
# `_years` instead of `_` keeps IPython's last-output variable `_` usable.
features, _years = serialize_country_data(df, country, pct=True)
gdp = differencing(features[Indicator.GDP], 0)
ir = differencing(features[Indicator.IR], 0)
cpi = differencing(features[Indicator.CPI], 0)
# statsmodels' OLS does not add an intercept by itself; without the column of
# ones the fitted plane would be forced through the origin.
exog = np.column_stack((np.ones(len(gdp)), ir, cpi))
ols = OLS(gdp, exog).fit()

In [None]:
# Show the summary of the fitted OLS results; being the cell's final bare
# expression, its value is what the notebook renders as the cell output.
ols.summary()

In [None]:
# Same diagnostics for Japan, passing 0 as the differencing order.
plot_country(Country.JAPAN, diff=0)