In [None]:
import os
import sys

# Put the notebook's parent directory on the import path (only once), so that
# packages located one level up can be imported from this notebook.
module_path = os.path.abspath(os.pardir)
already_registered = module_path in sys.path
if not already_registered:
    sys.path.append(module_path)
del already_registered

In [None]:
import pandas as pd
from src.structs import Indicator, Country
from src.utils import plot_time_series, PlotOptions, pacf_plot, acf_plot, Matrix, Float
from src.data import serialize_country_data
from src.statistics import correlation, differencing, stationarity, autocorrelation
import matplotlib.pyplot as plt
from statsmodels.api import OLS
from typing import Literal
import numpy as np

In [None]:
# Country used by the analysis cells that reference `country` below.
country = Country.ITALY

# Cleaned dataset shared by the cells that reference `df` below.
df = pd.read_csv("../data/cleaned/dataset.csv")

In [None]:
def plot_country(country: Country, diff: int = 0) -> None:
    """Plot and print time-series diagnostics for every indicator of a country.

    The cleaned dataset is read from ``../data/cleaned/dataset.csv`` and
    serialized for ``country`` with ``pct=True``. For each member of
    ``Indicator`` the series is passed through ``differencing`` with ``diff``
    and then:

    * plotted as a time series, a PACF plot and an ACF plot;
    * run through ``stationarity`` (both returned results are printed);
    * run through ``autocorrelation`` (both returned results are printed).

    Finally ``correlation`` is computed across all processed series, its
    result is printed and the pending matplotlib figures are shown.

    Args:
        country: Country whose indicators are analysed.
        diff: Value forwarded to ``differencing``; the first ``diff`` entries
            of the year axis are dropped to match. Must be non-negative.

    Raises:
        ValueError: If ``diff`` is negative (a negative value would slice the
            year axis from the wrong end).
    """
    if diff < 0:
        raise ValueError(f"diff must be non-negative, got {diff}")

    df = pd.read_csv("../data/cleaned/dataset.csv")
    features, years = serialize_country_data(df, country, pct=True)

    # Loop-invariant: the year axis and the lag bound depend only on `diff`.
    years_diff = years[diff:]
    max_lag = len(years_diff) // 2

    features_diff: list[Matrix[Literal["N"], Float]] = []
    for indicator in Indicator:
        ts_diff = differencing(features[indicator], diff)
        features_diff.append(ts_diff)

        plot_time_series(
            years_diff,
            ts_diff,
            # NOTE(review): presumably the window of the "Average" series named
            # in the legend below -- confirm against plot_time_series.
            13,
            PlotOptions(
                "",
                f"{country.name} {indicator.name}",
                "Date",
                "val",
                [f"{indicator.name}", f"{indicator.name} Average"],
                False,
            ),
        )

        pacf_plot(
            years_diff,
            ts_diff,
            max_lag,
            PlotOptions(
                "",
                f"PACF of {indicator.name}",
                "Lag",
                "val",
                [f"PACF of {indicator.name}"],
                False,
            ),
        )

        acf_plot(
            years_diff,
            ts_diff,
            max_lag,
            PlotOptions(
                "",
                f"ACF of {indicator.name}",
                "Lag",
                "val",
                [f"ACF of {indicator.name}"],
                False,
            ),
        )

        adf, kpss = stationarity(ts_diff)
        print("Stationarity test for", indicator.name)
        print(adf)
        print("")
        print(kpss)
        print()

        acf, pacf = autocorrelation(ts_diff)
        print("Autocorrelation for", indicator.name)
        print(acf)
        print("")
        print(pacf)
        print("--------------------")

    corr = correlation(
        features_diff,
        PlotOptions(
            "",
            f"{country.name} Correlation",
            "Indicator",
            "Indicator",
            [indicator.name for indicator in Indicator],
            False,
        ),
    )
    # Message typo fixed: "Spearmen" -> "Spearman".
    print("Correlation matrices for Pearson, Spearman and Kendall tau:")
    print(corr)
    plt.show()

In [None]:
# Run the per-indicator diagnostics for the selected country with diff=1.
plot_country(country, diff=1)

In [None]:
# Fit an ordinary least squares model of the GDP series on the IR and CPI
# series of the selected country (0 is passed to `differencing` for each).
# NOTE(review): no constant column is stacked into the regressors -- confirm
# that a regression without an intercept is intended.
features, _ = serialize_country_data(df, country, pct=True)
gdp, ir, cpi = (
    differencing(features[series_key], 0)
    for series_key in (Indicator.GDP, Indicator.IR, Indicator.CPI)
)
ols = OLS(endog=gdp, exog=np.column_stack([ir, cpi])).fit()

In [None]:
# Display the summary of the fitted OLS model as the cell output.
ols.summary()

In [None]:
# Run the same diagnostics for Japan with diff=0.
plot_country(Country.JAPAN, diff=0)

In [8]:
import pandas as pd

# Number of CSV rows parsed per chunk while streaming the raw file.
chunksize: int = 10**4

# country name -> set of indicator names seen for that country
indicators: dict[str, set[str]] = {}
# indicator name -> set of country names that have that indicator
countries: dict[str, set[str]] = {}

# Only the two name columns are needed, so `usecols` skips parsing every other
# column of the raw file. The context manager closes the file handle even if
# the scan is interrupted.
with pd.read_csv(
    "../data/raw/IFS_09-26-2023 00-50-38-77_timeSeries.csv",
    chunksize=chunksize,
    dtype=str,
    usecols=["Country Name", "Indicator Name"],
) as reader:
    for chunk in reader:
        # Collapse repeated (country, indicator) pairs first; first-occurrence
        # order is kept, so the dict key order matches a row-by-row scan.
        pairs = chunk[["Country Name", "Indicator Name"]].drop_duplicates()
        for country_name, indicator_name in pairs.itertuples(index=False, name=None):
            indicators.setdefault(country_name, set()).add(indicator_name)
            countries.setdefault(indicator_name, set()).add(country_name)

In [7]:
# Show the set of indicator names collected for France during the raw-file scan.
indicators["France"]

{'Assets (with Fund Record), US Dollars',
 'Assets, Direct Investment, Debt Instruments, US Dollars',
 'Assets, Direct Investment, Equity and Investment Fund Shares, US Dollars',
 'Assets, Direct Investment, US Dollars',
 'Assets, Financial Derivatives (Other Than Reserves) and Employee Stock Options, US Dollars',
 'Assets, Other Investment, Other Equity, US Dollars',
 'Assets, Other Investment, US Dollars',
 'Assets, Other Investment: Debt Instruments, US Dollars',
 'Assets, Portfolio Investment, Debt Securities, US Dollars',
 'Assets, Portfolio Investment, Equity and Investment Fund Shares, US Dollars',
 'Assets, Portfolio Investment, US Dollars',
 'Assets, Reserve Assets, Net (with Fund Record), US Dollars',
 'Capital Account, Total, Debit, US Dollars',
 'Central Bank Survey, Claims on Central Government (Euro Area-Wide-Residency), Euros',
 'Central Bank Survey, Claims on Nonresidents (Euro Area-Wide-Residency), Euros',
 'Central Bank Survey, Claims on Other Depository Corporations 

In [None]:
# Invert the `indicators` map (country name -> indicator names) into
# indicator name -> list of country names that have that indicator.
indicators_map: dict[str, list[str]] = {}
for country_name, indicator_names in indicators.items():
    for indicator_name in indicator_names:
        indicators_map.setdefault(indicator_name, []).append(country_name)


In [11]:
# Show the set of country names that have at least one row for this
# lending-rate indicator in the scanned raw file.
countries["Financial, Interest Rates, Lending Rate, Percent per annum"]

{'Afghanistan, Islamic Rep. of',
 'Albania',
 'Algeria',
 'Angola',
 'Anguilla',
 'Antigua and Barbuda',
 'Argentina',
 'Armenia, Rep. of',
 'Aruba, Kingdom of the Netherlands',
 'Australia',
 'Azerbaijan, Rep. of',
 'Bahamas, The',
 'Bahrain, Kingdom of',
 'Bangladesh',
 'Barbados',
 'Belarus, Rep. of',
 'Belize',
 'Benin',
 'Bhutan',
 'Bolivia',
 'Bosnia and Herzegovina',
 'Botswana',
 'Brazil',
 'Brunei Darussalam',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'Cabo Verde',
 'Cameroon',
 'Canada',
 'Central African Economic and Monetary Community',
 'Chile',
 'China, P.R.: Hong Kong',
 'China, P.R.: Macao',
 'China, P.R.: Mainland',
 'Colombia',
 'Comoros, Union of the',
 'Congo, Dem. Rep. of the',
 'Congo, Rep. of',
 'Costa Rica',
 'Croatia, Rep. of',
 'Cyprus',
 'Czech Rep.',
 "Côte d'Ivoire",
 'Denmark',
 'Djibouti',
 'Dominica',
 'Dominican Rep.',
 'Eastern Caribbean Currency Union (ECCU)',
 'Ecuador',
 'Egypt, Arab Rep. of',
 'El Salvador',
 'Equatorial Guinea, Rep. of',
 'Eswati

In [20]:
list_indicators = list(countries.keys())
list_countries = list(indicators.keys())

# Indicator x country availability matrix: one row per indicator, one column
# per country, holding 1 when the indicator is present for the country and 0
# otherwise. It is built in a single pass (integer columns) instead of filling
# an object-dtype frame cell by cell. The comprehension variables stay local,
# so the global `country` selected earlier in the notebook is not overwritten.
# NOTE: this rebinds `df`, replacing the cleaned dataset loaded earlier; reload
# it before re-running the cells that pass `df` to `serialize_country_data`.
df = pd.DataFrame(
    {
        country_name: [
            1 if country_name in countries[indicator_name] else 0
            for indicator_name in list_indicators
        ]
        for country_name in list_countries
    },
    index=list_indicators,
)
# Keep the indicator name as a regular trailing column as well as the index.
df["Indicator"] = list_indicators

# df.to_csv("../data/cleaned/indicators_countries.csv")

In [21]:
# Display the current `df` as the cell output.
df

Unnamed: 0,"Afghanistan, Islamic Rep. of",Advanced Economies,Angola,Algeria,Albania,"Andorra, Principality of","Armenia, Rep. of",Australia,Argentina,Antigua and Barbuda,...,West African Economic and Monetary Union (WAEMU),West Bank and Gaza,World,"Yemen, Rep. of",Yugoslavia,Zambia,Yemen Arab Rep.,"Yemen, People's Dem. Rep. of",Zimbabwe,Indicator
"Fund Accounts, UFC & Loans, SDRs",1,1,1,1,1,1,1,1,1,1,...,1,0,1,1,1,1,1,1,1,"Fund Accounts, UFC & Loans, SDRs"
"Fund Accounts, Outstanding Loans, SDRs",1,1,1,1,1,1,1,1,1,1,...,1,0,1,1,1,1,1,1,1,"Fund Accounts, Outstanding Loans, SDRs"
"Fund Accounts, SDR Holdings as % of Allocation, Percent per annum",1,1,1,1,1,1,1,1,1,1,...,1,0,1,1,1,1,1,1,1,"Fund Accounts, SDR Holdings as % of Allocation..."
"Fund Accounts, SDR Holdings, Allocations, SDRs",1,1,1,1,1,1,1,1,1,1,...,1,0,1,1,1,1,1,1,1,"Fund Accounts, SDR Holdings, Allocations, SDRs"
"Fund Accounts, Quota, SDRs",1,1,1,1,1,1,1,1,1,1,...,1,0,1,1,1,1,1,1,1,"Fund Accounts, Quota, SDRs"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Monetary, Other Banking Institutions and Nonbank Institutions, Variations of Claims on Public Nonfinancial Corporations, Claims on Nonbank Public Financial Institutions (Non-Standardized Presentation), Domestic Currency",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Monetary, Other Banking Institutions and Nonba..."
"Monetary, Monetary Survey, Domestic Claims, Claims on Public Non-financial Corporations, Claims on Nonbank Public Financial Institutions (Non-Standardized Presentation), Domestic Currency",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Monetary, Monetary Survey, Domestic Claims, Cl..."
"Monetary, Banking Survey and Financial Survey, Liabilities to Nonbank Financial Institutions (Non-Standardized Presentation), Domestic Currency",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Monetary, Banking Survey and Financial Survey,..."
"Monetary, Banking Survey and Financial Survey, Domestic Claims, Claims on Public Non-financial Corporations, Claims on Nonbank Public Financial Institutions (Non-Standardized Presentation), Domestic Currency",0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,"Monetary, Banking Survey and Financial Survey,..."


In [23]:
# Persist the current `df` to an Excel workbook under ../data/cleaned/.
df.to_excel("../data/cleaned/indicators_countries.xlsx")