In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Three-letter country codes (they match ISO 3166-1 alpha-3) mapped to the
# display names used in this notebook. Later applied to the `REF_AREA` column
# to derive a `Country` column.
COUNTRY_CODES_AND_NAMES_MAPPING = dict(
    JPN="Japan",
    DEU="Germany",
    USA="USA",
    GBR="United Kingdom",
    IND="India",
    CAN="Canada",
    FRA="France",
    ITA="Italy",
    AUS="Australia",
    CHE="Switzerland",
    ESP="Spain",
    GRC="Greece",
    MEX="Mexico",
    ZAF="South Africa",
    KOR="South Korea",
    NLD="Netherlands",
    SWE="Sweden",
    IRL="Ireland",
    BEL="Belgium",
    NZL="New Zealand",
    DNK="Denmark",
    POL="Poland",
    LUX="Luxembourg",
    NOR="Norway",
    PRT="Portugal",
    CHL="Chile",
    AUT="Austria",
    ISR="Israel",
    HUN="Hungary",
    RUS="Russia",
    FIN="Finland",
    COL="Colombia",
    LVA="Latvia",
    SVK="Slovak Republic",
    CRI="Costa Rica",
    SVN="Slovenia",
    LTU="Lithuania",
    CZE="Czech Republic",
    ISL="Iceland",
)

In [None]:
def _make_missing_values_heatmap(data, data_name, index=None):
    """Plot a heatmap marking the missing (null/NaN) cells of a DataFrame.

    Every cell of ``data`` is replaced by the result of ``DataFrame.isnull``
    and the resulting boolean frame is drawn with seaborn. Under pandas'
    default options neither zeros nor infinities count as missing.

    Parameters:
        data (DataFrame): The pandas DataFrame to analyze.
        data_name (str): Label appended to the plot title.
        index (label or list of labels, optional): Column(s) to move into the
            index before plotting. The frame is then sorted by that index, so
            the heatmap rows follow index order. If omitted (or falsy),
            ``data`` is plotted in its current row order.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    if index:
        # Works on a new frame; the caller's DataFrame is not modified.
        data = data.set_index(index).sort_index()

    # Boolean frame: True where the original value is missing.
    missing_mask = data.isnull()

    # Explicit Axes so the heatmap and title target the same figure.
    _, ax = plt.subplots(figsize=(10, 6))
    sns.heatmap(missing_mask, cbar=False, cmap="viridis", ax=ax)
    ax.set_title(f"Missing Values in Dataset {data_name}")
    plt.show()

In [None]:
# Load the pickled `cpi` frame from the project's `bld` directory.
# NOTE(review): absolute, machine-specific path - this cell only runs where
# that directory exists; consider a configurable base directory.
cpi = pd.read_pickle(
    filepath_or_buffer="/Users/nicolasroever/Desktop/Global Macro Variables/global_macro_variables/bld/cpi.pkl",
)

# Preview the first rows.
cpi.head(n=5)

In [None]:
# Load the pickled `quarterly_gdp_USD` frame and preview its first rows.
# NOTE(review): absolute, machine-specific path.
quarterly_gdp_usd = pd.read_pickle(
    filepath_or_buffer="/Users/nicolasroever/Desktop/Global Macro Variables/global_macro_variables/bld/quarterly_gdp_USD.pkl",
)
quarterly_gdp_usd.head(n=5)

In [None]:
# Left-join `quarterly_gdp_usd` onto `cpi` by (REF_AREA, Date). `validate`
# makes pandas raise if either side has duplicate keys.
merge = cpi.merge(
    right=quarterly_gdp_usd,
    how="left",
    on=["REF_AREA", "Date"],
    validate="one_to_one",
)

In [None]:
# Visualise which cells of `merge` are missing, with rows ordered by Date.
_make_missing_values_heatmap(data=merge, data_name="merge", index=["Date"])

In [None]:
# Load the pickled `debt_by_GDP` frame.
# NOTE(review): absolute, machine-specific path.
debt_by_GDP = pd.read_pickle(
    filepath_or_buffer="/Users/nicolasroever/Desktop/Global Macro Variables/global_macro_variables/bld/debt_by_GDP.pkl",
)

In [None]:
# Preview the first rows of `debt_by_GDP`.
debt_by_GDP.head(n=5)

In [None]:
# The first rows are shown by the preceding `.head()` cell; show the last rows
# here instead of displaying the whole frame.
debt_by_GDP.tail()

In [None]:
# Spot-check a single observation: rows with REF_AREA "USA" dated 1960-10-01.
# Left as the cell's last expression so the notebook renders it as a table
# rather than printing plain text.
debt_by_GDP.loc[
    (debt_by_GDP["REF_AREA"] == "USA")
    & (debt_by_GDP["Date"] == pd.Timestamp("1960-10-01"))
]

In [None]:
# Right join: keeps every row of `debt_by_GDP` and drops rows of `merge` that
# have no matching (REF_AREA, Date) key.
# NOTE(review): every other join in this notebook is a left join - confirm
# that restricting the sample to `debt_by_GDP` rows is intended.
merge_2 = merge.merge(
    right=debt_by_GDP,
    how="right",
    on=["REF_AREA", "Date"],
    validate="one_to_one",
)

In [None]:
# Load the pickled `current_account` frame.
# NOTE(review): absolute, machine-specific path.
current_account = pd.read_pickle(
    filepath_or_buffer="/Users/nicolasroever/Desktop/Global Macro Variables/global_macro_variables/bld/current_account.pkl",
)

In [None]:
# Left-join `current_account` onto `merge_2` by (REF_AREA, Date); pandas
# raises if the keys are not unique on both sides.
merge_3 = merge_2.merge(
    right=current_account,
    how="left",
    on=["REF_AREA", "Date"],
    validate="one_to_one",
)

In [None]:
# Preview the first rows of `merge_3`.
merge_3.head(n=5)

In [None]:
# Missing-value heatmap for `merge_3`, rows ordered by Date. The title label
# names the frame actually plotted so the heatmaps in this notebook can be
# told apart.
_make_missing_values_heatmap(merge_3, "merge_3", index=["Date"])

In [None]:
# Load the pickled `real_quarterly_gva` frame.
# NOTE(review): absolute, machine-specific path.
real_quarterly_gva = pd.read_pickle(
    filepath_or_buffer="/Users/nicolasroever/Desktop/Global Macro Variables/global_macro_variables/bld/real_quarterly_gva.pkl",
)

In [None]:
# Count rows that are exact repeats (across all columns) of an earlier row.
real_quarterly_gva.duplicated(subset=None, keep="first").sum()

In [None]:
# Left-join `real_quarterly_gva` onto `merge_3` by (REF_AREA, Date); pandas
# raises if the keys are not unique on both sides.
merge_4 = merge_3.merge(
    right=real_quarterly_gva,
    how="left",
    on=["REF_AREA", "Date"],
    validate="one_to_one",
)

In [None]:
# Missing-value heatmap for `merge_4`, rows ordered by Date. The title label
# names the frame actually plotted so the heatmaps in this notebook can be
# told apart.
_make_missing_values_heatmap(merge_4, "merge_4", index=["Date"])

In [None]:
# Re-read the `current_account` pickle and rebind the name.
# NOTE(review): this is the same file already loaded into `current_account`
# earlier in the notebook - the reload looks redundant.
current_account = pd.read_pickle(
    filepath_or_buffer="/Users/nicolasroever/Desktop/Global Macro Variables/global_macro_variables/bld/current_account.pkl",
)

In [None]:
# `current_account` is already part of `merge_4`: it was joined on the same
# (REF_AREA, Date) keys when `merge_3` was built. Joining it a second time
# would only duplicate its columns under `_x`/`_y` suffixes, so `merge_5` is a
# plain copy instead. The copy keeps later column assignments on `merge_5`
# from touching `merge_4`.
merge_5 = merge_4.copy()

In [None]:
# Missing-value heatmap for `merge_5`, rows ordered by Date. The title label
# names the frame actually plotted so the heatmaps in this notebook can be
# told apart.
_make_missing_values_heatmap(merge_5, "merge_5", index=["Date"])

In [None]:
# Add a "Country" column holding the display name for each REF_AREA code;
# codes absent from COUNTRY_CODES_AND_NAMES_MAPPING become NaN.
merge_5["Country"] = merge_5["REF_AREA"].map(COUNTRY_CODES_AND_NAMES_MAPPING)

In [None]:
# Number of rows without a country name (REF_AREA missing or not in the mapping).
merge_5["Country"].isnull().sum()

In [None]:
# Load the pickled `10_year_maturity_bond_yields` frame.
# NOTE(review): absolute, machine-specific path.
interest_rates = pd.read_pickle(
    filepath_or_buffer="/Users/nicolasroever/Desktop/Global Macro Variables/global_macro_variables/bld/10_year_maturity_bond_yields.pkl",
)

In [None]:
# Left-join `interest_rates` onto `merge_5`. Unlike the earlier joins, this
# one is keyed on the `Country` name column (plus Date) rather than REF_AREA.
merge_6 = merge_5.merge(
    right=interest_rates,
    how="left",
    on=["Country", "Date"],
    validate="one_to_one",
)

In [None]:
# Load the pickled `3_month_US_treasuries` frame.
# NOTE(review): `us_treasuries` is not used in the visible cells, and the same
# file is loaded again later as `three_month_us_treasuries` - possibly a
# leftover. Absolute, machine-specific path.
us_treasuries = pd.read_pickle(
    filepath_or_buffer="/Users/nicolasroever/Desktop/Global Macro Variables/global_macro_variables/bld/3_month_US_treasuries.pkl",
)

In [None]:
# Load the pickled `vix` frame.
# NOTE(review): absolute, machine-specific path.
vix = pd.read_pickle(
    filepath_or_buffer="/Users/nicolasroever/Desktop/Global Macro Variables/global_macro_variables/bld/vix.pkl",
)

In [None]:
def turn_daily_time_series_into_quarterly_data(data):
    """This function turns daily data into quarterly data by taking the quarterly mean."""
    # Create a dictionary of old and new column names
    rename_dict = {
        col: col + "_Quarterly_Mean" if col != "Date" else col for col in data.columns
    }

    output_data = data.resample("QE", on="Date").mean().rename(columns=rename_dict)

    # Convert the index to a 'Date_Quarterly' column
    output_data["Date_Quarterly"] = output_data.index.to_period("Q")

    # Reset the index
    return output_data.reset_index(drop=True)

In [None]:
# Quarterly means of the `vix` series.
vix_quarterly = turn_daily_time_series_into_quarterly_data(data=vix)

In [None]:
# Preview the first rows of `vix_quarterly`.
vix_quarterly.head(n=5)

In [None]:
# Load the pickled `nasdaq` frame.
# NOTE(review): absolute, machine-specific path.
nasdaq = pd.read_pickle(
    filepath_or_buffer="/Users/nicolasroever/Desktop/Global Macro Variables/global_macro_variables/bld/nasdaq.pkl",
)

In [None]:
# Quarterly means of the `nasdaq` series.
nasdaq_quarterly = turn_daily_time_series_into_quarterly_data(data=nasdaq)

In [None]:
# Left-join `vix_quarterly` onto `nasdaq_quarterly` by quarter; pandas raises
# if `Date_Quarterly` is not unique on both sides.
merge_1 = nasdaq_quarterly.merge(
    right=vix_quarterly,
    how="left",
    on=["Date_Quarterly"],
    validate="one_to_one",
)

In [None]:
# Preview the first rows instead of displaying the whole frame.
merge_1.head()

In [None]:
# Missing-value heatmap for `merge_1`, rows ordered by quarter. The title
# label names the frame actually plotted so the heatmaps in this notebook can
# be told apart.
_make_missing_values_heatmap(merge_1, "merge_1", index=["Date_Quarterly"])

In [None]:
# Load the pickled `3_month_US_treasuries` frame.
# NOTE(review): the same file is also loaded earlier as `us_treasuries`.
# Absolute, machine-specific path.
three_month_us_treasuries = pd.read_pickle(
    filepath_or_buffer="/Users/nicolasroever/Desktop/Global Macro Variables/global_macro_variables/bld/3_month_US_treasuries.pkl",
)

In [None]:
# Quarterly means of the `three_month_us_treasuries` series.
three_month_us_treasuries_quarterly = turn_daily_time_series_into_quarterly_data(
    data=three_month_us_treasuries,
)

In [None]:
# Preview the first rows of the raw (un-aggregated) frame.
# NOTE(review): the frame merged below is
# `three_month_us_treasuries_quarterly` - confirm the raw frame is the one
# meant to be inspected here.
three_month_us_treasuries.head(n=5)

In [None]:
# Show the column labels of `merge_1`.
merge_1.columns

In [None]:
# Left-join the quarterly treasury means onto `merge_1` by quarter.
# NOTE(review): this rebinds `merge_2`, which earlier in the notebook held the
# (REF_AREA, Date) frame; re-running the earlier cells afterwards would pick
# up this frame instead. A distinct name would avoid the clash.
merge_2 = merge_1.merge(
    right=three_month_us_treasuries_quarterly,
    how="left",
    on=["Date_Quarterly"],
    validate="one_to_one",
)

In [None]:
# Missing-value heatmap for `merge_2`, rows ordered by quarter. The title
# label names the frame actually plotted so the heatmaps in this notebook can
# be told apart.
_make_missing_values_heatmap(merge_2, "merge_2", index=["Date_Quarterly"])