In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
import xmltodict

In [None]:
# Load the event-study coefficient table from the project's build directory.
# NOTE(review): absolute, user-specific path — this only resolves on the original
# author's machine; consider a path relative to a configurable project root.
event_study_coefficients = pd.read_pickle(
    "/Users/nicolasroever/Documents/Promotion/Debt Crisis/debt_crisis/bld/data/event_study_approach/event_study_coefficients_data.pkl",
)

In [None]:
event_study_coefficients.head()

In [None]:
# Load the event-study dataset; the cells below read its "Country", "Date",
# "Bond_Yield_Spread" and "McDonald_Sentiment_Index" columns.
# NOTE(review): absolute, user-specific path — not portable to other machines.
event_study_data = pd.read_pickle(
    "/Users/nicolasroever/Documents/Promotion/Debt Crisis/debt_crisis/bld/data/event_study_approach/event_study_dataset.pkl",
)

In [None]:
def plot_sentiment_index_and_bond_yield_spread_for_country(
    first_step_regression_data,
    country,
    color_scheme=None,
):
    """Plot a country's bond yield spread against its sentiment index over time.

    The bond yield spread is drawn on the left y-axis; the McDonald sentiment
    index is drawn on a second, inverted y-axis on the right that shares the
    date axis.

    Args:
        first_step_regression_data (pandas.DataFrame): Data containing the
            columns "Country", "Date", "Bond_Yield_Spread" and
            "McDonald_Sentiment_Index".
        country (str): Value of the "Country" column to plot. The comparison
            is exact; the capitalized form is only used for labels.
        color_scheme (list[str] | None): Colors for the plot. The first entry
            is used for the spread line, the second for the sentiment line.
            Defaults to a five-color hex palette.

    Returns:
        matplotlib.figure.Figure: The figure holding both axes.

    Note:
        Calling this function changes global plotting state: it applies the
        seaborn "white" style and switches matplotlib text rendering to LaTeX
        (``text.usetex``), which stays active for later figures.
    """
    if color_scheme is None:
        color_scheme = ["#3c5488", "#e64b35", "#4dbbd5", "#00a087", "#f39b7f"]

    # Apply the global style settings *before* any artist is created, so the
    # first call renders the same way as every later call.
    sns.set_style("white")
    plt.rc("text", usetex=True)

    # Rows of the requested country in chronological order
    country_data = first_step_regression_data[
        first_step_regression_data["Country"] == country
    ].sort_values("Date")

    fig, ax1 = plt.subplots(figsize=(8, 5))

    # Left axis: bond yield spread
    ax1.plot(
        country_data["Date"],
        country_data["Bond_Yield_Spread"],
        marker="o",
        color=color_scheme[0],
        label=f"Bond Yield Spread {country.capitalize()} ",
    )
    ax1.set_ylabel("Bond Yield Spread in Basis Points", fontsize=14)
    # The x-label must live on ax1: the twin axes created below hides its own
    # x-axis, so a label set on it would never be drawn.
    ax1.set_xlabel("Date", fontsize=14)

    # Right axis: sentiment index, inverted
    ax2 = ax1.twinx()
    ax2.plot(
        country_data["Date"],
        country_data["McDonald_Sentiment_Index"],
        marker="o",
        color=color_scheme[1],
        label=f"Sentiment Index {country.capitalize()} ",
    )
    ax2.set_ylabel("Sentiment Index", fontsize=14)
    ax2.invert_yaxis()

    ax1.set_title(
        f"Raw Sentiment Data {country.capitalize()} with Bond Yield Spread for {country.capitalize()} ",
        fontsize=16,
    )

    # Drop only the top spine; the right spine carries the second y-axis.
    sns.despine(fig=fig, left=False, bottom=False, right=False, top=True)

    # One combined legend for the lines of both axes
    lines, labels = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(lines + lines2, labels + labels2, loc="upper right")

    return fig

In [None]:
# Sentiment index vs. bond yield spread for Portugal
fig = plot_sentiment_index_and_bond_yield_spread_for_country(
    event_study_data,
    "portugal",
)
# plt.show() instead of fig.show(): Figure.show() needs a GUI backend and only
# emits a warning under the non-GUI backend used for inline notebook output.
plt.show()

In [None]:
# Rows of the event-study dataset that belong to Portugal
portugal_filter = event_study_data.loc[
    lambda frame: frame["Country"] == "portugal"
]

In [None]:
# Correlation between Portugal's sentiment index and its bond yield spread
# (Series.corr with its default method).
portugal_filter["McDonald_Sentiment_Index"].corr(portugal_filter["Bond_Yield_Spread"])

In [None]:
# Keep only the rows whose "Variable" is a country-quarter dummy of the form
# "Dummy_<country>_<quarter>", then derive date, country and a 95% confidence
# interval for each coefficient.
# NOTE(review): `event_study_data` is the frame whose "Country"/"Date" columns
# are plotted above, while `event_study_coefficients` is loaded but never used —
# confirm which frame is meant to provide "Variable", "Coefficient" and
# "Standard Errors".
pattern = r"^Dummy_\w+_\w+$"
# .copy() gives an independent frame, so the column assignments below do not
# write to a slice of the source frame (avoids pandas' SettingWithCopyWarning).
coefficient_data = event_study_data.loc[
    event_study_data["Variable"].str.contains(pattern, regex=True),
    :,
].copy()

# Split the variable name once; the last part is the quarter, the one before
# it the country.
variable_parts = coefficient_data["Variable"].str.split("_")
coefficient_data["Date"] = pd.to_datetime(variable_parts.str[-1])
coefficient_data["Country"] = variable_parts.str[-2]

# 1.96 is the two-sided 95% critical value of the standard normal distribution.
coefficient_data["CI_95_lower"] = (
    coefficient_data["Coefficient"] - coefficient_data["Standard Errors"] * 1.96
)
coefficient_data["CI_95_upper"] = (
    coefficient_data["Coefficient"] + coefficient_data["Standard Errors"] * 1.96
)

In [None]:
coefficient_data.head()

In [None]:
# Five-color hex palette for the plots below; identical to the default
# `color_scheme` of plot_sentiment_index_and_bond_yield_spread_for_country.
nature_color_scheme = ["#3c5488", "#e64b35", "#4dbbd5", "#00a087", "#f39b7f"]

In [None]:
# Event-study coefficients for Greece over time with their 95% confidence band.
# (matplotlib and seaborn are already imported in the first cell.)

# Global style settings go first so they apply to the text created below.
sns.set_style("white")
plt.rc("text", usetex=True)

# Coefficients for Greece in chronological order
greece_data = coefficient_data[coefficient_data["Country"] == "greece"].sort_values(
    "Date",
)

greece_fig, greece_ax = plt.subplots(figsize=(10, 6))
greece_ax.plot(
    greece_data["Date"],
    greece_data["Coefficient"],
    marker="o",
    color=nature_color_scheme[0],
)

# Reference line at zero
greece_ax.axhline(0, color="grey", linestyle=":")

# Confidence band in the same palette color as the coefficient line
greece_ax.fill_between(
    greece_data["Date"],
    greece_data["CI_95_lower"],
    greece_data["CI_95_upper"],
    color=nature_color_scheme[0],
    alpha=0.1,
)

greece_ax.set_title(
    "Coefficients for Greece Over Time with Confidence Interval",
    fontsize=16,
)
greece_ax.set_xlabel("Date", fontsize=14)
greece_ax.set_ylabel("Coefficient", fontsize=14)

# Keep only the left and bottom spines
sns.despine(fig=greece_fig, left=False, bottom=False, right=True, top=True)

plt.show()

In [None]:
# Countries (lower-case names) for which country-quarter dummy terms are
# generated in the formula cells below.
EVENT_STUDY_COUNTRIES = [
    "netherlands",
    "latvia",
    "austria",
    "italy",
    "finland",
    "slovenia",
    "lithuania",
    "greece",
    "portugal",
    "spain",
    "germany",
    "belgium",
    "ireland",
    "france",
]

In [None]:
# Right-hand-side dummy terms: one "Dummy_<country>_<quarter>" per country and
# quarter from 2009Q1 through 2011Q4, joined with " + ".
" + ".join(
    f"Dummy_{country}_{quarter}"
    for country in EVENT_STUDY_COUNTRIES
    for quarter in pd.period_range(start="2009Q1", end="2011Q4", freq="Q")
)

In [None]:
# Regression formula: bond yield spread on the macro controls plus one dummy
# per country and quarter. Column names that are not valid Python identifiers
# are wrapped in Q('...').
# NOTE(review): the dummies here cover 2009Q1-2009Q4 only, whereas the other
# quarter ranges in this notebook run through 2011Q4 — confirm this is intended.
formula = (
    "Bond_Yield_Spread ~ Q('Public_Debt_as_%_of_GDP')+ GDP_in_Current_Prices_Growth + "
    "GDP_in_Current_Prices_Growth_Lead + Current_Account_in_USD + "
    "VIX_Daily_Close_Quarterly_Mean + Q('Eurostat_CPI_Annualised Growth_Rate') + "
    # The trailing " + " separates the last control from the first dummy term;
    # without it the two names are fused into a single invalid term.
    "NASDAQ_Daily_Close_Quarterly_Mean + Q('3_Month_US_Treasury_Yield_Quarterly_Mean') + "
    + " + ".join(
        [
            f"Dummy_{country}_{quarter}"
            for country in EVENT_STUDY_COUNTRIES
            for quarter in pd.period_range(start="2009Q1", end="2009Q4", freq="Q")
        ],
    )
)

In [None]:
# Re-reads the same coefficients pickle that the first load cell assigned to
# `event_study_coefficients`; the result is not stored, only displayed in full.
# NOTE(review): redundant load with an absolute, user-specific path.
pd.read_pickle(
    "/Users/nicolasroever/Documents/Promotion/Debt Crisis/debt_crisis/bld/data/event_study_approach/event_study_coefficients_data.pkl",
)

In [None]:
event_study_data.head()

In [None]:
def add_quarter_columns(df, country="greece", start="2009Q1", end="2011Q4"):
    """Add one 0/1 indicator column per quarter for a single country.

    For every quarter between ``start`` and ``end`` (inclusive) a column named
    after the quarter (e.g. "2009Q1") is added. It is 1 for rows whose "Date"
    lies within that quarter and whose "Country" equals ``country``, else 0.

    Args:
        df (pandas.DataFrame): Frame with a datetime "Date" column and a
            string "Country" column. It is modified in place.
        country (str): Country to flag. Matched case-insensitively, because
            the country names used throughout this notebook are lower-case.
        start (str): First quarter, in a form accepted by
            ``pandas.period_range`` (e.g. "2009Q1").
        end (str): Last quarter (inclusive).

    Returns:
        pandas.DataFrame: The same ``df`` object, with the indicator columns
        added.
    """
    # The country match does not depend on the quarter, so compute it once.
    is_country = df["Country"].str.lower() == country.lower()
    for quarter in pd.period_range(start=start, end=end, freq="Q"):
        # Both quarter boundaries are inclusive.
        in_quarter = df["Date"].between(quarter.start_time, quarter.end_time)
        df[str(quarter)] = (in_quarter & is_country).astype(int)
    return df

In [None]:
# Load the quarterly step-one regression dataset from the build directory.
# NOTE(review): absolute, user-specific path — not portable to other machines.
quarterly_data = pd.read_pickle(
    "/Users/nicolasroever/Documents/Promotion/Debt Crisis/debt_crisis/bld/data/step_one_regression_dataset_output_quarterly.pkl",
)

In [None]:
quarterly_data.columns

In [None]:
# Load the cleaned sentiment word counts; the cells below use the columns
# "Word", "Count", "Positive_Indicator" and "Negative_Indicator".
# NOTE(review): absolute, user-specific path — not portable to other machines.
dictionary = pd.read_csv(
    "/Users/nicolasroever/Documents/Promotion/Debt Crisis/debt_crisis/bld/data/sentiment_data/sentiment_word_count_clean.csv",
)

In [None]:
# Plot the most frequent positive words

# Positive words that occur more than 600 times
filtered_data = dictionary[
    (dictionary["Positive_Indicator"] == 1) & (dictionary["Count"] > 600)
]

# Most frequent first
sorted_data = filtered_data.sort_values(by="Count", ascending=False)

# Horizontal bar chart: counts run along the x-axis, words along the y-axis
plt.figure(figsize=(10, 6))
plt.barh(sorted_data["Word"], sorted_data["Count"])
plt.xlabel("Count")
plt.ylabel("Word")
plt.title("Word Counts for Positive Words")
plt.yticks(fontsize=8)  # Smaller font for the word labels on the y-axis
plt.show()

In [None]:
# Plot the most frequent negative words

# Negative words that occur more than 600 times
filtered_data_2 = dictionary[
    (dictionary["Negative_Indicator"] == 1) & (dictionary["Count"] > 600)
]

# Most frequent first
sorted_data_2 = filtered_data_2.sort_values(by="Count", ascending=False)

# Horizontal bar chart: counts run along the x-axis, words along the y-axis
plt.figure(figsize=(10, 6))
plt.barh(sorted_data_2["Word"], sorted_data_2["Count"])
plt.xlabel("Count")
plt.ylabel("Word")
plt.title("Word Counts for Negative Words")
plt.yticks(fontsize=8)  # Smaller font for the word labels on the y-axis
plt.show()

In [None]:
# Show all words ordered by descending count (result is displayed, not stored).
dictionary.sort_values(by="Count", ascending=False)

In [None]:
# Transposed view of the word-count table (one column per word).
dictionary.T

In [None]:
# Name the two columns of a bare word/count table. The cells above read at
# least four columns from `dictionary` ("Word", "Count", "Positive_Indicator",
# "Negative_Indicator"); assigning two names to such a frame raises
# "ValueError: Length mismatch", so only rename when the frame really has
# exactly two columns.
if dictionary.shape[1] == 2:
    dictionary.columns = ["Word", "Count"]

In [None]:
# Set of lower-case country names under study; intersected below with the
# countries present in the quarterly dataset.
COUNTRIES_UNDER_STUDY = {
    "austria",
    "belgium",
    "bulgaria",
    "croatia",
    "cyprus",
    "czechia",
    "denmark",
    "estonia",
    "finland",
    "france",
    "germany",
    "greece",
    "hungary",
    "ireland",
    "italy",
    "latvia",
    "lithuania",
    "luxembourg",
    "malta",
    "netherlands",
    "poland",
    "portugal",
    "romania",
    "slovakia",
    "slovenia",
    "spain",
    "sweden",
}

In [None]:
quarterly_data.columns

In [None]:
# Distinct values of the "Country" column in the quarterly dataset, as a set.
# NOTE(review): the name is misspelt ("quartertly"); it is referenced under
# this spelling in the intersection cell, so rename both together.
quartertly_countries = set(quarterly_data["Country"].unique())

In [None]:
# Countries under study that also appear in the quarterly dataset
common_countries = COUNTRIES_UNDER_STUDY & quartertly_countries

In [None]:
common_countries

# Parameters
The available query parameters can be looked up in the OECD Data Explorer: <https://data-explorer.oecd.org/vis?df[ds]=dsDisseminateFinalDMZ&df[id]=DSD_NAMAIN1%40DF_QNA_EXPENDITURE_USD&df[ag]=OECD.SDD.NAD&df[vs]=1.0&pd=%2C&dq=Q..AUS.S1..B1GQ.....V..&ly[cl]=TIME_PERIOD&to[TIME_PERIOD]=false&lo=5&lom=LASTNPERIODS>

In [None]:
# OECD SDMX REST data query (dataflow DSD_BOP@DF_BOP, agency OECD.SDD.TPS);
# dimensionAtObservation=AllDimensions requests a flat list of observations.
url = "https://sdmx.oecd.org/public/rest/data/OECD.SDD.TPS,DSD_BOP@DF_BOP,1.0/USA..CA.B..Q.USD_EXC+XDC.N?dimensionAtObservation=AllDimensions"

# Request Data

In [None]:
# Fetch the SDMX payload. The timeout keeps the cell from hanging forever on
# an unresponsive server, and raise_for_status() stops on HTTP errors
# instead of passing an error page on to the XML parser.
r = requests.get(url, timeout=60)
r.raise_for_status()

In [None]:
# Parse the XML response body into nested dictionaries.
dictionary_data = xmltodict.parse(r.content)

In [None]:
# Inspect the parsed response.
# NOTE(review): this prints the complete payload; consider showing only the keys.
dictionary_data

In [None]:
# All observations of the data set (the "generic:Obs" entries).
dictionary_data["message:GenericData"]["message:DataSet"]["generic:Obs"]

In [None]:
# First observation, used below to work out how one record is flattened.
# NOTE(review): assumes "generic:Obs" is a list, i.e. the response contains
# more than one observation — confirm for single-observation queries.
observation_dictionary = dictionary_data["message:GenericData"]["message:DataSet"][
    "generic:Obs"
][0]

In [None]:
# Value element of the first observation.
dictionary_data["message:GenericData"]["message:DataSet"]["generic:Obs"][0][
    "generic:ObsValue"
]

In [None]:
# 1. Observation key: turn the list of {"@id": ..., "@value": ...} entries
#    under "generic:ObsKey" into a flat {id: value} dict.
obs_key_data = observation_dictionary["generic:ObsKey"]["generic:Value"]
obs_key_dict = {d["@id"]: d["@value"] for d in obs_key_data}

In [None]:
# 2. Observation value: the "generic:ObsValue" entry, merged as a mapping into
#    the combined record further down.
obs_value_dict = observation_dictionary["generic:ObsValue"]

In [None]:
# 3. Observation attributes: turn the {"@id": ..., "@value": ...} entries under
#    "generic:Attributes" into a flat {id: value} dict.
obs_attributes_data = observation_dictionary["generic:Attributes"]["generic:Value"]
obs_attributes_dict = {d["@id"]: d["@value"] for d in obs_attributes_data}

In [None]:
# Merge key, value and attributes into one flat record; for duplicate keys the
# dict listed later wins.
full_observation = {**obs_key_dict, **obs_value_dict, **obs_attributes_dict}

In [None]:
# Show the record as a one-row frame with row label 0.
pd.DataFrame(full_observation, index=[0])

In [None]:
# Start from an empty frame for the collected observations.
full_data = pd.DataFrame()

In [None]:
# Flatten every observation into one record (key dimensions + value +
# attributes) and build the frame in a single step. Collecting plain dicts and
# constructing the DataFrame once avoids re-copying the growing frame on every
# iteration, gives a unique 0..n-1 index, and makes the cell safe to re-run
# (it no longer appends to whatever `full_data` already holds).
records = []
for observation_dictionary in dictionary_data["message:GenericData"][
    "message:DataSet"
]["generic:Obs"]:
    # 1. Observation key
    obs_key_data = observation_dictionary["generic:ObsKey"]["generic:Value"]
    obs_key_dict = {d["@id"]: d["@value"] for d in obs_key_data}
    # 2. Observation value
    obs_value_dict = observation_dictionary["generic:ObsValue"]
    # 3. Observation attributes
    obs_attributes_data = observation_dictionary["generic:Attributes"]["generic:Value"]
    obs_attributes_dict = {d["@id"]: d["@value"] for d in obs_attributes_data}

    full_observation = {**obs_key_dict, **obs_value_dict, **obs_attributes_dict}
    records.append(full_observation)

full_data = pd.DataFrame(records)

In [None]:
full_data.head()

In [None]:
# Distinct values of the COUNTERPART_AREA dimension in the downloaded data.
full_data["COUNTERPART_AREA"].unique()

In [None]:
# Set out everything about the request in the format specified by the OECD API
# NOTE(review): `oecd` is only assigned in a later cell
# (`oecd = pdmx.Request("OECD")`), so this cell raises NameError when the
# notebook is run top to bottom on a fresh kernel.
data = oecd.data(resource_id="DSD_NAMAIN1").to_pandas()

# Move the index levels of the result into ordinary columns for inspection.
df = pd.DataFrame(data).reset_index()
df.head()

In [None]:
df["MEASURE"]

In [None]:
# Tell pdmx we want OECD data
# NOTE(review): `pdmx` is not imported in the import cell at the top of this
# notebook, so this line raises NameError on a fresh kernel.
oecd = pdmx.Request("OECD")
# Set out everything about the request in the format specified by the OECD API
# (this overwrites the `data` and `df` assigned in the previous request cell).
data = oecd.data(
    resource_id="PDB_LV",
    key="GBR+FRA+CAN+ITA+DEU+JPN+USA.T_GDPEMP.CPC/all?startTime=2010",
).to_pandas()

df = pd.DataFrame(data).reset_index()
df.head()

In [None]:
# NOTE(review): `data` was last assigned the result of `.to_pandas()` above —
# confirm that object actually exposes a `.content` mapping with this key.
data.content["OECD.SDD.NAD:DSD_NAMAIN1@DF_QNA_EXPENDITURE_CAPITA(1.0)"]

In [None]:
# Read the OECD SDMX response (quarterly national accounts per capita, from
# 2022-Q4 on) directly from the REST URL with pandas; the result is only
# displayed, not stored.
pd.read_xml(
    "https://sdmx.oecd.org/public/rest/data/OECD.SDD.NAD,DSD_NAMAIN1@DF_QNA_EXPENDITURE_CAPITA,1.0/Q............?startPeriod=2022-Q4&dimensionAtObservation=AllDimensions",
)