In [None]:
import os

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Load the CSV stored at ../data/glen-data.csv and end the cell with its
# summary statistics so they render as the cell output.
data_path = os.path.join("..", "data", "glen-data.csv")
raw_data = pd.read_csv(data_path)
raw_data.describe()

In [None]:
# Hard Parameters
# Size tuple handed to matplotlib as `figsize` by the plotting cells.
FIG_SIZE = (16, 8)
# Figure dots per inch (resolution)
DPI = 300

In [None]:
# Histogram of the YEAR column with one bin per distinct value found in it.
n_year_bins = len(raw_data["YEAR"].unique())
raw_data["YEAR"].plot.hist(bins=n_year_bins)
plt.xlabel("Year")
plt.ylabel("Num Bills Analyzed [-]");

In [None]:
# Scatter OUTCOME against YEAR, with the y-axis clipped to the 0-5 range.
year_values = raw_data["YEAR"]
outcome_values = raw_data["OUTCOME"]
plt.scatter(year_values, outcome_values)
plt.ylabel("Duration after question leg. adopted [years]")
plt.ylim([0, 5])
plt.show()

In [None]:
# Per-year sum of the "switcher" column after recoding 0 as -1.
# Work on a deep copy so the recoding below never touches raw_data.
mod_data = raw_data.copy(deep=True)
# Recode with a single .loc assignment. The chained form
# (mod_data["switcher"][mask] = -1) raises SettingWithCopyWarning and does
# nothing when pandas copy-on-write is enabled.
mod_data.loc[mod_data["switcher"] == 0, "switcher"] = -1
# One groupby gives the per-year sums; there is no need to re-group the frame
# for every distinct year.
switcher_by_year = mod_data.groupby("YEAR")["switcher"].sum()
plt.bar(switcher_by_year.index, switcher_by_year.to_numpy())
plt.ylabel("Legislation Change Preference [-]")
plt.title("(-) reflects preference for change, (+) for status quo")
# Render explicitly so the cell does not echo the Text object from plt.title.
plt.show()

In [None]:
# Try to replicate paper figures
# Shared inputs for the figure cells: the number of favourability bins, the
# total row count, and the subset of rows whose OUTCOME is non-zero.
# NOTE(review): rows with a missing OUTCOME also satisfy `!= 0` - confirm that
# is intended.
n_bins = 10
total_policies = len(raw_data.index)
policy_adopted = raw_data.loc[raw_data["OUTCOME"] != 0]
n_adopted = len(policy_adopted.index)

# Average Citizens
# Bin the "pred50_sw" values (treated as 0-1 proportions) into n_bins
# equal-width bins. For each bin, plot the share of rows with non-zero OUTCOME
# (black line) over the share of all rows that fall in the bin (grey bars).
average_citizen_percent_favoring = np.linspace(0, 100, n_bins + 1)
bin_width_percent = 100 / n_bins
# Bin mid-points on the percent axis (replaces the hard-coded "+ 5").
bin_centers_percent = average_citizen_percent_favoring[:-1] + bin_width_percent / 2
# Edges on the 0-1 scale of the data. The previous bounds (i * n_bins / 100)
# only equalled i / n_bins when n_bins == 10.
bin_edges = np.arange(n_bins + 1) / n_bins

# np.histogram uses half-open bins [lo, hi) with the last bin closed, so a
# value lying exactly on an edge is counted once instead of being dropped by
# the old strict ">" / "<" comparisons. Missing values are excluded up front.
cases_per_bin, _ = np.histogram(raw_data["pred50_sw"].dropna(), bins=bin_edges)
adopted_per_bin, _ = np.histogram(policy_adopted["pred50_sw"].dropna(), bins=bin_edges)

# An empty bin yields NaN (a gap in the line) rather than ZeroDivisionError.
average_citizen_predicted_prob_adoption = (
    np.divide(
        adopted_per_bin,
        cases_per_bin,
        out=np.full(n_bins, np.nan),
        where=cases_per_bin > 0,
    )
    * 100
).tolist()
average_citizen_percent_of_cases = (
    cases_per_bin / len(raw_data.index) * 100
).tolist()

fig, ax = plt.subplots(figsize=FIG_SIZE)
ax.plot(
    bin_centers_percent,
    average_citizen_predicted_prob_adoption,
    linewidth=5,
    color="k",
)
ax.set_xlim([0, 100])
ax.set_ylim([0.0, 70])
ax.set_xlabel("Percent favoring proposed policy changes [%]")
ax.set_ylabel("Predicted probability of adoption [%]")
ax.set_xticks(average_citizen_percent_favoring)
ax.set_title("Average Citizens' Preferences")
# Draw the line axes above the twin bar axes and hide its background patch so
# the bars remain visible underneath.
ax.set_zorder(1)
ax.patch.set_visible(False)

ax2 = ax.twinx()
ax2.bar(
    bin_centers_percent,
    average_citizen_percent_of_cases,
    width=0.95 * bin_width_percent,  # 9.5 for ten bins; leaves a thin gap
    color="grey",
)
ax2.set_ylabel("Percent of cases (grey columns) [%]")
ax2.set_ylim([0, 40])

# plt.savefig(os.path.join("..", "images", "average-citizens-preferences.png"), dpi=DPI)
plt.show()

In [None]:
# Economic elites
# Bin the "pred90_sw" values (treated as 0-1 proportions) into n_bins
# equal-width bins. For each bin, plot the share of rows with non-zero OUTCOME
# (black line) over the share of all rows that fall in the bin (grey bars).
affluent_percent_favoring = np.linspace(0, 100, n_bins + 1)
bin_width_percent = 100 / n_bins
# Bin mid-points on the percent axis (replaces the hard-coded "+ 5").
bin_centers_percent = affluent_percent_favoring[:-1] + bin_width_percent / 2
# Edges on the 0-1 scale of the data. The previous bounds (i * n_bins / 100)
# only equalled i / n_bins when n_bins == 10.
bin_edges = np.arange(n_bins + 1) / n_bins

# np.histogram uses half-open bins [lo, hi) with the last bin closed, so a
# value lying exactly on an edge is counted once instead of being dropped by
# the old strict ">" / "<" comparisons. Missing values are excluded up front.
cases_per_bin, _ = np.histogram(raw_data["pred90_sw"].dropna(), bins=bin_edges)
adopted_per_bin, _ = np.histogram(policy_adopted["pred90_sw"].dropna(), bins=bin_edges)

# An empty bin yields NaN (a gap in the line) rather than ZeroDivisionError.
affluent_predicted_prob_adoption = (
    np.divide(
        adopted_per_bin,
        cases_per_bin,
        out=np.full(n_bins, np.nan),
        where=cases_per_bin > 0,
    )
    * 100
).tolist()
affluent_percent_of_cases = (cases_per_bin / len(raw_data.index) * 100).tolist()

fig, ax = plt.subplots(figsize=FIG_SIZE)
ax.plot(
    bin_centers_percent,
    affluent_predicted_prob_adoption,
    linewidth=5,
    color="k",
)
ax.set_xlim([0, 100])
ax.set_ylim([0.0, 70])
ax.set_xlabel("Percent favoring proposed policy changes [%]")
ax.set_ylabel("Predicted probability of adoption [%]")
ax.set_xticks(affluent_percent_favoring)
ax.set_title("Economic Elites' Preferences")
# Draw the line axes above the twin bar axes and hide its background patch so
# the bars remain visible underneath.
ax.set_zorder(1)
ax.patch.set_visible(False)

ax2 = ax.twinx()
ax2.bar(
    bin_centers_percent,
    affluent_percent_of_cases,
    width=0.95 * bin_width_percent,  # 9.5 for ten bins; leaves a thin gap
    color="grey",
)
ax2.set_ylabel("Percent of cases (grey columns) [%]")
ax2.set_ylim([0, 40])

# plt.savefig(os.path.join("..", "images", "economic-elites-preferences.png"), dpi=DPI)
plt.show()

In [None]:
# Interest groups
# FIXME(review): this cell still reads "pred90_sw" and is titled
# "Economic Elites' Preferences", both identical to the affluent-citizen cell
# and apparently copy-paste leftovers. Point `interest_groups_column` at the
# interest-group measure and update the title once the right column is known.
interest_groups_column = "pred90_sw"

interest_groups_percent_favoring = np.linspace(0, 100, n_bins + 1)
bin_width_percent = 100 / n_bins
# Bin mid-points on the percent axis (replaces the hard-coded "+ 5").
bin_centers_percent = interest_groups_percent_favoring[:-1] + bin_width_percent / 2
# Edges on the 0-1 scale of the data. The previous bounds (i * n_bins / 100)
# only equalled i / n_bins when n_bins == 10.
bin_edges = np.arange(n_bins + 1) / n_bins

# np.histogram uses half-open bins [lo, hi) with the last bin closed, so a
# value lying exactly on an edge is counted once instead of being dropped by
# the old strict ">" / "<" comparisons. Missing values are excluded up front.
cases_per_bin, _ = np.histogram(
    raw_data[interest_groups_column].dropna(), bins=bin_edges
)
adopted_per_bin, _ = np.histogram(
    policy_adopted[interest_groups_column].dropna(), bins=bin_edges
)

# An empty bin yields NaN (a gap in the line) rather than ZeroDivisionError.
interest_groups_predicted_prob_adoption = (
    np.divide(
        adopted_per_bin,
        cases_per_bin,
        out=np.full(n_bins, np.nan),
        where=cases_per_bin > 0,
    )
    * 100
).tolist()
interest_groups_percent_of_cases = (
    cases_per_bin / len(raw_data.index) * 100
).tolist()

fig, ax = plt.subplots(figsize=FIG_SIZE)
ax.plot(
    bin_centers_percent,
    interest_groups_predicted_prob_adoption,
    linewidth=5,
    color="k",
)
ax.set_xlim([0, 100])
ax.set_ylim([0.0, 70])
ax.set_xlabel("Percent favoring proposed policy changes [%]")
ax.set_ylabel("Predicted probability of adoption [%]")
ax.set_xticks(interest_groups_percent_favoring)
ax.set_title("Economic Elites' Preferences")
# Draw the line axes above the twin bar axes and hide its background patch so
# the bars remain visible underneath.
ax.set_zorder(1)
ax.patch.set_visible(False)

ax2 = ax.twinx()
ax2.bar(
    bin_centers_percent,
    # Use this cell's own case shares; the bars previously plotted
    # affluent_percent_of_cases, a variable left over from another cell.
    interest_groups_percent_of_cases,
    width=0.95 * bin_width_percent,  # 9.5 for ten bins; leaves a thin gap
    color="grey",
)
ax2.set_ylabel("Percent of cases (grey columns) [%]")
ax2.set_ylim([0, 40])

# plt.savefig(os.path.join("..", "images", "interest-group-preferences.png"), dpi=DPI)
plt.show()

In [None]:
# Text summary of adoption counts and rates per favourability bin.
# Edges on the 0-1 scale of the data. The previous bounds (i * n_bins / 100)
# only equalled i / n_bins when n_bins == 10.
summary_bin_edges = np.arange(n_bins + 1) / n_bins

# np.histogram uses half-open bins [lo, hi) with the last bin closed, so a
# value lying exactly on an edge is counted once instead of being dropped by
# the old strict ">" / "<" comparisons. Missing values are excluded up front.
adopted_50_per_bin, _ = np.histogram(
    policy_adopted["pred50_sw"].dropna(), bins=summary_bin_edges
)
adopted_90_per_bin, _ = np.histogram(
    policy_adopted["pred90_sw"].dropna(), bins=summary_bin_edges
)
cases_50_per_bin, _ = np.histogram(
    raw_data["pred50_sw"].dropna(), bins=summary_bin_edges
)
# An empty bin prints as nan instead of raising ZeroDivisionError.
percent_adopted_50 = (
    np.divide(
        adopted_50_per_bin,
        cases_50_per_bin,
        out=np.full(n_bins, np.nan),
        where=cases_50_per_bin > 0,
    )
    * 100
)

n_passed = len(policy_adopted.index)
n_proposed = len(raw_data.index)

print(
    f"# Policies adopted in average citizen (50th wealth percentile) {n_bins} percent favoring bins:",
    adopted_50_per_bin.tolist(),
)
print(
    f"# Policies adopted in affluent citizen (90th wealth percentile) {n_bins} percent favoring bins:",
    adopted_90_per_bin.tolist(),
)

print(f"Total number of passed legislations in dataset: {n_passed}")
print(f"Total number of proposed legislations in dataset: {n_proposed}")
print(f"Average chance to pass in dataset: {n_passed / n_proposed * 100:0.1f}%")
print(
    f"% Policies adopted in average citizen (50th wealth percentile) {n_bins} percent favoring bins:",
    [round(percent, 1) for percent in percent_adopted_50.tolist()],
)

In [None]:
# Pearson correlation between the pred*_sw columns, drawn twice: first on a
# fixed 0-1 colour scale, then on matplotlib's automatic scale.
cols = [f"pred{val}_sw" for val in [10, 30, 50, 70, 90]]
tick_positions = range(len(cols))
for color_limits in ({"vmin": 0, "vmax": 1}, {}):
    plt.matshow(raw_data[cols].corr(method="pearson"), **color_limits)
    plt.xticks(tick_positions, cols, rotation=45)
    plt.yticks(tick_positions, cols)
    plt.title("Correlation check between income brackets")
    plt.colorbar()
    plt.show()

In [None]:
# Overlay a translucent 10-bin histogram for every column listed in `cols`.
for column_name in cols:
    raw_data[column_name].plot.hist(bins=10, alpha=0.5)
plt.title("Normal distribution check")
plt.xlabel("Proportion in favor of legislation [-]")
plt.legend()
plt.show()

In [None]:
# Print summary statistics for pred90_sw and pred50_sw among adopted policies,
# for their row-wise difference, and for YEAR across the whole dataset.
adopted_pred90 = policy_adopted["pred90_sw"]
adopted_pred50 = policy_adopted["pred50_sw"]
for series in (
    adopted_pred90,
    adopted_pred50,
    adopted_pred90 - adopted_pred50,
    raw_data["YEAR"],
):
    print(series.describe())

In [None]:
# For one XL_AREA value, draw a grid of stacked bars (favour / oppose / don't
# know, as percentages per row) with one panel per age-group column prefix.
# Drop missing areas: a NaN key cannot be fetched with get_group or capitalized.
leg_areas = raw_data["XL_AREA"].dropna().unique().tolist()
years = raw_data["YEAR"].unique().tolist()
# Distinct prefixes (text before the first "_") of every column containing
# "age", ordered by the integer that follows "age". The earlier string-keyed
# sort was redundant because this numeric sort always ran after it.
age_groups = list({col.split("_")[0] for col in raw_data.columns if "age" in col})
age_groups.sort(key=lambda a: int(a.split("age")[1]))
fav_opp_dk = ["fav", "opp", "dk"]

# There are 12 "age/income/etc." delineations
nrows = 4
ncols = 3

leg_grouped_data = raw_data.groupby("XL_AREA")
# NOTE(review): only the first area is plotted - presumably to limit output;
# widen the slice to draw every area.
for leg_area in leg_areas[0:1]:
    leg_data = leg_grouped_data.get_group(leg_area)

    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=FIG_SIZE)
    x_positions = range(len(leg_data.index))
    for i, age_group in enumerate(age_groups):
        # Fill the grid row by row; wrapping follows ncols instead of a
        # hard-coded 3.
        grid_row, grid_col = divmod(i, ncols)
        panel = axes[grid_row][grid_col]

        total_people = leg_data[
            [f"{age_group}_{fav}" for fav in fav_opp_dk]
        ].sum(axis=1)
        fav_prop, opp_prop, dk_prop = (
            leg_data[f"{age_group}_{fav}"] / total_people for fav in fav_opp_dk
        )

        # Stack the three shares so that each bar spans the responses of one row.
        panel.bar(x_positions, (fav_prop * 100).to_list())
        panel.bar(
            x_positions,
            (opp_prop * 100).to_list(),
            bottom=(fav_prop * 100).to_list(),
        )
        panel.bar(
            x_positions,
            (dk_prop * 100).to_list(),
            bottom=((fav_prop + opp_prop) * 100).to_list(),
        )

        panel.set_ylabel(f"Age Group {i + 1} [%]")
        panel.set_ylim([0, 100])

    axes[nrows - 1][ncols - 1].legend(["For", "Against", "Undecided"])
    # Raw f-string: "\R" is not a valid escape sequence in a normal string
    # literal and triggers a warning; the rendered title text is unchanged.
    fig.suptitle(
        rf"{leg_area.capitalize()} Policy: % Average Favorability by Age Group (Young $\Rightarrow$ Old)"
    )
    plt.show()