In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import helper_400

# Load IPython's autoreload extension in mode 2, so imported modules
# (e.g. helper_400) are re-imported before each cell execution and edits to
# them take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Presumably applies the project's seaborn/matplotlib styling to all later
# figures — TODO confirm against helper_400.
helper_400.set_sns_style()

In [None]:
# Developer convenience only: load the lab_black extension (presumably
# auto-formats cells with Black on execution); it has no effect on the
# analysis results.
%load_ext lab_black

In [None]:
import geopandas as gpd

# GeoJSON read into the working table `data`; the cells below use its
# demographic estimate columns (E_*) and its hotspot columns.
# NOTE(review): presumably written by an earlier pipeline step into outputs/
# — confirm before re-running on a fresh checkout.
tract_geojson = "outputs/hotspot_vind_censustract_wc.geojson"
data = gpd.read_file(tract_geojson)

In [None]:
# Derive the white population estimate as the residual: total population
# minus the row-wise sum of every other listed group.
non_white_cols = [
    "E_AFAM",
    "E_HISP",
    "E_ASIAN",
    "E_AIAN",
    "E_NHPI",
    "E_TWOMORE",
    "E_OTHERRACE",
]
non_white_total = data[non_white_cols].sum(axis=1)
data["E_WHITE"] = data["E_TOTPOP"] - non_white_total

In [None]:
# Number of rows whose "_hws" value is different from zero.
has_nonzero_hws = data["_hws"] != 0
len(data[has_nonzero_hws])

In [None]:
# Normalize each demographic variable by that group's total population:
# every row's value becomes its percentage share of the column sum.
demographic_cols = [
    "E_AFAM",
    "E_HISP",
    "E_ASIAN",
    "E_AIAN",
    "E_NHPI",
    "E_TWOMORE",
    "E_OTHERRACE",
    "E_WHITE",
    "E_TOTPOP",
]

# Plot-friendly labels for the groups shown in the bar plots. Columns that
# are not listed here keep the "<column>_normalized" name.
group_labels = {
    "E_WHITE": "white",
    "E_AFAM": "black",
    "E_HISP": "hispanic",
    "E_ASIAN": "asian",
    "E_AIAN": "indian or \nalaska native",
}

# Write each share straight to its final column name instead of creating
# "<column>_normalized" and renaming afterwards. Re-running this cell then
# overwrites the same columns; the create-then-rename approach would leave
# duplicate "white", "black", ... labels behind on a second run, and those
# duplicates would be melted twice by the plotting cells.
for dc in demographic_cols:
    target_col = group_labels.get(dc, dc + "_normalized")
    data[target_col] = data[dc] * 100 / data[dc].sum()

# Groups (in plotting order) used as value columns by the bar-plot cells.
cats = list(group_labels.values())

## Bar plots normalized per population

In [None]:
# Inspect the column labels currently present on `data`.
data.columns

In [None]:
import plotting

# Hotspot identifiers to iterate over; the plotting loop below uses each one
# as a column name of the working table (presumably a list of strings —
# TODO confirm against plotting.get_hotspot_list).
hspts = plotting.get_hotspot_list()

In [None]:
# Display the hotspot identifiers that will be plotted.
hspts

In [None]:
# NOTE: this binds a second name to the same object (no copy is made), so
# columns assigned to `df` in later cells also appear on `data`.
df = data

In [None]:
# Hotspot variables whose x axis gets the day-based label instead of the
# generic severity label.
day_count_hotspots = (
    "wfday",
    "wfday_2d",
    "hw",
    "hws",
    "ws",
    "hws5",
    "ws5",
    "hws_2d",
    "ws_2d",
)

# One grouped bar plot per hotspot variable, saved under figures/ and shown inline.
for hspt in hspts:
    # Exposure category for every row, derived from this hotspot's values by
    # helper_400.get_cut_vars.
    df["exposure_category"] = helper_400.get_cut_vars(hspt, df[hspt])

    # Long format: one row per (original row, demographic group) pair, with
    # the group name in "type" and its normalized share in "value".
    shares_wide = df[cats + ["exposure_category"]]
    shares_long = pd.melt(
        shares_wide,
        id_vars="exposure_category",
        var_name="type",
        value_name="value",
    )

    plt.figure(figsize=(5, 4))
    sns.barplot(
        data=shares_long,
        x="exposure_category",
        y="value",
        hue="type",
        errorbar="ci",
    )

    plt.title(helper_400.get_title(hspt))
    plt.ylabel("Proportion of the demographic group (%)")
    if hspt in day_count_hotspots:
        plt.xlabel("Days (intervals) of exposure")
    else:
        plt.xlabel("Exposure severity (higher is worse)")

    plt.legend(title="Group", loc="best")
    plt.xticks(rotation=0)
    plt.tight_layout()
    plt.savefig("figures/qbar_demographics" + "_" + hspt + "_wc.png")
    plt.show()

## Obsolete

In [None]:
# Obsolete variant of the demographic bar plots: for each hotspot, sum every
# group's population per exposure category, express each category as a
# percentage of that group's total, and hand the table to helper_400.plot_qbar.

# Raw estimate column -> legend label, in plotting order.
race_labels = {
    "E_WHITE": "white",
    "E_AFAM": "black",
    "E_HISP": "hispanic",
    "E_ASIAN": "asian",
    "E_AIAN": "indian or \nalaska native",
}

# This loop used to iterate `hotspots.keys()`, but no `hotspots` object is
# defined anywhere in this notebook, so the cell raised NameError on a fresh
# kernel. Iterate the same hotspot list as the main plotting loop instead.
for hspt in hspts:
    df["exposure_category"] = helper_400.get_cut_vars(hspt, df[hspt])

    # Population of each plotted group summed within every exposure category.
    category_totals = df.groupby("exposure_category")[list(race_labels)].sum()

    # Column-wise share (%): each category's total divided by the group's
    # total over all categories. Only the five plotted groups are computed;
    # the other groups were dropped before plotting anyway.
    exposure_per_category = (category_totals * 100 / category_totals.sum()).rename(
        columns=race_labels
    )

    helper_400.plot_qbar(exposure_per_category, hspt, figname="race", legloc="best")

In [None]:
# misc
# Exploratory: for the "heatday" hotspot, draw one bar plot per demographic
# group showing the summed normalized share per exposure category, with the
# standard deviation of the underlying rows as the error bar.

df["exposure_category"] = helper_400.get_cut_vars("heatday", df["heatday"])

df_melt = pd.melt(
    df[cats + ["exposure_category"]],
    id_vars="exposure_category",
    var_name="type",
    value_name="sum_value",
)

# Aggregate once per (exposure category, group). The standard deviation is
# computed directly from the raw values: broadcasting it to every row first
# and then summing it would multiply it by the number of rows in the group.
df_agg = (
    df_melt.groupby(["exposure_category", "type"])["sum_value"]
    .agg(sum_value="sum", std="std")
    .reset_index()
)
# Groups with fewer than two rows have no defined standard deviation; use 0
# so that no error bar is drawn for them.
df_agg["std"] = df_agg["std"].fillna(0)

# Create a separate bar plot for each demographic group.
for value_type in cats:
    df_sub = df_agg[df_agg["type"] == value_type]
    plt.figure()
    sns.barplot(x="exposure_category", y="sum_value", data=df_sub, yerr=df_sub["std"])
    plt.title(f"Barplot for {value_type}")
    plt.show()