Import Packages

In [2]:
import pandas as pd
import plotly.graph_objects as go
import polars as pl
import pyarrow.parquet as pq

Data Ingestion

In [3]:
# location of the raw sub-county ballot measure results, relative to this notebook
RAW_BALLOT_MEASURES_PATH = '../003_data/001_raw-data/2018-2022_ballot-measure_sub-county_data.parquet'

ballot_measures_df = pd.read_parquet(RAW_BALLOT_MEASURES_PATH)

# select congressional district sub county for state summary
# na=False: a missing sub_county would otherwise put NaN into the mask, which
#           pandas can refuse to use for boolean indexing; such rows are simply excluded
# regex=False: 'Congressional' is a plain substring, not a pattern
is_congressional = ballot_measures_df['sub_county'].str.contains('Congressional', na=False, regex=False)
ballot_measures_county_df = ballot_measures_df[is_congressional].copy()

In [11]:
# vote_count is stored as text in the raw data; cast it to integers so it can be summed
vote_counts_as_int = ballot_measures_county_df['vote_count'].astype(int)
ballot_measures_county_df['vote_count'] = vote_counts_as_int

# total votes for every (year, vote_type) combination, returned as a flat table
year_vote_count_summary = (
    ballot_measures_county_df
    .groupby(['year', 'vote_type'])['vote_count']
    .sum()
    .reset_index()
)

In [None]:
# show the summed vote_count per year and vote_type as this cell's output
year_vote_count_summary

In [12]:
# calculates the percentage of yes votes for a given year
def get_yes_percent(df_in, year_in):
    """Return the 'yes' share of all votes recorded for ``year_in``, as a percentage.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Table with the columns 'year', 'vote_type' and a numeric 'vote_count'.
        It may hold one row per (year, vote_type) or several; all matching
        rows are summed.
    year_in
        Value compared (with ``==``) against the 'year' column.

    Returns
    -------
    float
        ``100 * yes votes / all votes`` for the selected year. ``0.0`` when the
        year has votes but none of type 'yes'.

    Raises
    ------
    ValueError
        If the selected year has no rows or its vote counts add up to zero,
        since a percentage is undefined in that case.
    """
    year_rows = df_in[df_in['year'] == year_in]
    total_votes = year_rows['vote_count'].sum()
    if total_votes == 0:
        raise ValueError(f"no votes recorded for year {year_in!r}; cannot compute a yes percentage")

    # sum every 'yes' row instead of taking the first one, so the result does not
    # depend on the input being pre-aggregated and a missing 'yes' row yields 0
    yes_votes = year_rows.loc[year_rows['vote_type'] == 'yes', 'vote_count'].sum()
    return (yes_votes / total_votes) * 100

Data Manipulation

In [13]:
# yes-vote percentage for each proposition year, keyed by year
vote_change_over_time_dict = {
    year: get_yes_percent(year_vote_count_summary, year)
    for year in (2018, 2020, 2022)
}

# rounded change in the yes percentage from one election to the next (later minus
# earlier), keyed by the year halfway between the two elections
vote_diff_between = {
    midpoint: round(vote_change_over_time_dict[later] - vote_change_over_time_dict[earlier])
    for midpoint, earlier, later in ((2019, 2018, 2020), (2021, 2020, 2022))
}

Plotting Yes Vote Over Time Figure

In [None]:
# plotly source: https://plotly.com/python/line-and-scatter/, https://plotly.com/python/text-and-annotations/,
# https://python-graph-gallery.com/523-plotly-add-annotation/

# Builds a scatter figure with two traces:
#   1. black markers: the yes-vote percentage of each proposition year, labelled with the rounded value
#   2. red triangles: the change in yes percentage between consecutive years, labelled with that change

fig = go.Figure()

proposition_years = list(vote_change_over_time_dict.keys())
yes_percentages = list(vote_change_over_time_dict.values())
# rounds yes percentages and adds % for plot labeling
yes_percentages_str = [' '+str(round(x))+'%' for x in yes_percentages]

# adds % to the (already rounded) yes percentage changes for plot labeling
diff_btwn_years = [str(x)+'%' for x in vote_diff_between.values()]
# the change markers take their x position from the same dictionary that supplies
# their labels, so labels and positions cannot drift apart
diff_years = list(vote_diff_between.keys())
# each change marker sits vertically halfway between the two yes percentages it
# compares, so it follows the data instead of relying on hand-picked heights
diff_marker_heights = [
    (earlier + later) / 2
    for earlier, later in zip(yes_percentages, yes_percentages[1:])
]

fig.add_trace(go.Scatter(
    x=proposition_years,
    y=yes_percentages,
    mode='markers+text',
    text=yes_percentages_str,
    textposition='middle right',

    marker=dict(
        size=10,
        color='black'
    ),
    textfont=dict(
        size=16,
        color='black'
    ),
    showlegend=False,
    name='percent yes'
))

fig.add_trace(go.Scatter(
    x=diff_years,
    y=diff_marker_heights,
    mode="markers+text",
    marker=dict(symbol='triangle-down',
                size=10,
                color='red'
    ),
    text=diff_btwn_years,
    textposition="bottom right",
    showlegend=False,
    name='percent change',
))

# fixed axis windows chosen for this figure (years 2017-2023, 25-45 percent)
fig.update_yaxes(range=[25, 45])
fig.update_xaxes(range=[2017, 2023])

fig.update_layout(
    title='Percentage of Yes Votes for Dialysis Clinic Requirements Propositions in California',
    yaxis_title="Percentage of Yes Votes",
    xaxis_title="Dialysis Clinic Requirements Proposition Year",
    width=850,
    xaxis=dict(nticks=4,
               showgrid=False
    )
)

fig.show()

Download Image

In [9]:
#fig.write_html("../007_visualizations/Yes Vote Percentages Propositions in Californa.html")