In [250]:
import numpy as np
import pandas as pd
from pandas import json_normalize
import geopandas as gpd
from geopandas import GeoDataFrame
import xlrd
from random import randint
import requests
from io import BytesIO
from datetime import datetime

pd.set_option('display.max_columns', None)

## Reading spatial and Excel files for 2025

In [251]:
# Read the precinct polygons and reproject them to EPSG:4269 in one chain.
gdf = (
    gpd.read_file("./shp/SF_DOE_PREC_2022_07_18_pg.shp", encoding='utf-8')
    .to_crs('EPSG:4269')
)

In [252]:
url = 'https://www.sfelections.org/results/20250916/data/20250918_1/sov.xlsx'

In [253]:
# date = datetime.today().strftime("%Y%m%d")

# election_date = "20250916"

# # Build the URL
# url = f"https://www.sfelections.org/results/{election_date}/data/{date}/sov.xlsx"

print(url)

https://www.sfelections.org/results/20250916/data/20250918_1/sov.xlsx


In [254]:
# Fail fast on network problems: the timeout keeps the cell from hanging
# forever, and raise_for_status() surfaces HTTP errors (e.g. a 404 for a
# results folder that does not exist) before pandas is handed an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [255]:
# Open the downloaded workbook once, then parse both sheets from it.
# header= selects the row that holds the column names and skipfooter= drops
# that many rows from the bottom of the sheet.
workbook = pd.ExcelFile(BytesIO(response.content))
dfTurnout = workbook.parse(sheet_name='Sheet1', header=4, skipfooter=10)
dfA = workbook.parse(sheet_name='Sheet2', header=3, skipfooter=8)

In [256]:
dfA

Unnamed: 0,Precinct,Registered \nVoters,Undervotes,Unnamed: 3,Overvotes,Precinct.1,Yes\n,Unnamed: 7,No\n,Unnamed: 9,Total Votes,Unnamed: 11
0,Electionwide,,,,,Electionwide,,,,,,
1,Electionwide,,,,,Electionwide,,,,,,
2,PCT 9401,,,,,PCT 9401,,,,,,
3,Election Day,2566.0,0.0,,0.0,Election Day,44.0,65.67%,23.0,34.33%,67.0,
4,Vote by Mail,2566.0,2.0,,0.0,Vote by Mail,582.0,60.00%,388.0,40.00%,970.0,
...,...,...,...,...,...,...,...,...,...,...,...,...
77,Total,594.0,0.0,,0.0,Total,252.0,80.25%,62.0,19.75%,314.0,
78,PCT 9451,,,,,PCT 9451,,,,,,
79,Election Day,2383.0,0.0,,0.0,Election Day,24.0,80.00%,6.0,20.00%,30.0,
80,Vote by Mail,2383.0,2.0,,0.0,Vote by Mail,730.0,75.18%,241.0,24.82%,971.0,


In [257]:
dfTurnout

Unnamed: 0,Precinct,Registered\nVoters,Unnamed: 2,Voters Cast,Unnamed: 4,% Turnout,Unnamed: 6,Unnamed: 7
0,Electionwide,,,,,,,
1,Electionwide,,,,,,,
2,PCT 9401,,,,,,,
3,Election Day,2566.0,,67.0,,0.026111,,
4,Vote by Mail,2566.0,,972.0,,0.378800,,
...,...,...,...,...,...,...,...,...
77,Total,594.0,,314.0,,0.528620,,
78,PCT 9451,,,,,,,
79,Election Day,2383.0,,30.0,,0.012589,,
80,Vote by Mail,2383.0,,973.0,,0.408309,,


### Formatting

In [258]:
###
### TURNOUT
###

# Trim to the columns we need and rename them. The .copy() detaches the
# selection from the raw sheet so the column assignments below operate on an
# independent frame instead of a slice (no SettingWithCopyWarning).
dfTurnout = dfTurnout[['Precinct','Registered\nVoters','Voters Cast']].copy()
dfTurnout.columns = ['precinct','registered_voters','votes_cast']

# Each "PCT ####" label sits on its own row, ahead of that precinct's data
# rows. shift(3) moves the labels DOWN three rows, so each label ends up on
# the third data row of its precinct.
dfTurnout['precinct'] = dfTurnout['precinct'].shift(3)

# Drop rows with missing values, then keep only the rows that now carry a
# precinct label.
dfTurnout = dfTurnout.dropna()
dfTurnout = dfTurnout[dfTurnout["precinct"].str.contains('PCT')].copy()

# Tidy up the precinct label. regex=False makes the literal replacement
# explicit, so the result does not depend on the pandas version's default.
dfTurnout['precinct'] = (
    dfTurnout['precinct']
    .str.replace('PCT ', '', regex=False)
    .str.replace(' MB', '', regex=False)
)

# Change data types
dfTurnout['registered_voters'] = dfTurnout['registered_voters'].astype(int)
dfTurnout['votes_cast'] = dfTurnout['votes_cast'].astype(int)

# Turnout: votes cast as a percentage of registered voters, one decimal.
# A NaN ratio (0 votes / 0 registered) is reported as 0.
dfTurnout['turnout'] = ((dfTurnout['votes_cast'] / dfTurnout['registered_voters']) * 100).round(1)
dfTurnout['turnout'] = dfTurnout['turnout'].fillna(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfTurnout['precinct'] = dfTurnout['precinct'].shift(3)


In [259]:
dfTurnout

Unnamed: 0,precinct,registered_voters,votes_cast,turnout
5,9401,2566,1039,40.5
9,9403,2247,965,42.9
13,9404,3243,1344,41.4
17,9406,2415,813,33.7
21,9408,2979,1285,43.1
25,9413,3296,1385,42.0
29,9414,2227,868,39.0
33,9417,2211,1008,45.6
37,9421,2106,771,36.6
41,9423,2363,1113,47.1


In [260]:
###
### PROPS
###

def process_proposition(df):
    """Reduce a raw proposition sheet to one summary row per precinct.

    In the raw sheet each precinct label ("PCT ####") sits on its own row,
    ahead of that precinct's data rows. The label is moved down three rows
    and only the rows that then carry a label are kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Sheet as loaded by ``pd.read_excel``. After header clean-up it must
        expose ``precinct``, ``registered_voters``, ``yes``, ``no`` and
        ``total_votes`` columns. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``precinct`` (label with "PCT " and " MB" removed),
        ``registered_voters``, ``yes``, ``no``, ``votes_cast`` and
        ``turnout`` (votes cast as a percentage of registered voters,
        rounded to one decimal; 0 where the ratio is NaN).

    Raises
    ------
    KeyError
        If an expected column is missing after the header clean-up.
    """
    # Work on a copy so the caller's frame keeps its original column labels.
    df = df.copy()

    # Standardize column names. Literal substitutions pass regex=False: with
    # regex=True, pandas >= 2.0 reads '.' as "any character" and would blank
    # out every column name. Only '_$' (trailing underscore) is a real regex.
    df.columns = (
        df.columns
        .str.replace('\n', '', regex=False)
        .str.replace('"', '', regex=False)
        .str.replace('.', '', regex=False)
        .str.lower()
        .str.replace(' ', '_', regex=False)
        .str.replace('_$', '', regex=True)
        # bond measures label their choices "bonds_-_yes" / "bonds_-_no"
        .str.replace('bonds_-_yes', 'yes', regex=False)
        .str.replace('bonds_-_no', 'no', regex=False)
    )

    # Trim and rename columns to focus on necessary data. The .copy() avoids
    # assigning into a slice (SettingWithCopyWarning) further down.
    df = df[['precinct', 'registered_voters', 'yes', 'no', 'total_votes']].copy()
    df.columns = ['precinct', 'registered_voters', 'yes', 'no', 'votes_cast']

    # shift(3) moves the precinct labels DOWN three rows, placing each label
    # on the third data row that follows it.
    df['precinct'] = df['precinct'].shift(3)

    # Keep only the rows that now carry a precinct label.
    df = df.dropna(subset=['precinct'])
    df = df[df['precinct'].str.contains('PCT', na=False)].copy()

    # Clean up precinct column data
    df['precinct'] = (
        df['precinct']
        .str.replace('PCT ', '', regex=False)
        .str.replace(' MB', '', regex=False)
    )

    # Calculate turnout; a NaN ratio (e.g. 0 / 0) is reported as 0.
    df['turnout'] = ((df['votes_cast'] / df['registered_voters']) * 100).round(1).fillna(0)

    return df

In [261]:
dfA = process_proposition(dfA)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['precinct'] = df['precinct'].shift(3)


In [262]:
# Bin edges (in percent) and the matching labels handed to pd.cut in the
# cells below. There is one label per interval: len(labels) == len(bins) - 1.
bins = [0, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 100]
labels = ['Less than 25%', '25-30%', '30-35%', '35-40%', '40-45%', '45-50%', '50-55%', '55-60%', '60-65%', '65-70%', '70-75%', '75% and more']

In [263]:
# I want to generate random numbers for yes and no columns in dfA, but the sum of yes and no should be  no more than registered_voters

# def generate_yes_no(row):
#     registered_voters = row['registered_voters']
#     if registered_voters == 0:
#         return pd.Series([0, 0])
    
#     yes = randint(0, registered_voters)
#     no = randint(0, registered_voters - yes)
    
#     return pd.Series([yes, no])

# # make votes_cast the sum of yes and no

# dfA[['yes', 'no']] = dfA.apply(generate_yes_no, axis=1)
# dfA['votes_cast'] = dfA['yes'] + dfA['no']
# dfA['turnout'] = round((dfA['votes_cast'] / dfA['registered_voters']) * 100, 1).fillna(0)

In [264]:
dfTurnout.head()

Unnamed: 0,precinct,registered_voters,votes_cast,turnout
5,9401,2566,1039,40.5
9,9403,2247,965,42.9
13,9404,3243,1344,41.4
17,9406,2415,813,33.7
21,9408,2979,1285,43.1


In [265]:
# do the same thing for dfTurnout. This time we are just populating the votes_cast column with random numbers

# dfTurnout['votes_cast'] = dfTurnout.apply(
#     lambda row: randint(0, row['registered_voters']) if row['registered_voters'] > 0 else 0,
#     axis=1
# )
# dfTurnout['turnout'] = round((dfTurnout['votes_cast'] / dfTurnout['registered_voters']) * 100, 1).fillna(0)

In [266]:
dfA

Unnamed: 0,precinct,registered_voters,yes,no,votes_cast,turnout
5,9401,2566.0,626.0,411.0,1037.0,40.4
9,9403,2247.0,595.0,366.0,961.0,42.8
13,9404,3243.0,791.0,547.0,1338.0,41.3
17,9406,2415.0,435.0,377.0,812.0,33.6
21,9408,2979.0,707.0,572.0,1279.0,42.9
25,9413,3296.0,903.0,478.0,1381.0,41.9
29,9414,2227.0,481.0,387.0,868.0,39.0
33,9417,2211.0,622.0,386.0,1008.0,45.6
37,9421,2106.0,479.0,292.0,771.0,36.6
41,9423,2363.0,577.0,533.0,1110.0,47.0


In [267]:
# Turnout (votes cast as a percentage of registered voters), bucketed into
# the shared bins. The resulting label is stored in the 'yes_perc' column.
turnout_pct = dfTurnout['votes_cast'] / dfTurnout['registered_voters'] * 100
dfTurnout['yes_perc'] = pd.cut(turnout_pct, bins=bins, labels=labels, include_lowest=True)

In [268]:
# Yes share of the votes cast (percent) plus its bin label, added in one pass.
yes_share = (dfA['yes'] / dfA['votes_cast']) * 100
dfA = dfA.assign(
    precinct=dfA['precinct'].astype(int),
    # missing shares are replaced by the text 'no data'
    yes_perc=yes_share.fillna('no data'),
    # the bin label is stored as plain text
    yes_perc_bin=pd.cut(yes_share, bins=bins, labels=labels, include_lowest=True).astype(str),
)

## Bar chart

Prepare the totals row that feeds the bar chart.

In [269]:
# yes,no,margin,threshold,total,Proposition,yes_p,no_p
# 282968.0,94329.0,188639.0,50.0,377297.0,A,75.0,25.0

In [270]:
# One-row summary frame holding the column-wise totals over every row of dfA.
total_row = pd.DataFrame({
    'precinct': ['Total'],
    **{
        column: [dfA[column].sum()]
        for column in ('registered_voters', 'yes', 'no', 'votes_cast')
    },
})

total_row

Unnamed: 0,precinct,registered_voters,yes,no,votes_cast
0,Total,50273.0,13332.0,7879.0,21211.0


In [271]:
# Yes / no votes as a percentage of all votes cast, rounded to one decimal.
votes_total = total_row['votes_cast']
total_row['yes_p'] = (total_row['yes'] / votes_total * 100).round(1).fillna(0)
total_row['no_p'] = (total_row['no'] / votes_total * 100).round(1).fillna(0)

In [272]:
# make a column that is called proposition and set it to A

total_row['proposition'] = 'A'
total_row['threshold'] = 50.0

In [273]:
total_row

Unnamed: 0,precinct,registered_voters,yes,no,votes_cast,yes_p,no_p,proposition,threshold
0,Total,50273.0,13332.0,7879.0,21211.0,62.9,37.1,A,50.0


In [274]:
# Overall turnout: votes cast as a percentage of registered voters (one decimal).
overall_turnout = total_row['votes_cast'] / total_row['registered_voters'] * 100
total_row['turnout'] = overall_turnout.round(1).fillna(0)

In [275]:
# export to docs/bar-chart/data.csv

total_row.to_csv('./docs/bar-chart/data.csv', index=False)

## Reading files for 2022

In [276]:
url = 'https://www.sfelections.org/results/20221108/data/20221201/sov.xlsx'

In [277]:
# Fail fast on network problems: the timeout keeps the cell from hanging
# forever, and raise_for_status() surfaces HTTP errors before pandas is
# handed an error page instead of a workbook.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [278]:
dfd4 = pd.read_excel(BytesIO(response.content), sheet_name='Sheet43', header=3, skipfooter=8)

In [279]:
dfd4.head()

Unnamed: 0,Precinct,Registered \nVoters,Undervotes,Unnamed: 3,Overvotes,Precinct.1,JOEL ENGARDIO\n,Unnamed: 7,GORDON MAR\n,Unnamed: 9,Write-in\n,Unnamed: 11,Unnamed: 12,Total Votes
0,Countywide,,,,,Countywide,,,,,,,,
1,Electionwide,,,,,Electionwide,,,,,,,,
2,PCT 9401,,,,,PCT 9401,,,,,,,,
3,Election Day,1215.0,9.0,,0.0,Election Day,44.0,41.90%,61.0,58.10%,0.0,,0.00%,105.0
4,Vote by Mail,1215.0,56.0,,0.0,Vote by Mail,223.0,38.65%,354.0,61.35%,0.0,,0.00%,577.0


In [280]:
### DISTRICT 4

# Normalise the header: strip embedded newlines, lowercase, snake_case, and
# drop a trailing underscore. '_$' is a regular expression, so regex=True is
# passed explicitly; pandas >= 2.0 treats patterns as literal text by default
# and would silently skip that step. The other replacements are literal.
dfd4.columns = (
    dfd4.columns
    .str.replace('\n', '', regex=False)
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('_$', '', regex=True)
)

# shift(3) moves each "PCT ####" label DOWN three rows, onto the third data
# row that follows it.
dfd4['precinct'] = dfd4['precinct'].shift(3)

# .copy() detaches the selection so later assignments act on an independent
# frame rather than a slice of the raw sheet.
dfd4 = dfd4[['precinct', 'registered_voters', 'joel_engardio', 'gordon_mar','total_votes']].copy()

# Drop rows with missing values, then keep only the rows carrying a label.
dfd4 = dfd4.dropna()
dfd4 = dfd4[dfd4["precinct"].str.contains('PCT')].copy()

# Tidy up precinct column data
dfd4['precinct'] = (
    dfd4['precinct']
    .str.replace('PCT ', '', regex=False)
    .str.replace(' MB', '', regex=False)
)

  dfd4.columns = dfd4.columns.str.replace('_$', '')


In [281]:
# Precinct consolidation map: each key is the precinct id under which the
# listed precinct ids are combined (every list includes the key itself).
groupings = {
    "9401": ["9401", "9402"], 
    "9403": ["9403", "9411"],
    "9404": ["9404", "9405", "9412"],
    "9406": ["9406", "9407"],
    "9408": ["9408", "9409", "9416"],
    "9413": ["9413", "9418", "9419"], 
    "9414": ["9414", "9415"],
    "9417": ["9417", "9424"],
    "9421": ["9421", "9422"], 
    "9423": ["9423", "9428"], 
    "9425": ["9425", "9426", "9431"],
    "9427": ["9427", "9433", "9434"],
    "9429": ["9429", "9436", "9438"], 
    "9432": ["9432", "9441"], 
    "9435": ["9435", "9443"],
    "9437": ["9437", "9444", "9445"],
    "9439": ["9439","9446"], 
    "9442": ["9442", "9447", "9448"],
    "9449": ["9449"],
    "9451": ["9451", "9452"]
}

In [282]:
dfd4

Unnamed: 0,precinct,registered_voters,joel_engardio,gordon_mar,total_votes
5,9401,1215.0,267.0,415.0,682.0
9,9402,1188.0,269.0,377.0,646.0
13,9403,1029.0,261.0,310.0,571.0
17,9404,982.0,284.0,271.0,555.0
21,9405,1060.0,321.0,283.0,604.0
25,9406,1174.0,340.0,302.0,642.0
29,9407,1138.0,252.0,362.0,614.0
33,9408,979.0,221.0,317.0,538.0
37,9409,823.0,211.0,243.0,454.0
41,9411,1094.0,280.0,325.0,605.0


In [283]:
# For each key in the groupings dictionary, add up the rows whose precinct is in that key's list and create a new row with the key as the precinct

def group_precincts(df, groupings):
    """Sum the District 4 result columns over groups of precincts.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``precinct``, ``registered_voters``, ``joel_engardio``,
        ``gordon_mar`` and ``total_votes``.
    groupings : dict
        Maps an output precinct id to the collection of precinct ids whose
        rows in ``df`` are summed into it.

    Returns
    -------
    pandas.DataFrame
        One row per ``groupings`` key that matched at least one row of
        ``df``, in the dict's iteration order. The five columns listed above
        are always present, even when no group matched.
    """
    value_columns = ['registered_voters', 'joel_engardio', 'gordon_mar', 'total_votes']
    records = []

    for key, members in groupings.items():
        subset = df[df['precinct'].isin(members)]
        if subset.empty:
            # no row of df belongs to this group: emit nothing for it
            continue
        records.append(
            {'precinct': key, **{column: subset[column].sum() for column in value_columns}}
        )

    # Passing columns= keeps the schema stable when `records` is empty, so
    # callers can still select columns from the result.
    return pd.DataFrame(records, columns=['precinct'] + value_columns)

dfd4_grouped = group_precincts(dfd4, groupings)

In [284]:
dfd4_grouped

Unnamed: 0,precinct,registered_voters,joel_engardio,gordon_mar,total_votes
0,9401,2403.0,536.0,792.0,1328.0
1,9403,2123.0,541.0,635.0,1176.0
2,9404,3130.0,953.0,840.0,1793.0
3,9406,2312.0,592.0,664.0,1256.0
4,9408,2868.0,717.0,897.0,1614.0
5,9413,3143.0,895.0,818.0,1713.0
6,9414,2152.0,615.0,572.0,1187.0
7,9417,2173.0,656.0,592.0,1248.0
8,9421,1992.0,545.0,550.0,1095.0
9,9423,2264.0,677.0,686.0,1363.0


In [285]:
# Each candidate's share of the total votes, in percent with one decimal.
for candidate in ("joel_engardio", "gordon_mar"):
    share = dfd4_grouped[candidate] / dfd4_grouped["total_votes"] * 100
    dfd4_grouped[f"{candidate}_p"] = share.round(1)

In [286]:
dfd4_grouped

Unnamed: 0,precinct,registered_voters,joel_engardio,gordon_mar,total_votes,joel_engardio_p,gordon_mar_p
0,9401,2403.0,536.0,792.0,1328.0,40.4,59.6
1,9403,2123.0,541.0,635.0,1176.0,46.0,54.0
2,9404,3130.0,953.0,840.0,1793.0,53.2,46.8
3,9406,2312.0,592.0,664.0,1256.0,47.1,52.9
4,9408,2868.0,717.0,897.0,1614.0,44.4,55.6
5,9413,3143.0,895.0,818.0,1713.0,52.2,47.8
6,9414,2152.0,615.0,572.0,1187.0,51.8,48.2
7,9417,2173.0,656.0,592.0,1248.0,52.6,47.4
8,9421,1992.0,545.0,550.0,1095.0,49.8,50.2
9,9423,2264.0,677.0,686.0,1363.0,49.7,50.3


In [287]:
# make a turnout column

dfd4_grouped['turnout'] = round((dfd4_grouped['total_votes'] / dfd4_grouped['registered_voters']) * 100, 1).fillna(0)

In [288]:
# The winner is the candidate whose percentage column is largest in the row;
# the "_p" suffix of that column name is stripped to leave the candidate key.
leading_column = dfd4_grouped[['joel_engardio_p', 'gordon_mar_p']].idxmax(axis=1)
dfd4_grouped['winner'] = leading_column.str.replace('_p$', '', regex=True)

## Reading files for 2024

In [289]:
url = 'https://www.sfelections.org/results/20241105/data/20241203/sov.xlsx'

In [290]:
# Fail fast on network problems: the timeout keeps the cell from hanging
# forever, and raise_for_status() surfaces HTTP errors before pandas is
# handed an error page instead of a workbook.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [291]:
dfK = pd.read_excel(BytesIO(response.content), sheet_name='Sheet50', header=3, skipfooter=6)

In [292]:
dfK = process_proposition(dfK)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['precinct'] = df['precinct'].shift(3)


In [293]:
dfK

Unnamed: 0,precinct,registered_voters,yes,no,votes_cast,turnout
5,1101,881.0,232.0,371.0,603.0,68.4
9,1102,904.0,210.0,340.0,550.0,60.8
13,1103,107.0,23.0,53.0,76.0,71.0
17,1104,1209.0,325.0,536.0,861.0,71.2
21,1105,1096.0,270.0,453.0,723.0,66.0
...,...,...,...,...,...,...
2041,9808,779.0,370.0,256.0,626.0,80.4
2045,9901,0.0,0.0,0.0,0.0,0.0
2049,9902,0.0,0.0,0.0,0.0,0.0
2053,9903,0.0,0.0,0.0,0.0,0.0


In [294]:
def group_precincts_k(df, groupings):
    """Sum proposition result columns over groups of precincts.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``precinct``, ``registered_voters``, ``yes``, ``no``
        and ``votes_cast``.
    groupings : dict
        Maps an output precinct id to the collection of precinct ids whose
        rows in ``df`` are summed into it.

    Returns
    -------
    pandas.DataFrame
        One row per ``groupings`` key that matched at least one row of
        ``df``, in the dict's iteration order. The five columns listed above
        are always present, even when no group matched.
    """
    value_columns = ['registered_voters', 'yes', 'no', 'votes_cast']
    records = []

    for key, members in groupings.items():
        subset = df[df['precinct'].isin(members)]
        if subset.empty:
            # no row of df belongs to this group: emit nothing for it
            continue
        # each column is summed exactly once (the earlier dict literal listed
        # 'registered_voters' twice)
        records.append(
            {'precinct': key, **{column: subset[column].sum() for column in value_columns}}
        )

    # Passing columns= keeps the schema stable when `records` is empty, so
    # callers can still select columns from the result.
    return pd.DataFrame(records, columns=['precinct'] + value_columns)

In [295]:
dfK = group_precincts_k(dfK, groupings)

In [296]:
# Keep only the rows whose precinct is a key of the groupings dictionary.
# group_precincts_k labels every row it returns with a groupings key, so
# directly after the grouping cell this filter keeps all rows.

dfK = dfK[dfK['precinct'].isin(groupings.keys())]

In [297]:
# make a turnout column

dfK['turnout'] = round((dfK['votes_cast'] / dfK['registered_voters']) * 100, 1).fillna(0)

In [298]:
# Yes share of the votes cast (percent) plus its bin label, added in one pass.
yes_share = (dfK['yes'] / dfK['votes_cast']) * 100
dfK = dfK.assign(
    precinct=dfK['precinct'].astype(int),
    # missing shares are replaced by the text 'no data'
    yes_perc=yes_share.fillna('no data'),
    # the bin label is stored as plain text
    yes_perc_bin=pd.cut(yes_share, bins=bins, labels=labels, include_lowest=True).astype(str),
)

In [299]:
dfK

Unnamed: 0,precinct,registered_voters,yes,no,votes_cast,turnout,yes_perc,yes_perc_bin
0,9401,2504.0,766.0,1125.0,1891.0,75.5,40.507668,40-45%
1,9403,2238.0,622.0,1052.0,1674.0,74.8,37.156511,35-40%
2,9404,3216.0,951.0,1466.0,2417.0,75.2,39.346297,35-40%
3,9406,2372.0,766.0,961.0,1727.0,72.8,44.354372,40-45%
4,9408,2945.0,970.0,1267.0,2237.0,76.0,43.361645,40-45%
5,9413,3272.0,836.0,1539.0,2375.0,72.6,35.2,35-40%
6,9414,2215.0,665.0,943.0,1608.0,72.6,41.355721,40-45%
7,9417,2197.0,588.0,1063.0,1651.0,75.1,35.614779,35-40%
8,9421,2076.0,571.0,883.0,1454.0,70.0,39.270977,35-40%
9,9423,2335.0,724.0,1074.0,1798.0,77.0,40.266963,40-45%


# Prepping gdf

In [300]:
# Keep only the precinct id and the geometry, with the id stored as text.
gdf = gdf[['Prec_2022', 'geometry']].assign(
    Prec_2022=lambda frame: frame['Prec_2022'].astype(str)
)

In [301]:
gdf

Unnamed: 0,Prec_2022,geometry
0,7042,"POLYGON ((-122.42165 37.71029, -122.42100 37.7..."
1,1107,"POLYGON ((-122.45595 37.71134, -122.45593 37.7..."
2,1145,"POLYGON ((-122.44617 37.71104, -122.44550 37.7..."
3,7043,"POLYGON ((-122.41508 37.71166, -122.41289 37.7..."
4,7046,"POLYGON ((-122.40973 37.71195, -122.40883 37.7..."
...,...,...
509,9201,"POLYGON ((-122.47754 37.81011, -122.47750 37.8..."
510,9202,"POLYGON ((-122.47725 37.81102, -122.47711 37.8..."
511,7301,"POLYGON ((-122.41991 37.81163, -122.41990 37.8..."
512,7645,"POLYGON ((-122.36862 37.83116, -122.36733 37.8..."


In [302]:
# NOTE(review): this repeats, entry for entry, the `groupings` mapping already
# defined earlier in the notebook; consider defining it once.
groupings = {
    "9401": ["9401", "9402"], 
    "9403": ["9403", "9411"],
    "9404": ["9404", "9405", "9412"],
    "9406": ["9406", "9407"],
    "9408": ["9408", "9409", "9416"],
    "9413": ["9413", "9418", "9419"], 
    "9414": ["9414", "9415"],
    "9417": ["9417", "9424"],
    "9421": ["9421", "9422"], 
    "9423": ["9423", "9428"], 
    "9425": ["9425", "9426", "9431"],
    "9427": ["9427", "9433", "9434"],
    "9429": ["9429", "9436", "9438"], 
    "9432": ["9432", "9441"], 
    "9435": ["9435", "9443"],
    "9437": ["9437", "9444", "9445"],
    "9439": ["9439","9446"], 
    "9442": ["9442", "9447", "9448"],
    "9449": ["9449"],
    "9451": ["9451", "9452"]
}

In [303]:
# Merge the precinct polygons according to `groupings`: every polygon is
# relabelled with the key of the first group that lists its id (ids that are
# in no group keep their own id), then polygons sharing a label are dissolved.

# Reverse lookup: member precinct id -> key of the first group containing it.
merged_into = {}
for group_key, members in groupings.items():
    for member in members:
        merged_into.setdefault(member, group_key)

original_ids = gdf['Prec_2022'].astype(str)
gdf['precinct'] = original_ids.map(lambda pid: merged_into.get(pid, pid))

gdf = gdf.dissolve(by='precinct', as_index=False).reset_index(drop=True)

gdf

Unnamed: 0,precinct,geometry,Prec_2022
0,1101,"POLYGON ((-122.47167 37.72163, -122.47078 37.7...",1101
1,1102,"POLYGON ((-122.46802 37.71610, -122.46800 37.7...",1102
2,1103,"POLYGON ((-122.46625 37.71968, -122.46715 37.7...",1103
3,1104,"POLYGON ((-122.46266 37.72000, -122.46267 37.7...",1104
4,1105,"POLYGON ((-122.46263 37.71742, -122.46262 37.7...",1105
...,...,...,...
482,9808,"POLYGON ((-122.44106 37.73497, -122.44031 37.7...",9808
483,9901,"POLYGON ((-122.43251 37.73311, -122.43233 37.7...",9901
484,9902,"POLYGON ((-122.40396 37.73161, -122.40340 37.7...",9902
485,9903,"POLYGON ((-122.33081 37.78579, -122.33061 37.7...",9903


In [304]:
# export to geojson

gdf.to_file("sf_precincts_merged.geojson", driver='GeoJSON', encoding='utf-8')

# Merging

In [305]:
# make dfTurnout yes_perc a string too

dfTurnout['yes_perc'] = dfTurnout['yes_perc'].astype(str)

In [306]:
dfTurnout

Unnamed: 0,precinct,registered_voters,votes_cast,turnout,yes_perc
5,9401,2566,1039,40.5,40-45%
9,9403,2247,965,42.9,40-45%
13,9404,3243,1344,41.4,40-45%
17,9406,2415,813,33.7,30-35%
21,9408,2979,1285,43.1,40-45%
25,9413,3296,1385,42.0,40-45%
29,9414,2227,868,39.0,35-40%
33,9417,2211,1008,45.6,45-50%
37,9421,2106,771,36.6,35-40%
41,9423,2363,1113,47.1,45-50%


In [307]:
gdf

Unnamed: 0,precinct,geometry,Prec_2022
0,1101,"POLYGON ((-122.47167 37.72163, -122.47078 37.7...",1101
1,1102,"POLYGON ((-122.46802 37.71610, -122.46800 37.7...",1102
2,1103,"POLYGON ((-122.46625 37.71968, -122.46715 37.7...",1103
3,1104,"POLYGON ((-122.46266 37.72000, -122.46267 37.7...",1104
4,1105,"POLYGON ((-122.46263 37.71742, -122.46262 37.7...",1105
...,...,...,...
482,9808,"POLYGON ((-122.44106 37.73497, -122.44031 37.7...",9808
483,9901,"POLYGON ((-122.43251 37.73311, -122.43233 37.7...",9901
484,9902,"POLYGON ((-122.40396 37.73161, -122.40340 37.7...",9902
485,9903,"POLYGON ((-122.33081 37.78579, -122.33061 37.7...",9903


In [308]:
# Attach the dissolved precinct polygons to the turnout table (inner join on
# the shared 'precinct' column), keep the export columns, and wrap the result
# as a GeoDataFrame in EPSG:4269.
turnout_columns = ['precinct', 'registered_voters', 'votes_cast', 'yes_perc', 'turnout', 'geometry']
dfTurnout = gdf.merge(dfTurnout, on='precinct')
dfTurnout = dfTurnout[turnout_columns]
gdfTurnout = GeoDataFrame(dfTurnout, crs="EPSG:4269", geometry='geometry')

In [309]:
dfA.dtypes

precinct               int64
registered_voters    float64
yes                  float64
no                   float64
votes_cast           float64
turnout              float64
yes_perc             float64
yes_perc_bin          object
dtype: object

In [310]:
# make dfA precinct string

dfA['precinct'] = dfA['precinct'].astype(str)

In [311]:
# merge 

dfA = gdf.merge(dfA, on='precinct')

In [312]:
dfA.columns

Index(['precinct', 'geometry', 'Prec_2022', 'registered_voters', 'yes', 'no',
       'votes_cast', 'turnout', 'yes_perc', 'yes_perc_bin'],
      dtype='object')

In [313]:
dfA

Unnamed: 0,precinct,geometry,Prec_2022,registered_voters,yes,no,votes_cast,turnout,yes_perc,yes_perc_bin
0,9401,"POLYGON ((-122.50593 37.76042, -122.50700 37.7...",9401,2566.0,626.0,411.0,1037.0,40.4,60.366442,60-65%
1,9403,"POLYGON ((-122.49555 37.75714, -122.49602 37.7...",9411,2247.0,595.0,366.0,961.0,42.8,61.914672,60-65%
2,9404,"POLYGON ((-122.48959 37.75740, -122.49066 37.7...",9412,3243.0,791.0,547.0,1338.0,41.3,59.118087,55-60%
3,9406,"POLYGON ((-122.47712 37.76356, -122.47697 37.7...",9406,2415.0,435.0,377.0,812.0,33.6,53.571429,50-55%
4,9408,"POLYGON ((-122.50112 37.75315, -122.50219 37.7...",9416,2979.0,707.0,572.0,1279.0,42.9,55.277561,55-60%
5,9413,"POLYGON ((-122.48492 37.75200, -122.48599 37.7...",9419,3296.0,903.0,478.0,1381.0,41.9,65.3874,65-70%
6,9414,"POLYGON ((-122.47660 37.75611, -122.47713 37.7...",9415,2227.0,481.0,387.0,868.0,39.0,55.414747,55-60%
7,9417,"POLYGON ((-122.49518 37.75165, -122.49516 37.7...",9424,2211.0,622.0,386.0,1008.0,45.6,61.706349,60-65%
8,9421,"POLYGON ((-122.47635 37.75238, -122.47742 37.7...",9422,2106.0,479.0,292.0,771.0,36.6,62.127108,60-65%
9,9423,"POLYGON ((-122.50407 37.74928, -122.50394 37.7...",9428,2363.0,577.0,533.0,1110.0,47.0,51.981982,50-55%


In [314]:
dfA = dfA[['precinct','registered_voters','yes','no','votes_cast','turnout','yes_perc','yes_perc_bin','geometry']]
gdfA = GeoDataFrame(dfA, crs="EPSG:4269", geometry='geometry')

In [315]:
# merge gdf dfd4_grouped

dfd4_grouped['precinct'] = dfd4_grouped['precinct'].astype(str)

gdf4 = gdf.merge(dfd4_grouped, on='precinct')

gdf4


Unnamed: 0,precinct,geometry,Prec_2022,registered_voters,joel_engardio,gordon_mar,total_votes,joel_engardio_p,gordon_mar_p,turnout,winner
0,9401,"POLYGON ((-122.50593 37.76042, -122.50700 37.7...",9401,2403.0,536.0,792.0,1328.0,40.4,59.6,55.3,gordon_mar
1,9403,"POLYGON ((-122.49555 37.75714, -122.49602 37.7...",9411,2123.0,541.0,635.0,1176.0,46.0,54.0,55.4,gordon_mar
2,9404,"POLYGON ((-122.48959 37.75740, -122.49066 37.7...",9412,3130.0,953.0,840.0,1793.0,53.2,46.8,57.3,joel_engardio
3,9406,"POLYGON ((-122.47712 37.76356, -122.47697 37.7...",9406,2312.0,592.0,664.0,1256.0,47.1,52.9,54.3,gordon_mar
4,9408,"POLYGON ((-122.50112 37.75315, -122.50219 37.7...",9416,2868.0,717.0,897.0,1614.0,44.4,55.6,56.3,gordon_mar
5,9413,"POLYGON ((-122.48492 37.75200, -122.48599 37.7...",9419,3143.0,895.0,818.0,1713.0,52.2,47.8,54.5,joel_engardio
6,9414,"POLYGON ((-122.47660 37.75611, -122.47713 37.7...",9415,2152.0,615.0,572.0,1187.0,51.8,48.2,55.2,joel_engardio
7,9417,"POLYGON ((-122.49518 37.75165, -122.49516 37.7...",9424,2173.0,656.0,592.0,1248.0,52.6,47.4,57.4,joel_engardio
8,9421,"POLYGON ((-122.47635 37.75238, -122.47742 37.7...",9422,1992.0,545.0,550.0,1095.0,49.8,50.2,55.0,gordon_mar
9,9423,"POLYGON ((-122.50407 37.74928, -122.50394 37.7...",9428,2264.0,677.0,686.0,1363.0,49.7,50.3,60.2,gordon_mar


In [316]:
gdf

Unnamed: 0,precinct,geometry,Prec_2022
0,1101,"POLYGON ((-122.47167 37.72163, -122.47078 37.7...",1101
1,1102,"POLYGON ((-122.46802 37.71610, -122.46800 37.7...",1102
2,1103,"POLYGON ((-122.46625 37.71968, -122.46715 37.7...",1103
3,1104,"POLYGON ((-122.46266 37.72000, -122.46267 37.7...",1104
4,1105,"POLYGON ((-122.46263 37.71742, -122.46262 37.7...",1105
...,...,...,...
482,9808,"POLYGON ((-122.44106 37.73497, -122.44031 37.7...",9808
483,9901,"POLYGON ((-122.43251 37.73311, -122.43233 37.7...",9901
484,9902,"POLYGON ((-122.40396 37.73161, -122.40340 37.7...",9902
485,9903,"POLYGON ((-122.33081 37.78579, -122.33061 37.7...",9903


In [317]:
# merge the gdf with dfK

dfK['precinct'] = dfK['precinct'].astype(str)

gdfK = gdf.merge(dfK, on='precinct')

In [318]:
# export gdfK to geojson

gdfK.to_file("./docs/propK/propK.geojson", driver='GeoJSON', encoding='utf-8')
gdfK.to_file("./docs/comparison/propK.geojson", driver='GeoJSON', encoding='utf-8')

In [319]:
gdfTurnout

Unnamed: 0,precinct,registered_voters,votes_cast,yes_perc,turnout,geometry
0,9401,2566,1039,40-45%,40.5,"POLYGON ((-122.50593 37.76042, -122.50700 37.7..."
1,9403,2247,965,40-45%,42.9,"POLYGON ((-122.49555 37.75714, -122.49602 37.7..."
2,9404,3243,1344,40-45%,41.4,"POLYGON ((-122.48959 37.75740, -122.49066 37.7..."
3,9406,2415,813,30-35%,33.7,"POLYGON ((-122.47712 37.76356, -122.47697 37.7..."
4,9408,2979,1285,40-45%,43.1,"POLYGON ((-122.50112 37.75315, -122.50219 37.7..."
5,9413,3296,1385,40-45%,42.0,"POLYGON ((-122.48492 37.75200, -122.48599 37.7..."
6,9414,2227,868,35-40%,39.0,"POLYGON ((-122.47660 37.75611, -122.47713 37.7..."
7,9417,2211,1008,45-50%,45.6,"POLYGON ((-122.49518 37.75165, -122.49516 37.7..."
8,9421,2106,771,35-40%,36.6,"POLYGON ((-122.47635 37.75238, -122.47742 37.7..."
9,9423,2363,1113,45-50%,47.1,"POLYGON ((-122.50407 37.74928, -122.50394 37.7..."


In [320]:
# export both 

gdfTurnout.to_file("./docs/turnout/turnout.geojson", driver='GeoJSON', encoding='utf-8')
gdfA.to_file("./docs/propA/propA.geojson", driver='GeoJSON', encoding='utf-8')
gdfA.to_file("./docs/comparison/propA.geojson", driver='GeoJSON', encoding='utf-8')

In [321]:
# export gdf4

gdf4.to_file("./docs/2022/d4.geojson", driver='GeoJSON', encoding='utf-8')
gdf4.to_file("./docs/comparison/d4.geojson", driver='GeoJSON', encoding='utf-8')