In [761]:
import numpy as np
import pandas as pd
from pandas import json_normalize
import geopandas as gpd
from geopandas import GeoDataFrame
import xlrd
from random import randint
import requests
from io import BytesIO


pd.set_option('display.max_columns', None)

In [762]:
# Load the 2022 precinct shapefile and reproject it to EPSG:4269 in a single step.
gdf = gpd.read_file(
    "./shp/SF_DOE_PREC_2022_07_18_pg.shp",
    encoding='utf-8',
).to_crs('EPSG:4269')

In [763]:
# Location of the results workbook (sov.xlsx) on sfelections.org; it is downloaded
# in the next cell and parsed with pd.read_excel after that.
url = 'https://www.sfelections.org/results/20250916/data/20250807/sov.xlsx'

In [764]:
# Download the workbook.
# - timeout: without one, requests waits indefinitely on a stalled connection.
# - raise_for_status(): stop here on a 4xx/5xx answer instead of handing an HTML
#   error page to pd.read_excel in the next cell.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [765]:
# Sheet1 feeds the turnout table: column names are on row index 4 and the last
# 10 rows of the sheet are ignored.
dfTurnout = pd.read_excel(
    BytesIO(response.content),
    sheet_name='Sheet1',
    header=4,
    skipfooter=10,
)

# Sheet2 feeds the proposition table: column names are on row index 3 and the
# last 8 rows of the sheet are ignored.
dfA = pd.read_excel(
    BytesIO(response.content),
    sheet_name='Sheet2',
    header=3,
    skipfooter=8,
)

In [766]:
dfA

Unnamed: 0,Precinct,Registered \nVoters,Undervotes,Unnamed: 3,Overvotes,Precinct.1,Yes\n,Unnamed: 7,No\n,Unnamed: 9,Total Votes,Unnamed: 11
0,Electionwide,,,,,Electionwide,,,,,,
1,Electionwide,,,,,Electionwide,,,,,,
2,PCT 9401,,,,,PCT 9401,,,,,,
3,Election Day,2566.0,0.0,,0.0,Election Day,0.0,,0.0,,0.0,
4,Vote by Mail,2566.0,0.0,,0.0,Vote by Mail,0.0,,0.0,,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...
77,Total,594.0,0.0,,0.0,Total,0.0,,0.0,,0.0,
78,PCT 9451,,,,,PCT 9451,,,,,,
79,Election Day,2383.0,0.0,,0.0,Election Day,0.0,,0.0,,0.0,
80,Vote by Mail,2383.0,0.0,,0.0,Vote by Mail,0.0,,0.0,,0.0,


In [767]:
###
### TURNOUT
###

# Keep only the columns we need and give them snake_case names.
# .copy() detaches the selection from the raw frame, so the column assignments
# below do not raise SettingWithCopyWarning.
dfTurnout = dfTurnout[['Precinct','Registered\nVoters','Voters Cast']].copy()
dfTurnout.columns = ['precinct','registered_voters','votes_cast']

# The "PCT ####" label sits on its own row, three rows above the row whose numbers
# we keep (presumably the precinct total -- verify against the sheet layout).
# shift(3) moves every label DOWN three rows so it lines up with that row.
dfTurnout['precinct'] = dfTurnout['precinct'].shift(3)

# Drop rows with any missing value, then keep only the rows that now carry a
# precinct label. Copy again because boolean filtering returns a slice.
dfTurnout = dfTurnout.dropna()
dfTurnout = dfTurnout[dfTurnout["precinct"].str.contains('PCT')].copy()

# Tidy up the precinct label: "PCT 9401 MB" -> "9401".
# regex=False makes the literal match explicit (the default changed across pandas versions).
dfTurnout['precinct'] = (
    dfTurnout['precinct']
    .str.replace('PCT ', '', regex=False)
    .str.replace(' MB', '', regex=False)
)

# Counts arrive as floats because of the blank rows in the sheet; store them as ints.
dfTurnout['registered_voters'] = dfTurnout['registered_voters'].astype(int)
dfTurnout['votes_cast'] = dfTurnout['votes_cast'].astype(int)

# Turnout in percent, one decimal; 0/0 (NaN) is reported as 0.
dfTurnout['turnout'] = (
    (dfTurnout['votes_cast'] / dfTurnout['registered_voters']) * 100
).round(1).fillna(0)

In [768]:
dfTurnout

Unnamed: 0,precinct,registered_voters,votes_cast,turnout
5,9401,2566,0,0.0
9,9403,2247,0,0.0
13,9404,3243,0,0.0
17,9406,2415,0,0.0
21,9408,2979,0,0.0
25,9413,3296,0,0.0
29,9414,2227,0,0.0
33,9417,2211,0,0.0
37,9421,2106,0,0.0
41,9423,2363,0,0.0


In [769]:
def process_proposition(df):
    """Reduce a raw proposition sheet to one row per precinct.

    Steps: normalise the column names, keep the precinct / registered voters /
    yes / no / total votes columns, move each "PCT ####" label down three rows so
    it lines up with the row whose numbers are kept, keep only those labelled
    rows, strip the label down to the bare precinct number and add a turnout
    percentage.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw sheet as returned by ``pd.read_excel``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``precinct`` (str), ``registered_voters``, ``yes``, ``no``,
        ``votes_cast`` and ``turnout`` (percent, one decimal, NaN replaced by 0).
        The original row index of the kept rows is preserved.

    Raises
    ------
    KeyError
        If the normalised column names do not include ``precinct``,
        ``registered_voters``, ``yes``, ``no`` and ``total_votes``.
    """
    # Work on a copy so the caller's frame keeps its original column labels.
    df = df.copy()

    # Standardize column names. Every literal replacement uses regex=False:
    # with regex=True, pandas >= 2.0 treats '.' as "any character" and would
    # wipe out every column name.
    df.columns = (
        df.columns
        .str.replace('\n', '', regex=False)
        .str.replace('"', '', regex=False)
        .str.replace('.', '', regex=False)
        .str.lower()
        .str.replace(' ', '_', regex=False)
        .str.replace('_$', '', regex=True)  # genuine regex: drop one trailing underscore
        # bond measures label their choices "bonds - yes" / "bonds - no"
        .str.replace('bonds_-_yes', 'yes', regex=False)
        .str.replace('bonds_-_no', 'no', regex=False)
    )

    # Trim and rename columns to focus on necessary data. The explicit copy
    # avoids SettingWithCopyWarning on the assignments below.
    df = df[['precinct', 'registered_voters', 'yes', 'no', 'total_votes']].copy()
    df.columns = ['precinct', 'registered_voters', 'yes', 'no', 'votes_cast']

    # Shift the precinct labels DOWN by 3 rows so each "PCT ####" label lands on
    # the row three below it, which is the row whose numbers we keep.
    df['precinct'] = df['precinct'].shift(3)

    # Keep only the rows that now carry a precinct label.
    df = df.dropna(subset=['precinct'])
    df = df[df['precinct'].str.contains('PCT', na=False)].copy()

    # Clean up precinct column data: "PCT 9401 MB" -> "9401".
    df['precinct'] = (
        df['precinct']
        .str.replace('PCT ', '', regex=False)
        .str.replace(' MB', '', regex=False)
    )

    # Turnout in percent, one decimal; 0/0 (NaN) is reported as 0.
    df['turnout'] = ((df['votes_cast'] / df['registered_voters']) * 100).round(1).fillna(0)

    return df

In [770]:
# Collapse the raw Sheet2 frame to the labelled precinct rows. The raw frame is
# overwritten, so re-running this cell without re-running the read_excel cell
# raises a KeyError (the processed frame no longer has a 'total_votes' column).
dfA = process_proposition(dfA)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['precinct'] = df['precinct'].shift(3)


In [771]:
# Bin edges for pd.cut: one wide bin below 25, 5-point steps from 25 to 75,
# and one wide bin from 75 up to 100.
bins = [0, *range(25, 80, 5), 100]

# One label per bin, in the same order as the edges above.
labels = (
    ['Less than 25%']
    + [f'{low}-{low + 5}%' for low in range(25, 75, 5)]
    + ['75% and more']
)

In [772]:
# Generate random yes/no counts for dfA such that yes + no never exceeds
# registered_voters.

def generate_yes_no(row):
    """Draw random placeholder 'yes' and 'no' counts for one precinct row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``row['registered_voters']``.

    Returns
    -------
    pandas.Series
        Two integers ``[yes, no]`` with ``0 <= yes``, ``0 <= no`` and
        ``yes + no <= registered_voters``. ``[0, 0]`` when registered_voters is
        missing (NaN), zero or negative.
    """
    registered_voters = row['registered_voters']

    # Nothing to draw from: missing, zero or negative registration.
    if pd.isna(registered_voters) or registered_voters <= 0:
        return pd.Series([0, 0])

    # registered_voters is a float column in dfA; randint() rejects floats on
    # Python >= 3.12, so convert explicitly (any fractional part is truncated).
    registered_voters = int(registered_voters)

    yes = randint(0, registered_voters)
    # 'no' can only use what 'yes' left over.
    no = randint(0, registered_voters - yes)

    return pd.Series([yes, no])

# Overwrite yes/no with the random draws from generate_yes_no, then rebuild
# votes_cast (yes + no) and turnout (percent, one decimal, NaN -> 0) from them.
# NOTE(review): this replaces the values read from the workbook with random
# placeholder numbers, and no random seed is set, so every run differs --
# presumably test data for the maps; confirm before publishing the exports.

dfA[['yes', 'no']] = dfA.apply(generate_yes_no, axis=1)
dfA['votes_cast'] = dfA['yes'] + dfA['no']
dfA['turnout'] = round((dfA['votes_cast'] / dfA['registered_voters']) * 100, 1).fillna(0)


In [773]:
dfTurnout.head()

Unnamed: 0,precinct,registered_voters,votes_cast,turnout
5,9401,2566,0,0.0
9,9403,2247,0,0.0
13,9404,3243,0,0.0
17,9406,2415,0,0.0
21,9408,2979,0,0.0


In [774]:
# Placeholder data for the turnout table as well: draw a random votes_cast
# between 0 and registered_voters for every precinct (0 when nobody is registered).
dfTurnout['votes_cast'] = [
    randint(0, registered) if registered > 0 else 0
    for registered in dfTurnout['registered_voters']
]

# Recompute turnout (percent, one decimal, NaN -> 0) from the new counts.
dfTurnout['turnout'] = (
    (dfTurnout['votes_cast'] / dfTurnout['registered_voters']) * 100
).round(1).fillna(0)

In [775]:
dfA

Unnamed: 0,precinct,registered_voters,yes,no,votes_cast,turnout
5,9401,2566.0,305,12,317,12.4
9,9403,2247.0,183,1825,2008,89.4
13,9404,3243.0,2982,15,2997,92.4
17,9406,2415.0,1436,542,1978,81.9
21,9408,2979.0,2515,386,2901,97.4
25,9413,3296.0,67,1932,1999,60.6
29,9414,2227.0,769,1323,2092,93.9
33,9417,2211.0,2077,35,2112,95.5
37,9421,2106.0,33,1356,1389,66.0
41,9423,2363.0,700,1125,1825,77.2


In [776]:

# Bucket each precinct's turnout percentage (votes_cast / registered_voters * 100)
# into the labelled ranges defined by `bins` / `labels`.
# Despite its name, the 'yes_perc' column of dfTurnout holds this turnout bucket.
dfTurnout['yes_perc'] = pd.cut(
    (dfTurnout['votes_cast'] / dfTurnout['registered_voters']) * 100,
    bins=bins,
    labels=labels,
    include_lowest=True,
)

In [777]:
# Precinct ids as integers (they are turned back into strings before the merge).
dfA['precinct'] = dfA['precinct'].astype(int)

# Share of 'yes' among the votes cast, in percent.
dfA['yes_perc'] = dfA['yes'].div(dfA['votes_cast']).mul(100)

# Bucket label for that share, stored as plain text. A precinct with no votes
# has a NaN share, which ends up as the string 'nan' here.
dfA['yes_perc_bin'] = pd.cut(
    dfA['yes_perc'],
    bins=bins,
    labels=labels,
    include_lowest=True,
).astype(str)

# Done after bucketing: precincts with no votes get the text 'no data' as their share.
dfA['yes_perc'] = dfA['yes_perc'].fillna('no data')

In [778]:
dfTurnout

Unnamed: 0,precinct,registered_voters,votes_cast,turnout,yes_perc
5,9401,2566,2237,87.2,75% and more
9,9403,2247,294,13.1,Less than 25%
13,9404,3243,2676,82.5,75% and more
17,9406,2415,2055,85.1,75% and more
21,9408,2979,2682,90.0,75% and more
25,9413,3296,2873,87.2,75% and more
29,9414,2227,1820,81.7,75% and more
33,9417,2211,1263,57.1,55-60%
37,9421,2106,1715,81.4,75% and more
41,9423,2363,2201,93.1,75% and more


# merging

In [779]:
# Keep only the precinct id and the geometry. .copy() detaches the selection
# from the full shapefile frame, and the bracket assignment replaces the
# attribute-style one, so setting the column does not raise SettingWithCopyWarning.
gdf = gdf[['Prec_2022','geometry']].copy()

# Precinct ids as strings, so they can be matched against the `groupings` members.
gdf['Prec_2022'] = gdf['Prec_2022'].astype(str)

In [780]:
gdf

Unnamed: 0,Prec_2022,geometry
0,7042,"POLYGON ((-122.42165 37.71029, -122.42100 37.7..."
1,1107,"POLYGON ((-122.45595 37.71134, -122.45593 37.7..."
2,1145,"POLYGON ((-122.44617 37.71104, -122.44550 37.7..."
3,7043,"POLYGON ((-122.41508 37.71166, -122.41289 37.7..."
4,7046,"POLYGON ((-122.40973 37.71195, -122.40883 37.7..."
...,...,...
509,9201,"POLYGON ((-122.47754 37.81011, -122.47750 37.8..."
510,9202,"POLYGON ((-122.47725 37.81102, -122.47711 37.8..."
511,7301,"POLYGON ((-122.41991 37.81163, -122.41990 37.8..."
512,7645,"POLYGON ((-122.36862 37.83116, -122.36733 37.8..."


In [781]:
# Consolidated precincts: each key is the label given to the merged precinct,
# each value lists the Prec_2022 ids from the shapefile that are dissolved into it.
# The key is always the first member of its own list.
groupings = {
    "9401": ["9401", "9402"], 
    "9403": ["9403", "9411"],
    "9404": ["9404", "9405", "9412"],
    "9406": ["9406", "9407"],
    "9408": ["9408", "9409", "9416"],
    "9413": ["9413", "9418", "9419"], 
    "9414": ["9414", "9415"],
    "9417": ["9417", "9424"],
    "9421": ["9421", "9422"], 
    "9423": ["9423", "9428"], 
    "9425": ["9425", "9426", "9431"],
    "9427": ["9427", "9433", "9434"],
    "9429": ["9429", "9436", "9438"], 
    "9432": ["9432", "9441"], 
    "9435": ["9435", "9443"],
    "9437": ["9437", "9444", "9445"],
    "9439": ["9439","9446"], 
    "9442": ["9442", "9447", "9448"],
    "9449": ["9449"],
    "9451": ["9451", "9452"]
}

In [782]:
# Merge the shapefile precincts according to `groupings`: every member id is
# relabelled with the key of its group, ids outside every group keep their own label.

def _consolidated_id(prec_id):
    """Return the key of the first `groupings` entry listing `prec_id`, else `prec_id`."""
    for group_key, member_ids in groupings.items():
        if prec_id in member_ids:
            return group_key
    return prec_id

gdf['precinct'] = gdf['Prec_2022'].astype(str).apply(_consolidated_id)

# Union the geometries that share a label: one row per consolidated precinct.
gdf = gdf.dissolve(by='precinct', as_index=False).reset_index(drop=True)

gdf

Unnamed: 0,precinct,geometry,Prec_2022
0,1101,"POLYGON ((-122.47167 37.72163, -122.47078 37.7...",1101
1,1102,"POLYGON ((-122.46802 37.71610, -122.46800 37.7...",1102
2,1103,"POLYGON ((-122.46625 37.71968, -122.46715 37.7...",1103
3,1104,"POLYGON ((-122.46266 37.72000, -122.46267 37.7...",1104
4,1105,"POLYGON ((-122.46263 37.71742, -122.46262 37.7...",1105
...,...,...,...
482,9808,"POLYGON ((-122.44106 37.73497, -122.44031 37.7...",9808
483,9901,"POLYGON ((-122.43251 37.73311, -122.43233 37.7...",9901
484,9902,"POLYGON ((-122.40396 37.73161, -122.40340 37.7...",9902
485,9903,"POLYGON ((-122.33081 37.78579, -122.33061 37.7...",9903


In [783]:
# Export the dissolved precinct geometries as GeoJSON (written to the current
# working directory).

gdf.to_file("sf_precincts_merged.geojson", driver='GeoJSON', encoding='utf-8')

In [784]:
# Store the turnout bucket (column 'yes_perc', a categorical produced by pd.cut)
# as plain strings, like yes_perc_bin in dfA. Missing buckets become the text 'nan'.

dfTurnout['yes_perc'] = dfTurnout['yes_perc'].astype(str)

In [785]:
dfTurnout

Unnamed: 0,precinct,registered_voters,votes_cast,turnout,yes_perc
5,9401,2566,2237,87.2,75% and more
9,9403,2247,294,13.1,Less than 25%
13,9404,3243,2676,82.5,75% and more
17,9406,2415,2055,85.1,75% and more
21,9408,2979,2682,90.0,75% and more
25,9413,3296,2873,87.2,75% and more
29,9414,2227,1820,81.7,75% and more
33,9417,2211,1263,57.1,55-60%
37,9421,2106,1715,81.4,75% and more
41,9423,2363,2201,93.1,75% and more


In [786]:
gdf

Unnamed: 0,precinct,geometry,Prec_2022
0,1101,"POLYGON ((-122.47167 37.72163, -122.47078 37.7...",1101
1,1102,"POLYGON ((-122.46802 37.71610, -122.46800 37.7...",1102
2,1103,"POLYGON ((-122.46625 37.71968, -122.46715 37.7...",1103
3,1104,"POLYGON ((-122.46266 37.72000, -122.46267 37.7...",1104
4,1105,"POLYGON ((-122.46263 37.71742, -122.46262 37.7...",1105
...,...,...,...
482,9808,"POLYGON ((-122.44106 37.73497, -122.44031 37.7...",9808
483,9901,"POLYGON ((-122.43251 37.73311, -122.43233 37.7...",9901
484,9902,"POLYGON ((-122.40396 37.73161, -122.40340 37.7...",9902
485,9903,"POLYGON ((-122.33081 37.78579, -122.33061 37.7...",9903


In [787]:
# Attach the dissolved geometry to the turnout rows. The merge is an inner join
# on 'precinct', so precincts without a turnout row are dropped. Only the
# columns needed for the export are kept, in this order.
dfTurnout = gdf.merge(dfTurnout, on='precinct')[
    ['precinct','registered_voters','votes_cast','yes_perc','turnout','geometry']
]
gdfTurnout = GeoDataFrame(dfTurnout, geometry='geometry', crs="EPSG:4269")

In [788]:
dfA.dtypes

precinct               int64
registered_voters    float64
yes                    int64
no                     int64
votes_cast             int64
turnout              float64
yes_perc             float64
yes_perc_bin          object
dtype: object

In [789]:
# Convert dfA['precinct'] back to strings: it was cast to int earlier, while
# gdf['precinct'] holds strings, and the merge below matches on that column.

dfA['precinct'] = dfA['precinct'].astype(str)

In [790]:
# Attach the dissolved geometry to the proposition rows. Inner join on
# 'precinct': precincts present in only one of the two frames are dropped.

dfA = gdf.merge(dfA, how='inner', on='precinct')

In [791]:
dfA.columns

Index(['precinct', 'geometry', 'Prec_2022', 'registered_voters', 'yes', 'no',
       'votes_cast', 'turnout', 'yes_perc', 'yes_perc_bin'],
      dtype='object')

In [792]:
dfA

Unnamed: 0,precinct,geometry,Prec_2022,registered_voters,yes,no,votes_cast,turnout,yes_perc,yes_perc_bin
0,9401,"POLYGON ((-122.50593 37.76042, -122.50700 37.7...",9401,2566.0,305,12,317,12.4,96.214511,75% and more
1,9403,"POLYGON ((-122.49555 37.75714, -122.49602 37.7...",9411,2247.0,183,1825,2008,89.4,9.113546,Less than 25%
2,9404,"POLYGON ((-122.48959 37.75740, -122.49066 37.7...",9412,3243.0,2982,15,2997,92.4,99.499499,75% and more
3,9406,"POLYGON ((-122.47712 37.76356, -122.47697 37.7...",9406,2415.0,1436,542,1978,81.9,72.598584,70-75%
4,9408,"POLYGON ((-122.50112 37.75315, -122.50219 37.7...",9416,2979.0,2515,386,2901,97.4,86.694243,75% and more
5,9413,"POLYGON ((-122.48492 37.75200, -122.48599 37.7...",9419,3296.0,67,1932,1999,60.6,3.351676,Less than 25%
6,9414,"POLYGON ((-122.47660 37.75611, -122.47713 37.7...",9415,2227.0,769,1323,2092,93.9,36.759082,35-40%
7,9417,"POLYGON ((-122.49518 37.75165, -122.49516 37.7...",9424,2211.0,2077,35,2112,95.5,98.342803,75% and more
8,9421,"POLYGON ((-122.47635 37.75238, -122.47742 37.7...",9422,2106.0,33,1356,1389,66.0,2.37581,Less than 25%
9,9423,"POLYGON ((-122.50407 37.74928, -122.50394 37.7...",9428,2363.0,700,1125,1825,77.2,38.356164,35-40%


In [793]:
# Keep only the export columns (this drops the leftover Prec_2022 id) and wrap
# the result as a GeoDataFrame in EPSG:4269.
dfA = dfA[[
    'precinct',
    'registered_voters',
    'yes',
    'no',
    'votes_cast',
    'turnout',
    'yes_perc',
    'yes_perc_bin',
    'geometry',
]]
gdfA = GeoDataFrame(dfA, geometry='geometry', crs="EPSG:4269")

In [794]:
# Export both layers as GeoJSON under ./docs.
# NOTE(review): the ./docs/turnout and ./docs/propA folders presumably have to
# exist already -- confirm that to_file does not create them.

gdfTurnout.to_file("./docs/turnout/turnout.geojson", driver='GeoJSON', encoding='utf-8')
gdfA.to_file("./docs/propA/propA.geojson", driver='GeoJSON', encoding='utf-8')

In [795]:
gdfTurnout

Unnamed: 0,precinct,registered_voters,votes_cast,yes_perc,turnout,geometry
0,9401,2566,2237,75% and more,87.2,"POLYGON ((-122.50593 37.76042, -122.50700 37.7..."
1,9403,2247,294,Less than 25%,13.1,"POLYGON ((-122.49555 37.75714, -122.49602 37.7..."
2,9404,3243,2676,75% and more,82.5,"POLYGON ((-122.48959 37.75740, -122.49066 37.7..."
3,9406,2415,2055,75% and more,85.1,"POLYGON ((-122.47712 37.76356, -122.47697 37.7..."
4,9408,2979,2682,75% and more,90.0,"POLYGON ((-122.50112 37.75315, -122.50219 37.7..."
5,9413,3296,2873,75% and more,87.2,"POLYGON ((-122.48492 37.75200, -122.48599 37.7..."
6,9414,2227,1820,75% and more,81.7,"POLYGON ((-122.47660 37.75611, -122.47713 37.7..."
7,9417,2211,1263,55-60%,57.1,"POLYGON ((-122.49518 37.75165, -122.49516 37.7..."
8,9421,2106,1715,75% and more,81.4,"POLYGON ((-122.47635 37.75238, -122.47742 37.7..."
9,9423,2363,2201,75% and more,93.1,"POLYGON ((-122.50407 37.74928, -122.50394 37.7..."
