In [479]:
import numpy as np
import pandas as pd
from pandas import json_normalize
import geopandas as gpd
from geopandas import GeoDataFrame
import xlrd
from random import randint
import requests
from io import BytesIO


pd.set_option('display.max_columns', None)

In [480]:
gdf = gpd.read_file("./shp/SF_DOE_PREC_2022_07_18_pg.shp", encoding='utf-8')
gdf = gdf.to_crs('EPSG:4269')

In [481]:
url = 'https://www.sfelections.org/results/20250916/data/20250807/sov.xlsx'

In [482]:
response = requests.get(url)

In [483]:
dfTurnout = pd.read_excel(BytesIO(response.content), sheet_name='Sheet1', header=4, skipfooter=10)
dfA = pd.read_excel(BytesIO(response.content), sheet_name='Sheet2', header=3, skipfooter=8)

In [484]:
dfA

Unnamed: 0,Precinct,Registered \nVoters,Undervotes,Unnamed: 3,Overvotes,Precinct.1,Yes\n,Unnamed: 7,No\n,Unnamed: 9,Total Votes,Unnamed: 11
0,Electionwide,,,,,Electionwide,,,,,,
1,Electionwide,,,,,Electionwide,,,,,,
2,PCT 9401,,,,,PCT 9401,,,,,,
3,Election Day,2566.0,0.0,,0.0,Election Day,0.0,,0.0,,0.0,
4,Vote by Mail,2566.0,0.0,,0.0,Vote by Mail,0.0,,0.0,,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...
77,Total,594.0,0.0,,0.0,Total,0.0,,0.0,,0.0,
78,PCT 9451,,,,,PCT 9451,,,,,,
79,Election Day,2383.0,0.0,,0.0,Election Day,0.0,,0.0,,0.0,
80,Vote by Mail,2383.0,0.0,,0.0,Vote by Mail,0.0,,0.0,,0.0,


In [485]:
###
### TURNOUT
###

#trim and rename columns
dfTurnout = dfTurnout[['Precinct','Registered\nVoters','Voters Cast']]
dfTurnout.columns = ['precinct','registered_voters','votes_cast']

# shift precinct column

dfTurnout['precinct'] = dfTurnout['precinct'].shift(3)

#drop unnecessary ones
dfTurnout = dfTurnout.dropna()
dfTurnout = dfTurnout[dfTurnout["precinct"].str.contains('PCT')]

#tidy up precinct column data
dfTurnout['precinct'] = dfTurnout['precinct'].str.replace('PCT ','').str.replace(' MB','')

#change data types
dfTurnout.registered_voters = dfTurnout.registered_voters.astype(int)
dfTurnout.votes_cast = dfTurnout.votes_cast.astype(int)

#create turnout column
dfTurnout['turnout'] = round((dfTurnout['votes_cast'] / dfTurnout['registered_voters']) * 100, 1)
dfTurnout['turnout'] = dfTurnout['turnout'].fillna(0)

In [486]:
dfTurnout

Unnamed: 0,precinct,registered_voters,votes_cast,turnout
5,9401,2566,0,0.0
9,9403,2247,0,0.0
13,9404,3243,0,0.0
17,9406,2415,0,0.0
21,9408,2979,0,0.0
25,9413,3296,0,0.0
29,9414,2227,0,0.0
33,9417,2211,0,0.0
37,9421,2106,0,0.0
41,9423,2363,0,0.0


In [487]:
def process_proposition(df):
    # Standardize column names
    df.columns = df.columns.str.replace('\n', '', regex=True)
    df.columns = df.columns.str.replace('"', '', regex=True)
    df.columns = df.columns.str.replace('.', '', regex=True)
    df.columns = df.columns.str.lower().str.replace(' ', '_', regex=True)
    df.columns = df.columns.str.replace('_$', '', regex=True)

    # if the column name contains "bonds_-_yes", replace it with "yes"

    df.columns = df.columns.str.replace('bonds_-_yes', 'yes', regex=True)
    df.columns = df.columns.str.replace('bonds_-_no', 'no', regex=True)
    
    # Trim and rename columns to focus on necessary data
    df = df[['precinct', 'registered_voters', 'yes', 'no', 'total_votes']]
    df.columns = ['precinct', 'registered_voters', 'yes', 'no', 'votes_cast']

    # shift the precinct column up by 3 rows

    df['precinct'] = df['precinct'].shift(3)

    # Drop rows where precinct is NaN after the shift
    df = df.dropna(subset=['precinct'])
    df = df[df['precinct'].str.contains('PCT', na=False)]

    # Clean up precinct column data
    df['precinct'] = df['precinct'].str.replace('PCT ', '', regex=True).str.replace(' MB', '', regex=True)

    # Ensure numeric columns are of the correct type
    # df['registered_voters'] = pd.to_numeric(df['registered_voters'], errors='coerce').fillna(0).astype(int)
    # df['votes_cast'] = pd.to_numeric(df['votes_cast'], errors='coerce').fillna(0).astype(int)

    # Calculate turnout
    df['turnout'] = round((df['votes_cast'] / df['registered_voters']) * 100, 1).fillna(0)

    return df

In [488]:
dfA = process_proposition(dfA)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['precinct'] = df['precinct'].shift(3)


In [489]:
bins = [0, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 100]
labels = ['Less than 25%', '25-30%', '30-35%', '35-40%', '40-45%', '45-50%', '50-55%', '55-60%', '60-65%', '65-70%', '70-75%', '75% and more']

In [490]:
# I want to generate random numbers for yes and no columns in dfA, but the sum of yes and no should be  no more than registered_voters

def generate_yes_no(row):
    registered_voters = row['registered_voters']
    if registered_voters == 0:
        return pd.Series([0, 0])
    
    yes = randint(0, registered_voters)
    no = randint(0, registered_voters - yes)
    
    return pd.Series([yes, no])

# make votes_cast the sum of yes and no

dfA[['yes', 'no']] = dfA.apply(generate_yes_no, axis=1)
dfA['votes_cast'] = dfA['yes'] + dfA['no']
dfA['turnout'] = round((dfA['votes_cast'] / dfA['registered_voters']) * 100, 1).fillna(0)


In [491]:
dfTurnout.head()

Unnamed: 0,precinct,registered_voters,votes_cast,turnout
5,9401,2566,0,0.0
9,9403,2247,0,0.0
13,9404,3243,0,0.0
17,9406,2415,0,0.0
21,9408,2979,0,0.0


In [492]:
# do the same thing for dfTurnout. This time we are just populating the votes_cast column with random numbers

dfTurnout['votes_cast'] = dfTurnout.apply(
    lambda row: randint(0, row['registered_voters']) if row['registered_voters'] > 0 else 0,
    axis=1
)
dfTurnout['turnout'] = round((dfTurnout['votes_cast'] / dfTurnout['registered_voters']) * 100, 1).fillna(0)

In [493]:
dfA

Unnamed: 0,precinct,registered_voters,yes,no,votes_cast,turnout
5,9401,2566.0,2197,277,2474,96.4
9,9403,2247.0,1807,242,2049,91.2
13,9404,3243.0,2214,312,2526,77.9
17,9406,2415.0,608,417,1025,42.4
21,9408,2979.0,2762,34,2796,93.9
25,9413,3296.0,245,2664,2909,88.3
29,9414,2227.0,492,310,802,36.0
33,9417,2211.0,1957,211,2168,98.1
37,9421,2106.0,903,328,1231,58.5
41,9423,2363.0,1117,889,2006,84.9


In [494]:

# Calculate voter turnout percentage
dfTurnout['yes_perc'] = (dfTurnout['votes_cast'] / dfTurnout['registered_voters']) * 100

# Categorize turnout percentages into bins
dfTurnout['yes_perc'] = pd.cut(dfTurnout['yes_perc'], bins=bins, labels=labels, include_lowest=True)

In [495]:
dfA['precinct'] = dfA['precinct'].astype(int)
dfA['yes_perc'] = (dfA['yes'] / dfA['votes_cast']) * 100
dfA['yes_perc_bin'] = pd.cut(dfA['yes_perc'], bins=bins, labels=labels, include_lowest=True)
dfA['yes_perc'] = dfA['yes_perc'].fillna('no data')
dfA['yes_perc_bin'] = dfA['yes_perc_bin'].astype(str)

In [496]:
dfTurnout

Unnamed: 0,precinct,registered_voters,votes_cast,turnout,yes_perc
5,9401,2566,813,31.7,30-35%
9,9403,2247,916,40.8,40-45%
13,9404,3243,1881,58.0,55-60%
17,9406,2415,1123,46.5,45-50%
21,9408,2979,516,17.3,Less than 25%
25,9413,3296,1325,40.2,40-45%
29,9414,2227,2140,96.1,75% and more
33,9417,2211,820,37.1,35-40%
37,9421,2106,747,35.5,35-40%
41,9423,2363,2222,94.0,75% and more


# merging

In [497]:
gdf = gdf[['Prec_2022','geometry']]
gdf.Prec_2022 = gdf.Prec_2022.astype(str)

In [498]:
# make dfTurnout yes_perc a string too

dfTurnout['yes_perc'] = dfTurnout['yes_perc'].astype(str)

In [499]:
dfTurnout = gdf.merge(dfTurnout, right_on='precinct', left_on='Prec_2022')
dfTurnout = dfTurnout[['precinct','registered_voters','votes_cast','yes_perc','turnout','geometry']]
gdfTurnout = GeoDataFrame(dfTurnout, crs="EPSG:4269", geometry='geometry')

In [500]:
dfA.dtypes

precinct               int64
registered_voters    float64
yes                    int64
no                     int64
votes_cast             int64
turnout              float64
yes_perc             float64
yes_perc_bin          object
dtype: object

In [501]:
# make dfA precinct string

dfA['precinct'] = dfA['precinct'].astype(str)

In [502]:
# merge 

dfA = gdf.merge(dfA, right_on='precinct', left_on='Prec_2022')

In [503]:
dfA.columns

Index(['Prec_2022', 'geometry', 'precinct', 'registered_voters', 'yes', 'no',
       'votes_cast', 'turnout', 'yes_perc', 'yes_perc_bin'],
      dtype='object')

In [504]:
# make it a geodataframe

dfA = dfA[['precinct','registered_voters','yes','no','votes_cast','turnout','yes_perc','yes_perc_bin','geometry']]
gdfA = GeoDataFrame(dfA, crs="EPSG:4269", geometry='geometry')

In [505]:
# export both 

gdfTurnout.to_file("./docs/turnout/turnout.geojson", driver='GeoJSON', encoding='utf-8')

gdfA.to_file("./docs/propA/propA.geojson", driver='GeoJSON', encoding='utf-8')

In [506]:
gdfTurnout

Unnamed: 0,precinct,registered_voters,votes_cast,yes_perc,turnout,geometry
0,9449,594,592,75% and more,99.7,"POLYGON ((-122.49393 37.73381, -122.49388 37.7..."
1,9451,2383,1907,75% and more,80.0,"POLYGON ((-122.48465 37.73423, -122.48452 37.7..."
2,9437,3233,2281,70-75%,70.6,"POLYGON ((-122.49440 37.74035, -122.49485 37.7..."
3,9439,2006,708,35-40%,35.3,"POLYGON ((-122.49069 37.74239, -122.49056 37.7..."
4,9442,3195,681,Less than 25%,21.3,"POLYGON ((-122.47890 37.74291, -122.47784 37.7..."
5,9435,2214,372,Less than 25%,16.8,"POLYGON ((-122.50368 37.74369, -122.50355 37.7..."
6,9432,2164,1589,70-75%,73.4,"POLYGON ((-122.48666 37.74631, -122.48654 37.7..."
7,9429,3029,2478,75% and more,81.8,"POLYGON ((-122.49503 37.74957, -122.49492 37.7..."
8,9427,3041,267,Less than 25%,8.8,"POLYGON ((-122.47634 37.75226, -122.47623 37.7..."
9,9425,2761,642,Less than 25%,23.3,"POLYGON ((-122.49135 37.75171, -122.49121 37.7..."
