# Process Voting Locations for Third Party Consumption

- Vote By Mail Boxes
- Voting Centers

Vote By Mail Boxes are available from October 3, 2024 to 8:00pm November 5, 2024.


In [123]:
import pandas as pd
import os

# Show every column, at full cell width, whenever a frame is previewed.
for display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)

# Input geodata files: Vote By Mail boxes first, Voting Centers second.
VBM_PATH, VC_PATH = map(
    os.path.normpath,
    ('la-county-rrcc-vbm-geodata.json', 'la-county-rrcc-vc-geodata.json'),
)

## Setup Dataframes

### Vote By Mail (VBM) Boxes

In [124]:
# vbm_df keeps the data exactly as loaded; later cells only modify the
# independent vbm_mod_df copy.
vbm_df = pd.read_json(VBM_PATH)
vbm_mod_df = vbm_df.copy(deep=True)

# Preview (disabled): vbm_mod_df.head()

### Voting Centers

In [125]:
# vc_df keeps the data exactly as loaded; later cells only modify the
# independent vc_mod_df copy.
vc_df = pd.read_json(VC_PATH)
vc_mod_df = vc_df.copy(deep=True)

# Preview (disabled): vc_mod_df.head()

## Remove Columns

Before removing a candidate column, check that it holds only one unique value; candidate columns with more than one unique value are kept.

Remove the columns not needed by consumers, leaving only:

- `locationID`
- `name`
- `hours`
- `latitude`
- `longitude`
- `votingPeriod`

In [126]:
# Candidate columns: the cells below drop each of these only when the loaded
# frame holds a single unique value for it.
columns_to_drop = [
    # location status
    'parkingArea', 'isActive', 'isCancelled',
    # wait time
    'waitTime', 'waitTimeTS',
    # opening / closing times
    'earlyVotingOpenTimeInPT', 'earlyVotingCloseTimeInPT',
    'electionDayOpenTimeInPT', 'electionDayCloseTimeInPT',
    # derived flags
    'isInEarlyVotingRange', 'isInElectionDayRange',
    'isWaitTimeUpdated', 'isOpenButNotReported', 'showLocationClosed',
    # election dates
    # NOTE(review): 'earlyVotingEndtDate' presumably mirrors the spelling of
    # the source field -- confirm before "correcting" it.
    'electionDate', 'earlyVotingStartDate', 'earlyVotingEndtDate',
    'secondWaveEarlyVotingStartDate',
]

# Extra columns: removed from the output without the single-value check.
# 'parkingArea' is listed here as well as in columns_to_drop.
extra_columns = [
    'locationTypeCode', 'electionID',
    'address', 'city', 'state', 'zipCode',
    'parkingArea',
]

### Vote By Mail (VBM) Boxes

In [127]:
# Candidate columns are removed only when the loaded VBM frame holds a single
# unique value for them.
constant_columns = [
    column for column in columns_to_drop
    if vbm_df[column].unique().size == 1
]
vbm_mod_df = vbm_mod_df.drop(columns=constant_columns)

# Extra columns are removed whenever they are still present at this point.
remaining_extras = [
    column for column in extra_columns
    if column in vbm_mod_df.columns
]
vbm_mod_df = vbm_mod_df.drop(columns=remaining_extras)

vbm_mod_df.head()

Unnamed: 0,locationID,name,hours,additionalInformation,votingPeriod,latitude,longitude
0,16400,A C Bilbrew Library,24 - Hour,VBM Drop Box,,33.916208,-118.272005
1,16313,Acton Park,24 - Hour,VBM Drop Box,,34.472052,-118.198579
2,16358,Adventure Park,6:30 AM - 9:00 PM,VBM Drop Box,,33.942315,-118.037057
3,16209,Agoura Hills Civic Center - City Hall,24 - Hour,VBM Drop Box,,34.144291,-118.776063
4,16054,Ahmanson Senior Citizen Center - Expo Center,Sunrise - Sunset,VBM Drop Box,,34.011686,-118.289954


### Voting Centers

In [128]:
# Drop each candidate column only when the loaded voting-center frame holds a
# single unique value for it; candidates that vary between rows are kept.
# errors='ignore' lets the cell be re-run after the columns are already gone.
for column in columns_to_drop:
    if vc_df[column].unique().size == 1:
        vc_mod_df = vc_mod_df.drop(columns=[column], errors='ignore')

# 'parkingArea' is listed in both columns_to_drop and extra_columns, so the
# loop above may already have removed it. errors='ignore' skips labels that
# are no longer present instead of raising KeyError, matching the
# "remove it if it exists" handling used for the Vote By Mail frame.
vc_mod_df = vc_mod_df.drop(columns=extra_columns, errors='ignore')

vc_mod_df.head()

Unnamed: 0,locationID,name,hours,additionalInformation,votingPeriod,latitude,longitude
0,2035,107th Street Elementary School,11/02 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,Teachers Development Center,4,33.939413,-118.272195
1,14483,10th Street Elementary School,11/02 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,Auditorium,4,34.048471,-118.27315
2,12765,4th Street Elementary School,11/02 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,Auditorium,4,34.028465,-118.15333
3,10762,54th Street Elementary School,11/02 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,Auditorium,4,33.992574,-118.343547
4,12346,93rd Street Elementary School,11/02 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,Auditorium,4,33.951443,-118.267611


In [129]:
# First rows of the voting centers whose votingPeriod equals 4
is_period_4 = vc_mod_df['votingPeriod'].eq(4)
vc_mod_df.loc[is_period_4].head()

Unnamed: 0,locationID,name,hours,additionalInformation,votingPeriod,latitude,longitude
0,2035,107th Street Elementary School,11/02 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,Teachers Development Center,4,33.939413,-118.272195
1,14483,10th Street Elementary School,11/02 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,Auditorium,4,34.048471,-118.27315
2,12765,4th Street Elementary School,11/02 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,Auditorium,4,34.028465,-118.15333
3,10762,54th Street Elementary School,11/02 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,Auditorium,4,33.992574,-118.343547
4,12346,93rd Street Elementary School,11/02 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,Auditorium,4,33.951443,-118.267611


In [130]:
# First rows of the voting centers whose votingPeriod equals 11
is_period_11 = vc_mod_df['votingPeriod'].eq(11)
vc_mod_df.loc[is_period_11].head()

Unnamed: 0,locationID,name,hours,additionalInformation,votingPeriod,latitude,longitude
5,123,A C Bilbrew Library,10/26 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,Meeting Room,11,33.915937,-118.272369
11,6769,Adventure Park,10/26 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,Gymnasium,11,33.942448,-118.036963
14,13375,Ahmanson Senior Center,10/26 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,Ballroom,11,34.011524,-118.289939
25,13752,Anderson Munger Family YMCA,10/26 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,Multi-Purpose Room,11,34.069208,-118.308424
33,12803,Armenian Cilicia Evangelical Church,10/26 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,Derian Hall,11,34.140211,-118.099067


Modify voting center data to account for:

* Norwalk Headquarters hours
* Specific location information

In [131]:
# Override the row(s) with locationID 13565: set weekday office hours and
# votingPeriod 29.
# NOTE(review): presumably this is the Norwalk Headquarters entry -- confirm.
hq_rows = vc_mod_df['locationID'] == 13565
vc_mod_df.loc[hq_rows, 'hours'] = 'Monday-Friday (excluding holidays) 8AM - 5PM'
vc_mod_df.loc[hq_rows, 'votingPeriod'] = 29

# Print the overridden row(s) to verify the change.
print(vc_mod_df.loc[hq_rows])

     locationID                          name  \
300       13565  LA County Registrar-Recorder   

                                            hours additionalInformation  \
300  Monday-Friday (excluding holidays) 8AM - 5PM             Room 3201   

     votingPeriod   latitude   longitude  
300            29  33.915849 -118.067322  


In [132]:
# Combine name and additionalInformation into the name column as
# "<name>, <additionalInformation>".
# String concatenation with a missing value yields NaN, which would erase the
# location name, and an empty value would leave a dangling ", ". The detail is
# therefore appended only where it is present and non-blank; all other rows
# keep their original name.
detail = vc_mod_df['additionalInformation']
has_detail = detail.notna() & detail.astype(str).str.strip().ne('')
combined_name = vc_mod_df['name'] + ', ' + detail
vc_mod_df['name'] = combined_name.where(has_detail, vc_mod_df['name'])

# The detail now lives in name, so the separate column is no longer needed.
vc_mod_df = vc_mod_df.drop(columns=['additionalInformation'])
vc_mod_df.head()

Unnamed: 0,locationID,name,hours,votingPeriod,latitude,longitude
0,2035,"107th Street Elementary School, Teachers Development Center",11/02 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,4,33.939413,-118.272195
1,14483,"10th Street Elementary School, Auditorium",11/02 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,4,34.048471,-118.27315
2,12765,"4th Street Elementary School, Auditorium",11/02 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,4,34.028465,-118.15333
3,10762,"54th Street Elementary School, Auditorium",11/02 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,4,33.992574,-118.343547
4,12346,"93rd Street Elementary School, Auditorium",11/02 - 11/04: 10AM - 7PM<br/>11/05: 7AM - 8PM,4,33.951443,-118.267611


## Clean Data to Remove HTML tags

In [133]:
# Replace every HTML tag in the hours column (for example '<br/>') with ', '.
# regex=True must be passed explicitly: since pandas 2.0, Series.str.replace
# treats the pattern as a literal string by default, so without it no tag
# would match and the markup would stay in the exported data. Older pandas
# versions emit a FutureWarning when the argument is omitted.
HTML_TAG_PATTERN = r'<[^>]*>'
vbm_mod_df['hours'] = vbm_mod_df['hours'].str.replace(HTML_TAG_PATTERN, ', ', regex=True)
vc_mod_df['hours'] = vc_mod_df['hours'].str.replace(HTML_TAG_PATTERN, ', ', regex=True)

  vbm_mod_df['hours'] = vbm_mod_df['hours'].str.replace(r'<[^>]*>', ', ')
  vc_mod_df['hours'] = vc_mod_df['hours'].str.replace(r'<[^>]*>', ', ')


## Export Results

In [134]:
# Write each processed frame to its own JSON file as a list of row records.
export_targets = {
    'la-county-rrcc-vbm.json': vbm_mod_df,
    'la-county-rrcc-vc.json': vc_mod_df,
}
for output_path, frame in export_targets.items():
    frame.to_json(output_path, orient='records')

## Extra Analysis

Some values for the VBM box `hours` column are a bit vague.

In [135]:
# One row per distinct hours value found in the VBM data, in order of first
# appearance.
vbm_distinct_hours = vbm_mod_df['hours'].unique()
vbm_unique_hours_df = pd.DataFrame({'hours': vbm_distinct_hours})

vbm_unique_hours_df

Unnamed: 0,hours
0,24 - Hour
1,6:30 AM - 9:00 PM
2,Sunrise - Sunset
3,Hours of Operation
4,6 AM - 10 PM
5,24 - hour
6,7 AM - 9 PM
7,8 AM - 9:30 PM
8,7 AM - 8 PM
9,6:30 AM - 7:00 PM


Show the unique values of the `hours` column, grouped by the `votingPeriod` column.

In [136]:
# For each votingPeriod, collect the distinct hours values that occur in it.
hours_by_period = vc_mod_df.groupby('votingPeriod')[['hours']]
voting_periods_df = hours_by_period.agg(lambda values: values.unique())
voting_periods_df

Unnamed: 0_level_0,hours
votingPeriod,Unnamed: 1_level_1
4,"[11/02 - 11/04: 10AM - 7PM, 11/05: 7AM - 8PM]"
11,"[10/26 - 11/04: 10AM - 7PM, 11/05: 7AM - 8PM]"
29,[Monday-Friday (excluding holidays) 8AM - 5PM]


In [137]:
# Show the votingPeriod and hours columns for each unique value for the hours column
vc_unique_hours_df = pd.DataFrame(vc_mod_df['hours'].unique(), columns=['hours'])
vc_unique_hours_df

# vc_mod_df[vc_mod_df['hours'].str.contains('early voting')]

Unnamed: 0,hours
0,"11/02 - 11/04: 10AM - 7PM, 11/05: 7AM - 8PM"
1,"10/26 - 11/04: 10AM - 7PM, 11/05: 7AM - 8PM"
2,Monday-Friday (excluding holidays) 8AM - 5PM
