# Dr. Frank's 2020 Election Analysis for Ohio

Based on Robert Lee's Python code from [his GitHub repo](https://github.com/rlee32/election-fraud-ohio).

efg, 2022-03-24

[Ohio Online Databases](https://www.ohiosos.gov/secretary-office/online-databases/)

[Ohio County Voter Files](https://www6.ohiosos.gov/ords/f?p=VOTERFTP:HOME:::#cntyVtrFiles)

Example URL for Adams County in .csv format:

`https://www6.ohiosos.gov/ords/f?p=VOTERFTP:DOWNLOAD::FILE:NO:2:P2_PRODUCT_NUMBER:1`

In [1]:
import datetime
# Record when the notebook started; the last cell subtracts this from the
# stop time to report the total elapsed time.
start_time = datetime.datetime.now()
print (start_time)

2022-03-25 01:06:49.416738


In [2]:
import os
import sys
import json
import csv
import requests

# 1. Download Ohio Voter Registration Data

Based on Robert Lee's script `download_voter_database.py`

## 1.1 Sample download

In [3]:
# Sample download: the URL for product number 1 (the Adams County example
# above) and the local file name to save it under.
remote_url = "https://www6.ohiosos.gov/ords/f?p=VOTERFTP:DOWNLOAD::FILE:NO:2:P2_PRODUCT_NUMBER:1"
local_file = "Adams-County.txt"

In [4]:
!!curl $remote_url -o $local_file

['  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current',
 '                                 Dload  Upload   Total   Spent    Left  Speed',
 '',
 '  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0',
 '100    16  100    16    0     0    116      0 --:--:-- --:--:-- --:--:--   116']

However, this `!!curl` shell escape doesn't work inside a Python loop, so the downloads below use `requests` instead.

The `headers` trick below (sending a browser-like `User-agent`) is not always needed, but the Ohio SOS site requires it to avoid errors such as HTTP 403.

In [5]:
# Fetch the sample county file, sending a browser-like User-agent header.
# The timeout keeps a stalled connection from hanging the notebook, and
# raise_for_status() stops an HTTP error page from being saved as voter data.
response = requests.get(remote_url, headers={'User-agent': 'Mozilla/5.0'}, timeout=60)
response.raise_for_status()

with open(local_file, 'wb') as f:
    f.write(response.content)

## 1.2 Fetch data for all counties

In [6]:
# Product numbers 1..total_counties are downloaded, one file per county.
total_counties = 88

# Folder that receives the downloaded county CSV files.
VOTER_DATABASE_FOLDER = './voter_database'
# exist_ok=True lets this cell be re-run without failing because the folder
# already exists (os.mkdir would raise FileExistsError).
os.makedirs(VOTER_DATABASE_FOLDER, exist_ok=True)

print(datetime.datetime.now())

2022-03-25 01:06:52.762893


In [7]:
# Download one voter registration file per county into VOTER_DATABASE_FOLDER.
print(f'downloading voter registration database for all {total_counties} counties\n')
failed_downloads = []
# County product numbers in the download URL are 1-based.
for county_id in range(1, total_counties + 1):
    remote_url = f'https://www6.ohiosos.gov/ords/f?p=VOTERFTP:DOWNLOAD::FILE:NO:2:P2_PRODUCT_NUMBER:{county_id}'
    local_file = f'{VOTER_DATABASE_FOLDER}/{county_id}.csv'

    try:
        # The timeout applies to connecting and to each wait for data, not to
        # the whole transfer, so large county files are still allowed to finish.
        response = requests.get(remote_url, headers={'User-agent': 'Mozilla/5.0'}, timeout=60)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Never save an error page as county data; report it and move on.
        print(f'failed to download county_id {county_id}: {exc}')
        failed_downloads.append(county_id)
        continue

    with open(local_file, 'wb') as f:
        f.write(response.content)

    print(f'downloaded {county_id} / {total_counties} files.')

if failed_downloads:
    print(f'failed to download {len(failed_downloads)} counties: {failed_downloads}')

downloading voter registration database for all 88 counties

downloaded 1 / 88 files.
downloaded 2 / 88 files.
downloaded 3 / 88 files.
downloaded 4 / 88 files.
downloaded 5 / 88 files.
downloaded 6 / 88 files.
downloaded 7 / 88 files.
downloaded 8 / 88 files.
downloaded 9 / 88 files.
downloaded 10 / 88 files.
downloaded 11 / 88 files.
downloaded 12 / 88 files.
downloaded 13 / 88 files.
downloaded 14 / 88 files.
downloaded 15 / 88 files.
downloaded 16 / 88 files.
downloaded 17 / 88 files.
downloaded 18 / 88 files.
downloaded 19 / 88 files.
downloaded 20 / 88 files.
downloaded 21 / 88 files.
downloaded 22 / 88 files.
downloaded 23 / 88 files.
downloaded 24 / 88 files.
downloaded 25 / 88 files.
downloaded 26 / 88 files.
downloaded 27 / 88 files.
downloaded 28 / 88 files.
downloaded 29 / 88 files.
downloaded 30 / 88 files.
downloaded 31 / 88 files.
downloaded 32 / 88 files.
downloaded 33 / 88 files.
downloaded 34 / 88 files.
downloaded 35 / 88 files.
downloaded 36 / 88 files.
downloaded 3

In [8]:
# Timestamp printed after the county download cell has run.
print(datetime.datetime.now())

2022-03-25 01:14:24.638290


# 2. `jsonify` data files

Based on Robert Lee's script:  `jsonify.py`

## 2.1 "Helper" functions

In [9]:
def print_header(filepath):
    """Print each column name from a CSV file's header row with its index.

    Args:
        filepath: Path of the CSV file to inspect.

    One ``name index`` line is printed per column, with 0-based indices.
    Nothing is printed when the file contains no rows.
    """
    # newline='' lets the csv module do its own line-ending handling.
    with open(filepath, 'r', newline='') as f:
        # Only the first row is needed; None signals an empty file.
        header = next(csv.reader(f), None)
    if header is None:
        return
    for i, col in enumerate(header):
        print(col, i)

In [10]:
def convert_csv(filepath):
    """Extract the voter fields of interest from one county CSV file.

    The header row is used to locate the required columns; every following
    row becomes one dict. A tally of the VOTER_STATUS values is printed.

    Args:
        filepath: Path of the county voter CSV file.

    Returns:
        A list with one dict per data row. The keys are 'date_of_birth',
        'registration_date', 'voter_status' and 'general_<year>' for 2000,
        2016, 2020, 2004, 2008 and 2012 (in that order). Values are the
        strings read from the file, unmodified.

    Raises:
        ValueError: If the file has no header row or a required column is
            missing from the header.
        IndexError: If a data row is shorter than a required column index.
    """
    # (output key, CSV column name), in the order the keys appear per record.
    columns = (
        ('date_of_birth', 'DATE_OF_BIRTH'),
        ('registration_date', 'REGISTRATION_DATE'),
        ('voter_status', 'VOTER_STATUS'),
        ('general_2000', 'GENERAL-11/07/2000'),
        ('general_2016', 'GENERAL-11/08/2016'),
        ('general_2020', 'GENERAL-11/03/2020'),
        ('general_2004', 'GENERAL-11/02/2004'),
        ('general_2008', 'GENERAL-11/04/2008'),
        ('general_2012', 'GENERAL-11/06/2012'),
    )

    # newline='' lets the csv module do its own line-ending handling.
    with open(filepath, 'r', newline='') as f:
        csv_reader = csv.reader(f)
        header = next(csv_reader, None)
        if header is None:
            # An empty file would otherwise surface as an UnboundLocalError.
            raise ValueError(f'{filepath} is empty; no header row found')

        # list.index raises ValueError when a required column is absent.
        indices = [(key, header.index(name)) for key, name in columns]

        new_items = []
        statuses = {}
        for row in csv_reader:
            item = {key: row[index] for key, index in indices}
            new_items.append(item)
            voter_status = item['voter_status']
            statuses[voter_status] = statuses.get(voter_status, 0) + 1

    print(f'voter statuses: {statuses}')
    return new_items

## 2.2 Process files

In [11]:
# Folder that receives one JSON file per converted county.
OUTPUT_FOLDER = './jsonified/'
# exist_ok=True lets this cell be re-run without failing because the folder
# already exists (os.mkdir would raise FileExistsError).
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

In [12]:
# Names of the entries currently in the output folder, as a set for fast lookup.
already_output = set(os.listdir(OUTPUT_FOLDER))

In [13]:
# Names of the entries in the download folder (os.listdir returns them in arbitrary order).
filenames = os.listdir(VOTER_DATABASE_FOLDER)

In [14]:
# Timestamp printed just before the CSV-to-JSON conversion cell.
print(datetime.datetime.now())

2022-03-25 01:14:24.799046


In [15]:
# Convert each downloaded county CSV to JSON, skipping counties whose JSON
# output already exists, and count the counties that could not be converted.
failures = 0
for f in filenames:
    if f[-4:] != '.csv':
        continue

    # Parse the county id on its own so a badly named file is reported by
    # name instead of with a stale or undefined county_id.
    try:
        county_id = int(f.split('.')[0])
    except ValueError:
        print(f'could not jsonify {f}: file name does not start with a county id\n')
        failures += 1
        continue

    # Outputs are named <county_id>.json, so that is the name to look for;
    # the .csv input name never appears in OUTPUT_FOLDER.
    if f'{county_id}.json' in already_output:
        print(f'skipping {f}; already in {OUTPUT_FOLDER}')
        continue

    try:
        print(f'\nconverting county_id {county_id}')

        filepath = f'{VOTER_DATABASE_FOLDER}/{county_id}.csv'
        new_items = convert_csv(filepath)
        print(f'got {len(new_items)} voters for county_id {county_id}.')
        # The with-block closes (and flushes) the output file.
        with open(f'{OUTPUT_FOLDER}/{county_id}.json', 'w') as out:
            json.dump(new_items, out, indent=2)
    except Exception as exc:
        # Exception rather than a bare except, so Ctrl-C still interrupts
        # the loop; the reason is printed instead of being discarded.
        print(f'could not jsonify county_id {county_id}: {exc!r}\n')
        failures += 1
print(f'failed to jsonify {failures} counties.')


converting county_id 1
voter statuses: {'ACTIVE': 14167, 'CONFIRMATION': 2878}
got 17045 voters for county_id 1.

converting county_id 10
voter statuses: {'ACTIVE': 15797, 'CONFIRMATION': 2433}
got 18230 voters for county_id 10.

converting county_id 11
voter statuses: {'ACTIVE': 22661, 'CONFIRMATION': 3545}
got 26206 voters for county_id 11.

converting county_id 12
voter statuses: {'ACTIVE': 72967, 'CONFIRMATION': 15083}
got 88050 voters for county_id 12.

converting county_id 13
voter statuses: {'ACTIVE': 123966, 'CONFIRMATION': 19823}
got 143789 voters for county_id 13.

converting county_id 14
voter statuses: {'ACTIVE': 23171, 'CONFIRMATION': 4022}
got 27193 voters for county_id 14.

converting county_id 15
voter statuses: {'ACTIVE': 55841, 'CONFIRMATION': 9272}
got 65113 voters for county_id 15.

converting county_id 16
voter statuses: {'ACTIVE': 19245, 'CONFIRMATION': 3033}
got 22278 voters for county_id 16.

converting county_id 17
voter statuses: {'ACTIVE': 23826, 'CONFIRMATI


converting county_id 74
voter statuses: {'ACTIVE': 29211, 'CONFIRMATION': 4895}
got 34106 voters for county_id 74.

converting county_id 75
voter statuses: {'ACTIVE': 28303, 'CONFIRMATION': 4211}
got 32514 voters for county_id 75.

converting county_id 76
voter statuses: {'ACTIVE': 214782, 'CONFIRMATION': 32524}
got 247306 voters for county_id 76.

converting county_id 77
voter statuses: {'ACTIVE': 313829, 'CONFIRMATION': 52457}
got 366286 voters for county_id 77.

converting county_id 78
voter statuses: {'CONFIRMATION': 19339, 'ACTIVE': 116053}
got 135392 voters for county_id 78.

converting county_id 79
voter statuses: {'ACTIVE': 50239, 'CONFIRMATION': 8360}
got 58599 voters for county_id 79.

converting county_id 8
voter statuses: {'ACTIVE': 24213, 'CONFIRMATION': 4415}
got 28628 voters for county_id 8.

converting county_id 80
voter statuses: {'ACTIVE': 37723, 'CONFIRMATION': 4575}
got 42298 voters for county_id 80.

converting county_id 81
voter statuses: {'ACTIVE': 17514, 'CONFI

In [16]:
# Report when the notebook finished and how long the whole run took.
stop_time = datetime.datetime.now()
print(stop_time)
elapsed = stop_time - start_time
print(elapsed, "elapsed time")

2022-03-25 01:20:33.377221
0:13:43.960483 elapsed time
