In [1]:
import io
import zipfile

import pandas as pd
import requests

from utils import settings

In [2]:
# Location of the Clarity "summary.zip" report downloaded by this notebook.
# This will need to be updated once Clarity starts posting results for the race...
# Typically this is right after the polls close in Georgia.
# NOTE(review): the numeric path segments presumably identify the election and
# the published report version -- confirm before reusing this URL for another race.
SUMMARY_CSV_URL = 'https://results.enr.clarityelections.com//GA//105369/271927/reports/summary.zip'

In [3]:
# Download the Clarity summary archive. The timeout stops the notebook from
# hanging on an unresponsive server, and raise_for_status() aborts here on an
# HTTP error instead of failing later when the body is opened as a zip file.
r = requests.get(SUMMARY_CSV_URL, timeout=30)
r.raise_for_status()

In [4]:
# Display the HTTP status code of the Clarity download.
r.status_code

200

In [5]:
# Open the downloaded response body as an in-memory zip archive (nothing is written to disk).
content = zipfile.ZipFile(io.BytesIO(r.content))

In [6]:
# Modification time (year, month, day, hour, minute, second) stored in the
# archive for summary.csv -- the member this notebook actually parses -- rather
# than for whichever entry happens to be listed first in the zip.
m = content.getinfo('summary.csv').date_time

In [7]:
# Build the "last updated" timestamp from the zip date_time tuple. Every field
# is zero-padded so the value is always "YYYY-MM-DD HH:MM:SS.000000", even for
# single-digit months, days, hours, minutes or seconds.
last_updated = f'{m[0]:04d}-{m[1]:02d}-{m[2]:02d} {m[3]:02d}:{m[4]:02d}:{m[5]:02d}.000000'

In [8]:
# Read summary.csv out of the archive into a DataFrame; the bytes are decoded as latin1.
summary_results = pd.read_csv(io.BytesIO(content.read('summary.csv')), encoding='latin1')

In [9]:
# Map the Clarity CSV headers onto the column names used in the rest of this notebook.
column_rename = {
    "contest name": "clarity_office",
    "choice name": "clarity_candidate",
    "total votes": "votes",
    "percent of votes": "percent_votes",
}
# Reassign instead of mutating in place so the cell can be re-run safely.
summary_results = summary_results.rename(columns=column_rename)

# rename() silently ignores headers that are not present, so fail here with a
# clear message if Clarity changed its column names.
missing_columns = sorted(set(column_rename.values()) - set(summary_results.columns))
if missing_columns:
    raise KeyError(f'summary.csv is missing expected columns: {missing_columns}')

In [10]:
# Remove the Clarity columns that are not used in the rest of this notebook.
# Reassigning (rather than inplace=True) together with errors='ignore' keeps
# the cell re-runnable: a second run no longer raises KeyError for columns
# that were already removed.
summary_results = summary_results.drop(
    columns=['line number', 'party name', 'num Precinct total', 'num Precinct rptg', 'over votes', 'under votes'],
    errors='ignore',
)

In [11]:
# Preview the first rows of the Clarity results.
summary_results.head()

Unnamed: 0,clarity_office,clarity_candidate,votes,percent_votes
0,President of the United States,Donald J. Trump (I) (Rep),2461837,49.25
1,President of the United States,Joseph R. Biden (Dem),2474507,49.51
2,President of the United States,Jo Jorgensen (Lib),62138,1.24
3,US Senate (Perdue),David A. Perdue (I) (Rep),2462617,49.73
4,US Senate (Perdue),Jon Ossoff (Dem),2374519,47.95


In [12]:
# Check column dtypes and non-null counts of the Clarity results.
summary_results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 470 entries, 0 to 469
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   clarity_office     470 non-null    object 
 1   clarity_candidate  470 non-null    object 
 2   votes              470 non-null    int64  
 3   percent_votes      470 non-null    float64
dtypes: float64(1), int64(1), object(2)
memory usage: 14.8+ KB


### Use this next cell to look up Clarity information...

In [None]:
# Do not truncate long cell values (e.g. office names) when displaying frames.
pd.set_option('display.max_colwidth', None)

# Show every Clarity row whose candidate name contains the search text,
# transposed so each matching row reads as a column.
summary_results.loc[summary_results['clarity_candidate'].str.contains("Halpern")].T

In [13]:
# Mapping between Branch identifiers and the exact candidate/office strings
# used by Clarity. Each tuple below is:
#   (branch_id, branch_candidate, branch_race_id, clarity_candidate, clarity_office)
branch_candidate_office_20201201 = [
    dict(
        branch_id=branch_id,
        branch_candidate=branch_candidate,
        branch_race_id=branch_race_id,
        clarity_candidate=clarity_candidate,
        clarity_office=clarity_office,
    )
    for branch_id, branch_candidate, branch_race_id, clarity_candidate, clarity_office in (
        (
            'linda-pritchett',
            'Linda Pritchett',
            'ga-general-runoff-2020-ga-senate-39',
            'Linda Pritchett',
            'State Senate District 39 - Special Democratic Primary',
        ),
        (
            'sonya-halpern',
            'Sonya Halpern',
            'ga-general-runoff-2020-ga-senate-39',
            'Sonya Halpern',
            'State Senate District 39 - Special Democratic Primary',
        ),
        (
            'kwanza-hall',
            'Kwanza Hall',
            'ga-special-2020-us-house-5',
            'Kwanza Hall',
            'US House District 5',
        ),
        (
            'robert-franklin',
            'Robert Franklin',
            'ga-special-2020-us-house-5',
            'Robert Franklin',
            'US House District 5',
        ),
    )
]

In [14]:
# Turn the candidate/office mapping into a DataFrame (one row per candidate) so it can be merged.
branch_info = pd.DataFrame(branch_candidate_office_20201201)

In [15]:
# Check columns and non-null counts of the mapping frame.
branch_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   branch_id          4 non-null      object
 1   branch_candidate   4 non-null      object
 2   branch_race_id     4 non-null      object
 3   clarity_candidate  4 non-null      object
 4   clarity_office     4 non-null      object
dtypes: object(5)
memory usage: 288.0+ bytes


In [16]:
# Show full column contents (no truncation), then preview the mapping rows.
pd.set_option('max_colwidth', None)
branch_info.head()

Unnamed: 0,branch_id,branch_candidate,branch_race_id,clarity_candidate,clarity_office
0,linda-pritchett,Linda Pritchett,ga-general-runoff-2020-ga-senate-39,Linda Pritchett,State Senate District 39 - Special Democratic Primary
1,sonya-halpern,Sonya Halpern,ga-general-runoff-2020-ga-senate-39,Sonya Halpern,State Senate District 39 - Special Democratic Primary
2,kwanza-hall,Kwanza Hall,ga-special-2020-us-house-5,Kwanza Hall,US House District 5
3,robert-franklin,Robert Franklin,ga-special-2020-us-house-5,Robert Franklin,US House District 5


In [17]:
# Sanity check: confirm branch_info is a DataFrame.
type(branch_info)

pandas.core.frame.DataFrame

In [18]:
# Same preview as above, transposed so each candidate is a column and long values are easier to read.
pd.set_option('max_colwidth', None)
branch_info.head().T

Unnamed: 0,0,1,2,3
branch_id,linda-pritchett,sonya-halpern,kwanza-hall,robert-franklin
branch_candidate,Linda Pritchett,Sonya Halpern,Kwanza Hall,Robert Franklin
branch_race_id,ga-general-runoff-2020-ga-senate-39,ga-general-runoff-2020-ga-senate-39,ga-special-2020-us-house-5,ga-special-2020-us-house-5
clarity_candidate,Linda Pritchett,Sonya Halpern,Kwanza Hall,Robert Franklin
clarity_office,State Senate District 39 - Special Democratic Primary,State Senate District 39 - Special Democratic Primary,US House District 5,US House District 5


In [19]:
# Join the Branch mapping to the Clarity results on the exact Clarity candidate
# and office strings. The inner join keeps only mapping rows that have a
# matching Clarity row.
merge_keys = ['clarity_candidate', 'clarity_office']
merged_results = pd.merge(branch_info, summary_results, on=merge_keys, how='inner')

# An inner join drops unmatched mapping rows without raising, so a spelling
# difference between the mapping and Clarity would silently remove a candidate
# from the upload. Report any Branch candidates that found no match.
unmatched_branch_ids = sorted(set(branch_info['branch_id']) - set(merged_results['branch_id']))
if unmatched_branch_ids:
    print(f'WARNING: no Clarity results matched for: {", ".join(unmatched_branch_ids)}')

In [20]:
# Preview the merged rows (Branch identifiers alongside Clarity votes and percentages).
merged_results.head()

Unnamed: 0,branch_id,branch_candidate,branch_race_id,clarity_candidate,clarity_office,votes,percent_votes
0,linda-pritchett,Linda Pritchett,ga-general-runoff-2020-ga-senate-39,Linda Pritchett,State Senate District 39 - Special Democratic Primary,17573,25.37
1,sonya-halpern,Sonya Halpern,ga-general-runoff-2020-ga-senate-39,Sonya Halpern,State Senate District 39 - Special Democratic Primary,31294,45.19


In [21]:
# Sanity check: confirm the merge produced a DataFrame.
type(merged_results)

pandas.core.frame.DataFrame

In [22]:
# Check row count, dtypes and non-null counts of the merged frame.
merged_results.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2 entries, 0 to 1
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   branch_id          2 non-null      object 
 1   branch_candidate   2 non-null      object 
 2   branch_race_id     2 non-null      object 
 3   clarity_candidate  2 non-null      object 
 4   clarity_office     2 non-null      object 
 5   votes              2 non-null      int64  
 6   percent_votes      2 non-null      float64
dtypes: float64(1), int64(1), object(5)
memory usage: 128.0+ bytes


In [23]:
# Repeat of the earlier type check on merged_results.
# NOTE(review): looks redundant -- candidate for removal.
type(merged_results)

pandas.core.frame.DataFrame

In [24]:
# Stamp every row with the archive timestamp as the first column.
# DataFrame.insert raises ValueError if the column already exists, so assign
# instead when this cell is re-run.
if 'last_updated' in merged_results.columns:
    merged_results['last_updated'] = last_updated
else:
    merged_results.insert(0, 'last_updated', last_updated)

In [25]:
# Preview the merged rows again, now including the last_updated column.
merged_results.head()

Unnamed: 0,last_updated,branch_id,branch_candidate,branch_race_id,clarity_candidate,clarity_office,votes,percent_votes
0,2020-11-20 15:37:10.000000,linda-pritchett,Linda Pritchett,ga-general-runoff-2020-ga-senate-39,Linda Pritchett,State Senate District 39 - Special Democratic Primary,17573,25.37
1,2020-11-20 15:37:10.000000,sonya-halpern,Sonya Halpern,ga-general-runoff-2020-ga-senate-39,Sonya Halpern,State Senate District 39 - Special Democratic Primary,31294,45.19


In [26]:
# Columns written to the upload file, in output order (branch_candidate is not included).
columns = [
    'last_updated', 'branch_id', 'branch_race_id',
    'clarity_candidate', 'clarity_office',
    'votes', 'percent_votes',
]

# Serialise the merged results as CSV into an in-memory text buffer, with a
# header row and without the DataFrame index.
election_results_csv = io.StringIO()
merged_results.to_csv(
    path_or_buf=election_results_csv,
    columns=columns,
    header=True,
    index=False,
)

In [27]:
# Inspect the exact CSV text that will be uploaded.
election_results_csv.getvalue()

'last_updated,branch_id,branch_race_id,clarity_candidate,clarity_office,votes,percent_votes\n2020-11-20 15:37:10.000000,linda-pritchett,ga-general-runoff-2020-ga-senate-39,Linda Pritchett,State Senate District 39 - Special Democratic Primary,17573,25.37\n2020-11-20 15:37:10.000000,sonya-halpern,ga-general-runoff-2020-ga-senate-39,Sonya Halpern,State Senate District 39 - Special Democratic Primary,31294,45.19\n'

In [28]:
# Request parameters for authenticating against Branch; the endpoint and
# credentials all come from the project settings module.
url = settings.BRANCH_URL_AUTH
headers = {"Content-Type": "application/json"}
data = dict(
    email=settings.BRANCH_EMAIL,
    password=settings.BRANCH_PASSWORD,
    strategy=settings.BRANCH_STRATEGY,
)

In [29]:
# Authenticate against Branch. The timeout prevents the notebook from hanging,
# and raise_for_status() stops here on an HTTP error instead of continuing
# with a response that carries no token.
results = requests.post(url, json=data, headers=headers, timeout=30)
results.raise_for_status()
results.status_code

201

In [30]:
# Extract the access token from the authentication response.
# The token is deliberately NOT echoed as cell output: it is a bearer
# credential and would be stored in the saved notebook.
branch_token = results.json().get('accessToken')
if not branch_token:
    raise RuntimeError('Branch authentication response did not include an accessToken')

'<access token redacted>'

In [33]:
# Request parameters for the results upload: the Branch endpoint from settings,
# the access token as the Authorization header, and the CSV text as the
# multipart form field named "file".
url = settings.BRANCH_URL
headers = dict(Authorization=branch_token)
files = dict(file=election_results_csv.getvalue())

In [34]:
# Upload the CSV to Branch as a multipart form post. The timeout prevents the
# notebook from hanging, and raise_for_status() makes a rejected upload fail
# loudly instead of only showing up in the displayed status code.
r = requests.post(url, files=files, headers=headers, timeout=60)
r.raise_for_status()
r.status_code

201

In [35]:
# Show the raw response body returned by the upload request.
r.text

'{"location":"https://branch-production-bucket.s3.amazonaws.com/electionResults/1606677672080_file"}'