In [1]:
import os
import csv
import numpy as np
import pandas as pd

In [2]:
# Move into the raw-data folder and collect the Excel workbooks to process.
# NOTE: os.chdir changes the working directory for the whole kernel, so every
# relative path used afterwards resolves against 'raw'.
path = 'raw'
os.chdir(path)

# Match on the real file extension (a substring test would also accept names
# such as 'x.xlsx.bak'), skip the '~$' lock files Excel creates while a
# workbook is open, and sort because os.listdir returns entries in arbitrary order.
raw_data = sorted(
    file_name
    for file_name in os.listdir(os.getcwd())
    if file_name.endswith('.xlsx') and not file_name.startswith('~$')
)

In [3]:
# Inspect the list of workbook file names collected in raw_data.
raw_data

['Governor.xlsx',
 'Insurance Commissioner.xlsx',
 'KC Clerk of the Peace.xlsx',
 'KC Register of Wills.xlsx',
 'Levy Court District 1.xlsx',
 'Levy Court District 3.xlsx',
 'Levy Court District 5.xlsx',
 'Lieutenant Governor.xlsx',
 'NCC Clerk of the Peace.xlsx',
 'NCC County Council District 10.xlsx',
 'NCC County Council District 11.xlsx',
 'NCC County Council District 12.xlsx',
 'NCC County Council District 7.xlsx',
 'NCC County Council District 8.xlsx',
 'NCC County Council District 9.xlsx',
 'NCC County Executive.xlsx',
 'NCC President of County Council.xlsx',
 'President and Vice President.xlsx',
 'Representative in Congress.xlsx',
 'SC Clerk of the Peace.xlsx',
 'SC County Council District 1.xlsx',
 'SC County Council District 2.xlsx',
 'SC County Council District 3.xlsx',
 'State Representative District 1.xlsx',
 'State Representative District 10.xlsx',
 'State Representative District 11.xlsx',
 'State Representative District 12.xlsx',
 'State Representative District 13.xlsx',

In [1]:
# method to read data + preliminary cleaning
def read_raw_data(file_name, sheet_index, sheet_names):
    """Read one sheet of a results workbook and tag it with office and precinct.

    Parameters
    ----------
    file_name : str
        Path of the workbook. Its base name without the extension becomes the
        (upper-cased) ``office`` value.
    sheet_index : int
        Zero-based position of the sheet to read.
    sheet_names : list of str
        Sheet names of the workbook; ``sheet_names[sheet_index]`` supplies the
        precinct label.

    Returns
    -------
    pandas.DataFrame
        The sheet's rows plus ``office`` and ``precinct`` columns, with every
        string cell upper-cased.
    """
    df = pd.read_excel(file_name, sheet_name=sheet_index)

    # Strip directory and extension properly instead of slicing off a fixed
    # five characters, which only works for bare '*.xlsx' names.
    office = os.path.splitext(os.path.basename(file_name))[0]
    precinct = sheet_names[sheet_index]

    df['office'] = office.upper()
    # Keep only the digits and dashes of the sheet name.
    df['precinct'] = ''.join(ch for ch in precinct if ch.isdigit() or ch == '-')

    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; look the method up on the class so either version works.
    elementwise = getattr(pd.DataFrame, 'map', None) or pd.DataFrame.applymap
    return elementwise(df, lambda cell: cell.upper() if isinstance(cell, str) else cell)

In [8]:
# concatenating every excel sheet to get one csv with all the data
# Collect the per-sheet frames first and concatenate once: calling pd.concat
# inside the loop re-copies the accumulated frame for every sheet.
frames = []

for file in raw_data:
    # The workbook is opened here only to list its sheets; the context manager
    # closes the handle before read_raw_data re-opens the file for each sheet.
    with pd.ExcelFile(file) as xl:
        sheet_names = xl.sheet_names
    for i in range(len(sheet_names)):
        frames.append(read_raw_data(file, i, sheet_names))

# pd.concat raises on an empty list, so fall back to an empty frame when no
# workbook was found.
df = pd.concat(frames) if frames else pd.DataFrame()

# filling NaN values with empty strings
df.fillna('', inplace=True)

In [9]:
# Standardise the column names, then discard the derived total/percentage columns.
df = (
    df.rename(columns={
        'Candidate Name': 'candidate',
        'Party Name': 'party_detailed',
        'Machine Votes': 'ELECTION DAY',
        'Absentee Votes': 'ABSENTEE',
    })
    .drop(columns=['Total Votes', 'Percentage'])
)

In [10]:
# Reshape wide -> long: the two vote-count columns become rows, with the
# column name stored in 'mode' and the count stored in 'votes'.
df = df.melt(
    id_vars=['precinct', 'candidate', 'party_detailed', 'office'],
    value_vars=['ELECTION DAY', 'ABSENTEE'],
    var_name='mode',
    value_name='votes',
)

In [13]:
# Reading every Excel sheet takes a while, so the combined frame is saved as an
# intermediate CSV; after a kernel restart, cleaning can resume from this file
# instead of re-reading the workbooks. The file is written to the current
# working directory, without the index column.
df.to_csv('combined_raw_de.csv', index=False)

In [3]:
# Reload the intermediate CSV from the working directory (the 'raw' folder
# after the os.chdir call at the top of the notebook), which is where the
# export cell writes it. A user-specific absolute path would only work on one machine.
df = pd.read_csv('combined_raw_de.csv')

In [4]:
# As described in the README.md, precincts are reported as ED-RD, whereas the
# raw data uses RD-ED, so the two dash-separated fields are swapped.
def flip_eds(x):
    """Return ``x`` with its first two dash-separated fields swapped.

    Only the first two fields are kept; an input without a dash raises
    ``IndexError``.
    """
    fields = x.split('-')
    return '-'.join((fields[1], fields[0]))
# Rewrite every precinct label with its two fields swapped.
df['precinct'] = df['precinct'].map(flip_eds)

In [5]:
# Inspect the frame after the precinct labels have been flipped.
df

Unnamed: 0,precinct,candidate,party_detailed,office,mode,votes
0,01-01,JOHN C. CARNEY JR.,DEMOCRATIC PARTY,GOVERNOR,ELECTION DAY,408
1,01-01,JULIANNE E. MURRAY,REPUBLICAN PARTY,GOVERNOR,ELECTION DAY,44
2,01-01,KATHY S. DEMATTEIS,INDEPENDENT PARTY OF DELAWARE,GOVERNOR,ELECTION DAY,5
3,01-01,JOHN J. MACHUREK,LIBERTARIAN PARTY,GOVERNOR,ELECTION DAY,4
4,02-01,JOHN C. CARNEY JR.,DEMOCRATIC PARTY,GOVERNOR,ELECTION DAY,753
...,...,...,...,...,...,...
24265,16-04,DAWAYNE SIMS,DEMOCRATIC PARTY,WILM CITY TREASURER,ABSENTEE,28
24266,03-13,DAWAYNE SIMS,DEMOCRATIC PARTY,WILM CITY TREASURER,ABSENTEE,32
24267,12-13,DAWAYNE SIMS,DEMOCRATIC PARTY,WILM CITY TREASURER,ABSENTEE,1
24268,13-13,DAWAYNE SIMS,DEMOCRATIC PARTY,WILM CITY TREASURER,ABSENTEE,16


In [6]:
def cleanParty(x):
    """Collapse a raw party name to a short label.

    The first keyword (checked in the order listed) found in ``x`` decides the
    label; an empty string is returned when none matches.
    """
    keyword_labels = (
        ('DEMOCRATIC', 'DEMOCRAT'),
        ('REPUBLICAN', 'REPUBLICAN'),
        ('INDEPENDENT', 'INDEPENDENT'),
        ('GREEN', 'GREEN'),
        ('LIBERTARIAN', 'LIBERTARIAN'),
    )
    for keyword, label in keyword_labels:
        if keyword in x:
            return label
    return ''
# Normalise the party labels in place.
df['party_detailed'] = df['party_detailed'].map(cleanParty)
# party_simplified mirrors party_detailed except that GREEN becomes OTHER.
df['party_simplified'] = df['party_detailed'].replace('GREEN', 'OTHER')

In [7]:
# merging precinct+county csv to get county names based on the precinct
# The lookup file lives in the raw-data folder, which is the working directory
# after the os.chdir('raw') call at the top of the notebook; a relative path
# keeps the notebook runnable outside the original author's machine.
county_names = pd.read_csv('election_district_to_county.csv')
county_names = county_names.rename(columns={'Election District': 'precinct'})

# Left merge keeps every vote row even when its precinct is missing from the
# lookup; those rows get NaN in the columns that come from the lookup file.
df = pd.merge(df, county_names, on='precinct', how='left')
df = df.dropna(axis=0, subset=['precinct'])
df = df.rename(columns={'County': 'county_name'})
df

Unnamed: 0,precinct,candidate,party_detailed,office,mode,votes,party_simplified,State House District,State Senate District,county_name
0,01-01,JOHN C. CARNEY JR.,DEMOCRAT,GOVERNOR,ELECTION DAY,408,DEMOCRAT,1.0,1.0,NEW CASTLE
1,01-01,JULIANNE E. MURRAY,REPUBLICAN,GOVERNOR,ELECTION DAY,44,REPUBLICAN,1.0,1.0,NEW CASTLE
2,01-01,KATHY S. DEMATTEIS,INDEPENDENT,GOVERNOR,ELECTION DAY,5,INDEPENDENT,1.0,1.0,NEW CASTLE
3,01-01,JOHN J. MACHUREK,LIBERTARIAN,GOVERNOR,ELECTION DAY,4,LIBERTARIAN,1.0,1.0,NEW CASTLE
4,02-01,JOHN C. CARNEY JR.,DEMOCRAT,GOVERNOR,ELECTION DAY,753,DEMOCRAT,1.0,2.0,NEW CASTLE
...,...,...,...,...,...,...,...,...,...,...
24265,16-04,DAWAYNE SIMS,DEMOCRAT,WILM CITY TREASURER,ABSENTEE,28,DEMOCRAT,4.0,3.0,NEW CASTLE
24266,03-13,DAWAYNE SIMS,DEMOCRAT,WILM CITY TREASURER,ABSENTEE,32,DEMOCRAT,13.0,3.0,NEW CASTLE
24267,12-13,DAWAYNE SIMS,DEMOCRAT,WILM CITY TREASURER,ABSENTEE,1,DEMOCRAT,13.0,3.0,NEW CASTLE
24268,13-13,DAWAYNE SIMS,DEMOCRAT,WILM CITY TREASURER,ABSENTEE,16,DEMOCRAT,13.0,3.0,NEW CASTLE


In [8]:
# A few precincts were not present in the election district pdf, so their
# counties are assigned manually.
def addCounty(x, y):
    """Return the manually assigned county for precinct ``x``, else ``y``.

    ``x`` is the precinct label and ``y`` the county name already on the row.
    """
    manual_counties = {
        '17-31': 'KENT',
        '17-41': 'SUSSEX',
        '18-02': 'NEW CASTLE',
        '10-09': 'NEW CASTLE',
        '11-09': 'NEW CASTLE',
        '12-09': 'NEW CASTLE',
        '13-09': 'NEW CASTLE',
    }
    return manual_counties.get(x, y)
# Row-wise pass: overwrite county_name using the precinct-based manual overrides.
df['county_name'] = df.apply(
    lambda row: addCounty(row['precinct'], row['county_name']),
    axis=1,
)

In [9]:
# merging county_name to get county_fips
# The help-files folder sits at the repository root, three levels above the
# raw-data working directory (<repo>/precinct/DE/raw); a relative path avoids
# depending on one user's home directory.
county_fips = pd.read_csv('../../../help-files/county-fips-codes.csv')
# Keep only the Delaware rows; the state column is not needed after filtering.
county_fips = county_fips[county_fips['state']=='Delaware'].drop(columns='state')
df = df.merge(county_fips, on='county_name', how='left')
df

Unnamed: 0,precinct,candidate,party_detailed,office,mode,votes,party_simplified,State House District,State Senate District,county_name,county_fips
0,01-01,JOHN C. CARNEY JR.,DEMOCRAT,GOVERNOR,ELECTION DAY,408,DEMOCRAT,1.0,1.0,NEW CASTLE,10003
1,01-01,JULIANNE E. MURRAY,REPUBLICAN,GOVERNOR,ELECTION DAY,44,REPUBLICAN,1.0,1.0,NEW CASTLE,10003
2,01-01,KATHY S. DEMATTEIS,INDEPENDENT,GOVERNOR,ELECTION DAY,5,INDEPENDENT,1.0,1.0,NEW CASTLE,10003
3,01-01,JOHN J. MACHUREK,LIBERTARIAN,GOVERNOR,ELECTION DAY,4,LIBERTARIAN,1.0,1.0,NEW CASTLE,10003
4,02-01,JOHN C. CARNEY JR.,DEMOCRAT,GOVERNOR,ELECTION DAY,753,DEMOCRAT,1.0,2.0,NEW CASTLE,10003
...,...,...,...,...,...,...,...,...,...,...,...
24265,16-04,DAWAYNE SIMS,DEMOCRAT,WILM CITY TREASURER,ABSENTEE,28,DEMOCRAT,4.0,3.0,NEW CASTLE,10003
24266,03-13,DAWAYNE SIMS,DEMOCRAT,WILM CITY TREASURER,ABSENTEE,32,DEMOCRAT,13.0,3.0,NEW CASTLE,10003
24267,12-13,DAWAYNE SIMS,DEMOCRAT,WILM CITY TREASURER,ABSENTEE,1,DEMOCRAT,13.0,3.0,NEW CASTLE,10003
24268,13-13,DAWAYNE SIMS,DEMOCRAT,WILM CITY TREASURER,ABSENTEE,16,DEMOCRAT,13.0,3.0,NEW CASTLE,10003


In [10]:
df['county_name'] = df['county_name'].astype(str)
# Convert through int64 rather than int16: five-digit FIPS codes can exceed the
# int16 range, and a NumPy cast silently turns NaN into a garbage integer,
# whereas pandas raises if an unmatched county left a NaN here.
df['county_fips'] = df['county_fips'].astype(np.int64).astype(str)
# Jurisdiction columns are copies of the county columns.
df['jurisdiction_name'] = df['county_name']
df['jurisdiction_fips'] = df['county_fips']
# Strip thousands separators before converting to int. Going through str first
# keeps this working when read_csv already parsed the column as numeric (the
# .str accessor is only available on string data).
df['votes'] = df['votes'].astype(str).str.replace(',', '', regex=False).astype(int)
# Constant columns, identical for every row of this file.
df['state'] = 'DELAWARE'
df['state_po'] = 'DE'
df['state_fips'] = '10'
df['state_cen'] = '51'
df['state_ic'] = '11'
df['stage'] = 'GEN'
df['year'] = '2020'
df['date'] = '2020-11-03'
df['special'] = 'FALSE'
df['writein'] = 'FALSE'

In [11]:
# Inspect the frame after the constant columns have been added.
df

Unnamed: 0,precinct,candidate,party_detailed,office,mode,votes,party_simplified,State House District,State Senate District,county_name,...,state,state_po,state_fips,state_cen,state_ic,stage,year,date,special,writein
0,01-01,JOHN C. CARNEY JR.,DEMOCRAT,GOVERNOR,ELECTION DAY,408,DEMOCRAT,1.0,1.0,NEW CASTLE,...,DELAWARE,DE,10,51,11,GEN,2020,2020-11-03,FALSE,FALSE
1,01-01,JULIANNE E. MURRAY,REPUBLICAN,GOVERNOR,ELECTION DAY,44,REPUBLICAN,1.0,1.0,NEW CASTLE,...,DELAWARE,DE,10,51,11,GEN,2020,2020-11-03,FALSE,FALSE
2,01-01,KATHY S. DEMATTEIS,INDEPENDENT,GOVERNOR,ELECTION DAY,5,INDEPENDENT,1.0,1.0,NEW CASTLE,...,DELAWARE,DE,10,51,11,GEN,2020,2020-11-03,FALSE,FALSE
3,01-01,JOHN J. MACHUREK,LIBERTARIAN,GOVERNOR,ELECTION DAY,4,LIBERTARIAN,1.0,1.0,NEW CASTLE,...,DELAWARE,DE,10,51,11,GEN,2020,2020-11-03,FALSE,FALSE
4,02-01,JOHN C. CARNEY JR.,DEMOCRAT,GOVERNOR,ELECTION DAY,753,DEMOCRAT,1.0,2.0,NEW CASTLE,...,DELAWARE,DE,10,51,11,GEN,2020,2020-11-03,FALSE,FALSE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24265,16-04,DAWAYNE SIMS,DEMOCRAT,WILM CITY TREASURER,ABSENTEE,28,DEMOCRAT,4.0,3.0,NEW CASTLE,...,DELAWARE,DE,10,51,11,GEN,2020,2020-11-03,FALSE,FALSE
24266,03-13,DAWAYNE SIMS,DEMOCRAT,WILM CITY TREASURER,ABSENTEE,32,DEMOCRAT,13.0,3.0,NEW CASTLE,...,DELAWARE,DE,10,51,11,GEN,2020,2020-11-03,FALSE,FALSE
24267,12-13,DAWAYNE SIMS,DEMOCRAT,WILM CITY TREASURER,ABSENTEE,1,DEMOCRAT,13.0,3.0,NEW CASTLE,...,DELAWARE,DE,10,51,11,GEN,2020,2020-11-03,FALSE,FALSE
24268,13-13,DAWAYNE SIMS,DEMOCRAT,WILM CITY TREASURER,ABSENTEE,16,DEMOCRAT,13.0,3.0,NEW CASTLE,...,DELAWARE,DE,10,51,11,GEN,2020,2020-11-03,FALSE,FALSE


In [12]:
def cleanCandidate(x):
    """Normalise a candidate name.

    Periods are removed; any name containing 'SHERAE' is replaced by one fixed
    spelling; for a joint "A AND B" entry only the part before the first
    ' AND ' is kept.
    """
    name = x.replace('.', '')
    if 'SHERAE' in name:
        return 'SHERAE\'A "RAE" MOORE'
    # partition leaves the whole name in the first slot when ' AND ' is absent.
    first_name, _, _ = name.partition(' AND ')
    return first_name
# Normalise every candidate name.
df['candidate'] = df['candidate'].map(cleanCandidate)

In [13]:
def getDistrict(x):
    """Derive the district code from a raw (upper-cased) office name.

    Offices naming a DISTRICT yield their digits zero-padded to three
    characters; the congressional seat is '000'; the listed city-wide offices
    are 'AT-LARGE'; the listed state-wide offices are 'STATEWIDE'; anything
    else gets an empty string.
    """
    if 'DISTRICT' in x:
        digits = ''.join(ch for ch in x if ch.isdigit())
        return digits.zfill(3)
    if 'REPRESENTATIVE IN CONGRESS' in x:
        return '000'

    at_large_markers = (
        'AT LARGE',
        'WILM CITY MAYOR',
        'WILM CITY PRESIDENT OF CITY COUNCIL',
        'WILM CITY TREASURER',
    )
    for marker in at_large_markers:
        if marker in x:
            return 'AT-LARGE'

    statewide_markers = (
        'PRESIDENT AND VICE PRESIDENT',
        'GOVERNOR',
        'INSURANCE COMMISSIONER',
        'U.S. SENATOR',
        'LIEUTENANT GOVERNOR',
    )
    for marker in statewide_markers:
        if marker in x:
            return 'STATEWIDE'
    return ''
# District is derived from the raw office name, before the office is cleaned.
df['district'] = df['office'].map(getDistrict)

In [14]:
# Inspect the frame after candidate cleaning and district assignment.
df

Unnamed: 0,precinct,candidate,party_detailed,office,mode,votes,party_simplified,State House District,State Senate District,county_name,...,state_po,state_fips,state_cen,state_ic,stage,year,date,special,writein,district
0,01-01,JOHN C CARNEY JR,DEMOCRAT,GOVERNOR,ELECTION DAY,408,DEMOCRAT,1.0,1.0,NEW CASTLE,...,DE,10,51,11,GEN,2020,2020-11-03,FALSE,FALSE,STATEWIDE
1,01-01,JULIANNE E MURRAY,REPUBLICAN,GOVERNOR,ELECTION DAY,44,REPUBLICAN,1.0,1.0,NEW CASTLE,...,DE,10,51,11,GEN,2020,2020-11-03,FALSE,FALSE,STATEWIDE
2,01-01,KATHY S DEMATTEIS,INDEPENDENT,GOVERNOR,ELECTION DAY,5,INDEPENDENT,1.0,1.0,NEW CASTLE,...,DE,10,51,11,GEN,2020,2020-11-03,FALSE,FALSE,STATEWIDE
3,01-01,JOHN J MACHUREK,LIBERTARIAN,GOVERNOR,ELECTION DAY,4,LIBERTARIAN,1.0,1.0,NEW CASTLE,...,DE,10,51,11,GEN,2020,2020-11-03,FALSE,FALSE,STATEWIDE
4,02-01,JOHN C CARNEY JR,DEMOCRAT,GOVERNOR,ELECTION DAY,753,DEMOCRAT,1.0,2.0,NEW CASTLE,...,DE,10,51,11,GEN,2020,2020-11-03,FALSE,FALSE,STATEWIDE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24265,16-04,DAWAYNE SIMS,DEMOCRAT,WILM CITY TREASURER,ABSENTEE,28,DEMOCRAT,4.0,3.0,NEW CASTLE,...,DE,10,51,11,GEN,2020,2020-11-03,FALSE,FALSE,AT-LARGE
24266,03-13,DAWAYNE SIMS,DEMOCRAT,WILM CITY TREASURER,ABSENTEE,32,DEMOCRAT,13.0,3.0,NEW CASTLE,...,DE,10,51,11,GEN,2020,2020-11-03,FALSE,FALSE,AT-LARGE
24267,12-13,DAWAYNE SIMS,DEMOCRAT,WILM CITY TREASURER,ABSENTEE,1,DEMOCRAT,13.0,3.0,NEW CASTLE,...,DE,10,51,11,GEN,2020,2020-11-03,FALSE,FALSE,AT-LARGE
24268,13-13,DAWAYNE SIMS,DEMOCRAT,WILM CITY TREASURER,ABSENTEE,16,DEMOCRAT,13.0,3.0,NEW CASTLE,...,DE,10,51,11,GEN,2020,2020-11-03,FALSE,FALSE,AT-LARGE


In [15]:
# Magnitudes were assigned manually: only the office named below gets 4.
def getMagnitude(x):
    """Return 4 for 'WILM CITY COUNCIL AT LARGE' and 1 for every other office."""
    return 4 if x == 'WILM CITY COUNCIL AT LARGE' else 1
# Magnitude is keyed on the raw office name, so this runs before office cleaning.
df['magnitude'] = df['office'].map(getMagnitude)

In [16]:
def cleanOffice(x):
    """Translate a raw (upper-cased) office name into its cleaned label.

    Federal and legislative offices map to fixed labels. For the rest, the
    district / at-large qualifier is dropped and a 'WILM CITY ', 'KC ', 'NCC '
    or 'SC ' marker is turned into a ' - <place>' suffix. Names matching none
    of the rules are returned unchanged apart from the qualifier removal.
    """
    fixed_labels = (
        ('PRESIDENT AND VICE PRESIDENT', 'US PRESIDENT'),
        ('STATE REPRESENTATIVE', 'STATE HOUSE'),
        ('STATE SENATOR', 'STATE SENATE'),
        ('U.S. SENATOR', 'US SENATE'),
        ('REPRESENTATIVE IN CONGRESS', 'US HOUSE'),
    )
    for needle, label in fixed_labels:
        if needle in x:
            return label

    # Drop everything from ' DISTRICT' onwards, then any ' AT LARGE' qualifier.
    if 'DISTRICT' in x:
        x = x.split(' DISTRICT')[0]
    x = x.replace(' AT LARGE', '')

    if 'WILM CITY ' in x:
        city_office = x.split('WILM CITY ')[1]
        if city_office == 'COUNCIL':
            return 'CITY COUNCIL - WILMINGTON'
        return city_office + ' - WILMINGTON'

    # County markers are tested in this order, by substring match.
    county_markers = (('KC ', 'KENT'), ('NCC ', 'NEW CASTLE'), ('SC ', 'SUSSEX'))
    for marker, county in county_markers:
        if marker in x:
            return x.split(marker)[1] + ' - ' + county
    return x
# Replace the raw office names with their cleaned labels.
df['office'] = df['office'].map(cleanOffice)

In [17]:
def getDataverse(x):
    """Classify a cleaned office name as PRESIDENT, SENATE, HOUSE, STATE or LOCAL."""
    federal_labels = (
        ('US PRESIDENT', 'PRESIDENT'),
        ('US SENATE', 'SENATE'),
        ('US HOUSE', 'HOUSE'),
    )
    for needle, dataverse in federal_labels:
        if needle in x:
            return dataverse

    state_markers = ('STATE', 'GOVERNOR', 'INSURANCE COMMISSIONER', 'LIEUTENANT GOVERNOR')
    for marker in state_markers:
        if marker in x:
            return 'STATE'
    # Everything that is neither federal nor state-level counts as local.
    return 'LOCAL'
# Dataverse is derived from the cleaned office label.
df['dataverse'] = df['office'].map(getDataverse)

In [18]:
def readme_check(x):
    """Return 'TRUE' for precincts '17-31' and '17-41', otherwise 'FALSE'."""
    flagged_precincts = ('17-31', '17-41')
    return 'TRUE' if x in flagged_precincts else 'FALSE'
# Flag the rows belonging to the precincts singled out by readme_check.
df['readme_check'] = df['precinct'].map(readme_check)

In [19]:
# The legislative-district columns brought in by the county lookup merge are
# not part of the output, so drop them.
df = df.drop(columns=['State House District', 'State Senate District'])

In [20]:
# Final output schema: the columns to keep, in the order they are written.
column_names = ['precinct', 'office', 'party_detailed', 'party_simplified', 'mode', 'votes', 
                'county_name', 'county_fips', 'jurisdiction_name', 'jurisdiction_fips', 'candidate', 
                'district', 'dataverse', 'year', 'stage', 'state', 'special', 'writein', 'state_po', 
                'state_fips', 'state_cen', 'state_ic', 'date', 'readme_check', 'magnitude']
# reindex selects and orders the listed columns; a listed column that does not
# exist in df is created filled with NaN rather than raising.
df = df.reindex(columns=column_names)
# QUOTE_NONNUMERIC quotes every non-numeric field in the written file. The file
# goes to the current working directory, without the index column.
df.to_csv('2020-de-precinct-general.csv', index=False, quoting=csv.QUOTE_NONNUMERIC)

In [188]:
# sorted(df.office.unique())

In [35]:
# sorted(df.candidate.unique())

['ADEWUNMI "ADE" KUFORIJI',
 'ALEXANDER HACKETT',
 'ALEXANDER M HOMICH',
 'ALLAN ANGEL',
 'AMY A MERLINO',
 'ANDRIA BENNETT',
 'ANTHONY DELCOLLO',
 'BETHANY HALL-LONG',
 'BREGETTA A FIELDS',
 'BRENDA WOOTTEN',
 'BRIAN G PETTYJOHN',
 'BRIAN G WHITAKER',
 'BRUCE C ENNIS',
 'BRYAN W SHUPE',
 'CALVIN BROWN',
 'CATHERINE A CLOUTIER',
 'CATHERINE S PURCELL',
 'CHARLES S POSTLES JR',
 'CHERYL PRECOURT',
 'CHRIS JOHNSON',
 'CHRISTOPHER A COONS',
 'CHUCK GROCE',
 'CIRO ADAMS',
 'CLINT BROTHERS',
 'CRAIG PUGH',
 'CYNTHIA C GREEN',
 'DANIEL B SHORT',
 'DANIEL ZITOFSKY',
 'DARRYNN HARRIS',
 'DAVID L ROGERS',
 'DAVID L TACKETT',
 'DAVID LAWSON',
 'DAVID P SOKOLA',
 'DAVID S BENTZ',
 'DAWAYNE SIMS',
 'DAYL C THOMAS',
 'DEBBIE HARRINGTON',
 'DEBRA HEFFERNAN',
 'DONALD CARL "TRIPP" KEISTER III',
 'DONALD J TRUMP',
 'DONYALE HALL',
 'DOUG CHERVENAK',
 'EDWARD OSIENSKI',
 'ERIC A MORRISON',
 'FRANKLIN D COOKE JR',
 'GEORGE "JODY" SWEENEY',
 'GEORGE SMILEY',
 'GERALD L BRADY',
 'GERALD W HOCKER',
 'GREG 

In [43]:
# Inspect the frame in its final column order.
df

Unnamed: 0,precinct,office,party_detailed,party_simplified,mode,votes,county_name,county_fips,jurisdiction_name,jurisdiction_fips,...,state,special,writein,state_po,state_fips,state_cen,state_ic,date,readme_check,magnitude
0,01-01,GOVERNOR,DEMOCRAT,DEMOCRAT,ELECTION DAY,408,NEW CASTLE,10003.0,NEW CASTLE,10003.0,...,DELAWARE,FALSE,FALSE,DE,10,51,11,2020-11-03,,1
1,01-01,GOVERNOR,REPUBLICAN,REPUBLICAN,ELECTION DAY,44,NEW CASTLE,10003.0,NEW CASTLE,10003.0,...,DELAWARE,FALSE,FALSE,DE,10,51,11,2020-11-03,,1
2,01-01,GOVERNOR,INDEPENDENT,INDEPENDENT,ELECTION DAY,5,NEW CASTLE,10003.0,NEW CASTLE,10003.0,...,DELAWARE,FALSE,FALSE,DE,10,51,11,2020-11-03,,1
3,01-01,GOVERNOR,LIBERTARIAN,LIBERTARIAN,ELECTION DAY,4,NEW CASTLE,10003.0,NEW CASTLE,10003.0,...,DELAWARE,FALSE,FALSE,DE,10,51,11,2020-11-03,,1
4,02-01,GOVERNOR,DEMOCRAT,DEMOCRAT,ELECTION DAY,753,NEW CASTLE,10003.0,NEW CASTLE,10003.0,...,DELAWARE,FALSE,FALSE,DE,10,51,11,2020-11-03,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24265,16-04,TREASURER - WILMINGTON,DEMOCRAT,DEMOCRAT,ABSENTEE,28,NEW CASTLE,10003.0,NEW CASTLE,10003.0,...,DELAWARE,FALSE,FALSE,DE,10,51,11,2020-11-03,,1
24266,03-13,TREASURER - WILMINGTON,DEMOCRAT,DEMOCRAT,ABSENTEE,32,NEW CASTLE,10003.0,NEW CASTLE,10003.0,...,DELAWARE,FALSE,FALSE,DE,10,51,11,2020-11-03,,1
24267,12-13,TREASURER - WILMINGTON,DEMOCRAT,DEMOCRAT,ABSENTEE,1,NEW CASTLE,10003.0,NEW CASTLE,10003.0,...,DELAWARE,FALSE,FALSE,DE,10,51,11,2020-11-03,,1
24268,13-13,TREASURER - WILMINGTON,DEMOCRAT,DEMOCRAT,ABSENTEE,16,NEW CASTLE,10003.0,NEW CASTLE,10003.0,...,DELAWARE,FALSE,FALSE,DE,10,51,11,2020-11-03,,1


In [99]:
# Debug export: write the rows whose county_name is the empty string to test.csv.
# NOTE(review): county_name is cast with astype(str) earlier in the notebook, so a
# missing county would presumably read 'nan' rather than '' — confirm this filter
# still catches the rows it is meant to.
df[(df['county_name'] == '')].to_csv('test.csv', index=False)

In [97]:
# Inspect the frame again.
df

Unnamed: 0,precinct,office,party_detailed,party_simplified,mode,votes,county_name,county_fips,jurisdiction_name,jurisdiction_fips,...,state,special,writein,state_po,state_fips,state_cen,state_ic,date,readme_check,magnitude
0,01-01,GOVERNOR,DEMOCRAT,DEMOCRAT,ELECTION DAY,408,NEW CASTLE,10003,NEW CASTLE,10003,...,DELAWARE,FALSE,FALSE,DE,10,51,11,2020-11-03,,1
1,01-01,GOVERNOR,REPUBLICAN,REPUBLICAN,ELECTION DAY,44,NEW CASTLE,10003,NEW CASTLE,10003,...,DELAWARE,FALSE,FALSE,DE,10,51,11,2020-11-03,,1
2,01-01,GOVERNOR,INDEPENDENT,INDEPENDENT,ELECTION DAY,5,NEW CASTLE,10003,NEW CASTLE,10003,...,DELAWARE,FALSE,FALSE,DE,10,51,11,2020-11-03,,1
3,01-01,GOVERNOR,LIBERTARIAN,LIBERTARIAN,ELECTION DAY,4,NEW CASTLE,10003,NEW CASTLE,10003,...,DELAWARE,FALSE,FALSE,DE,10,51,11,2020-11-03,,1
4,02-01,GOVERNOR,DEMOCRAT,DEMOCRAT,ELECTION DAY,753,NEW CASTLE,10003,NEW CASTLE,10003,...,DELAWARE,FALSE,FALSE,DE,10,51,11,2020-11-03,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24265,16-04,TREASURER - WILMINGTON,DEMOCRAT,DEMOCRAT,ABSENTEE,28,NEW CASTLE,10003,NEW CASTLE,10003,...,DELAWARE,FALSE,FALSE,DE,10,51,11,2020-11-03,,1
24266,03-13,TREASURER - WILMINGTON,DEMOCRAT,DEMOCRAT,ABSENTEE,32,NEW CASTLE,10003,NEW CASTLE,10003,...,DELAWARE,FALSE,FALSE,DE,10,51,11,2020-11-03,,1
24267,12-13,TREASURER - WILMINGTON,DEMOCRAT,DEMOCRAT,ABSENTEE,1,NEW CASTLE,10003,NEW CASTLE,10003,...,DELAWARE,FALSE,FALSE,DE,10,51,11,2020-11-03,,1
24268,13-13,TREASURER - WILMINGTON,DEMOCRAT,DEMOCRAT,ABSENTEE,16,NEW CASTLE,10003,NEW CASTLE,10003,...,DELAWARE,FALSE,FALSE,DE,10,51,11,2020-11-03,,1
