In [329]:
import pandas as pd
import numpy as np
import os
import electioncleaner as EC
import csv
pd.options.display.max_columns = 100
pd.options.display.max_rows = 800

In [330]:
# converts to long form, removes double counting
def format_data(df, office):
    df = df.rename(columns={"TOWN":"MUNICIPALITY","CTY":"COUNTY","DIST":"DISTRICT"})
    df = df.iloc[1:]
    if isinstance(df.iloc[0,df.columns.get_loc("MUNICIPALITY")+1],str): #if party/writein available, add to candidate
        df.columns = (df.columns+'-'+df.iloc[0,:].fillna('')).str.strip('-')
    df['MUNICIPALITY'] = df['MUNICIPALITY'].astype(str)
    df = df[~((df.MUNICIPALITY.str.contains('Total'))|(df.MUNICIPALITY.str.contains('nan'))|(df.MUNICIPALITY.str.contains('Tottal'))|(df.MUNICIPALITY.str.contains('total')))]
    df = df.drop(columns={col for col in list(df.columns) if 'Unnamed' in col})
    if 'DISTRICT' in df.columns:
        i_d = df.columns[:3].tolist()
        val = df.columns[3:].tolist()
    else:
        i_d = df.columns[:2].tolist()
        val = df.columns[2:].tolist()
    df = pd.melt(df, id_vars=i_d, value_vars=val,value_name='votes',var_name='candidate')
    df['votes'] = df['votes'].astype(int)
    df['office'] = office
    return df

In [331]:
# created separate format function to deal with one raw file that was different than the rest
def format_acf(df,office):
    df = df.rename(columns={"TOWN":"MUNICIPALITY","CTY":"COUNTY","DIST":"DISTRICT"})
    if isinstance(df.iloc[0,df.columns.get_loc("MUNICIPALITY")+1],str):
        df = df.iloc[1:]
    df= df.iloc[:-1]
    df['MUNICIPALITY'] = df['MUNICIPALITY'].astype(str)
    df = df.drop(columns={col for col in list(df.columns) if 'Unnamed' in col})
    i_d = df.columns[:4].tolist()
    val = df.columns[4:].tolist()
    df = pd.melt(df, id_vars=i_d, value_vars=val,value_name='votes',var_name='candidate')
    df['votes'] = df['votes'].astype(int)
    df['office'] = office
    return df

In [332]:
def fix_office(df):
    old_titles=sorted(list(df['office'].unique()))
    office_titles = ['Finance Committee','County Commissioner',
                     'Judge of Probate','Budget Committee','US PRESIDENT',
                     'Register of Probate', 'US HOUSE','STATE HOUSE','Sheriff','STATE SENATE','US SENATE']
    office_dict = dict(zip(old_titles, office_titles))
    df = df.replace(office_dict)
    df['office']=df['office'].str.upper()
    return df

In [333]:
def split_candidate_party(df):
    df['candidate'] = df['candidate'].str.upper().str.replace('WRITE-IN','WRITEIN').str.replace('PREFERRED CANDIDATE- ','')
    cand_party_list=[i.split('-') for i in df['candidate']]
    cand_party_list=[i+[''] if len(i)<2 else i for i in cand_party_list]
    cand = [' '.join(reversed(i[0].split(', '))) for i in cand_party_list]
    party = [i[1] for i in cand_party_list]
    df['party_detailed'] = party
    df['party_detailed'] = df['party_detailed'].str.replace('DEMOCRATIC','DEMOCRAT',regex=False)
    df['candidate']= cand
    df['candidate'] = df['candidate'].str.replace('.','',regex=True).str.replace('  ',' ',regex=True)
    return df

In [334]:
def fix_writein(df):  
#     df['writein'] = np.where((df['candidate'].str.contains('WRITE'))|(df['party_detailed'].str.contains('WRITE')),
#                             True, False)
    df['candidate'] = df['candidate'].str.replace('\(WRITEIN\)','',regex=True)
    df['party_detailed'] = np.where(df['party_detailed'].str.contains('WRITE'),'',df['party_detailed'])
    return df

In [335]:
def get_party_simplified(x):
    if x in ['DEMOCRAT','REPUBLICAN','NONPARTISAN',"LIBERTARIAN"]: return x
    if x == '': return ''
    else: return "OTHER"

In [336]:
def fix_district(x):
    if x == 'I': return '001'
    if x == 'III': return '003'
    if x>0: return str(x).split('.0')[0].zfill(3)
    else: return ''

In [337]:
def get_mode(x):
    if x == 'STATE UOCAVA': return 'UOCAVA'
    else: return 'TOTAL'

In [338]:
def fix_county(df):
    county_fips=pd.read_csv('/Users/declanchin/Desktop/MEDSL/2020-precincts/help-files/county-fips-codes.csv')
    county_fips = county_fips[county_fips['state']=='Maine']
    absentee_index=list(df[df['MUNICIPALITY']=='STATE UOCAVA'].index)
    for i in absentee_index:
        df['COUNTY'].iloc[i] = df['COUNTY'].iloc[i-1]
    df['candidate'] = np.where((df['MUNICIPALITY']=='STATE UOCAVA'),df['candidate']+' - UOCAVA TOTAL',df['candidate'])
    df['MUNICIPALITY'] = df['MUNICIPALITY'].replace('STATE UOCAVA','')
    old_titles=sorted(df['COUNTY'].unique())
    new_titles = sorted(county_fips['county_name'].unique())
    county_dict = dict(zip(old_titles, new_titles))
    df = df.replace(county_dict)
    df = df.rename(columns={"DISTRICT":'district','COUNTY':'county_name','MUNICIPALITY':'precinct'})#fix column names
    df=df.merge(county_fips, on='county_name')
    return df

In [339]:
def get_dataverse(x):
    if x =='US PRESIDENT': return 'PRESIDENT'
    if x == 'US HOUSE': return 'HOUSE'
    if x =='US SENATE': return 'SENATE'
    if x in ['STATE SENATE', 'STATE HOUSE']: return 'STATE'
    else: return 'LOCAL'

In [340]:
def fix_candidate(x):
    if 'BLANK (NO CANDIDATE)' in x: return x.replace('BLANK (NO CANDIDATE)','UNDERVOTES')
    if 'BLANK' in x: return x.replace('BLANK','UNDERVOTES')
    if 'OTHERS' in x: return x.replace('OTHERS','WRITEIN')
    if x=='ROQUE DE LA FUENTE': return 'ROQUE "ROCKY" DE LA FUENTE'
    if 'III ' in x: 
        if 'UOCAVA' in x: 
            return ('').join(x.replace(' - UOCAVA TOTAL','').split('III '))+' III' + ' - UOCAVA TOTAL'
        else: return ('').join(x.split('III '))+' III'
    if 'II ' in x: 
        if 'UOCAVA' in x: 
            return ('').join(x.replace(' - UOCAVA TOTAL','').split('II '))+' II' + ' - UOCAVA TOTAL'
        else: return ('').join(x.split('II '))+' II'
    if 'IV ' in x: 
        if 'UOCAVA' in x: 
            return ('').join(x.replace(' - UOCAVA TOTAL','').split('IV '))+' IV' + ' - UOCAVA TOTAL'
        else: return ('').join(x.split('IV '))+' IV'
    if 'SR ' in x: 
        if 'UOCAVA' in x: 
            return ('').join(x.replace(' - UOCAVA TOTAL','').split('SR '))+' SR' + ' - UOCAVA TOTAL'
        else: return ('').join(x.split('SR '))+' SR' 
    if 'JR ' in x: 
        if 'UOCAVA' in x: 
            return ('').join(x.replace(' - UOCAVA TOTAL','').split('JR '))+' JR' + ' - UOCAVA TOTAL'
        else: return ('').join(x.split('JR '))+' JR' 
    else: return x

In [342]:
# loops through all raw files, and applies the appropriate function to convert to long format
# 
# most of the data (non-statewide) is reported in stacked tables for different districts in a given race.
# see documentation in loop

path_to_raw = '/Users/declanchin/Desktop/MEDSL/2020-precincts/precinct/ME/raw/'
statewide_files = ['ussenator1120.xlsx','repcongress1120.xlsx','presandvisecnty1120.xlsx']
all_files = [f for f in os.listdir(path_to_raw) if '1120' in f]
df_list = []
for file in all_files:
    if file in statewide_files:
        if file == 'repcongress1120.xlsx':
            dist1 = pd.read_excel(path_to_raw+file,sheet_name=0)
            dist2 = pd.read_excel(path_to_raw+file,sheet_name=1)
            df = pd.concat([format_data(dist1,file),format_data(dist2,file)])
            df_list = df_list + [df]
        else:
            df = pd.read_excel(path_to_raw+file)
            df = format_data(df,file)
            df_list = df_list + [df]
    else:
        # this for loop parses through the stacked data and returns a distinct dataframe for each one within
        # a given excel file. doneso thru utlizing the index of null rows that separate stacked tables.
        # **altered state senate raw data by adding null rows in between stacked tables**
        stacked_df=pd.read_excel(path_to_raw+file)
        separator_index=list(stacked_df.index[stacked_df.isna().all(axis=1)])
        xl = pd.ExcelFile(path_to_raw+file)
        stack_list = []
        for i in np.arange(len(separator_index)+1):
            if i ==0:
                df = xl.parse(0, skipfooter= (len(stacked_df)-separator_index[0]))
                stack_list = stack_list + [df]
            elif i in np.arange(len(separator_index)):
                df = xl.parse(0, skiprows=(separator_index[i-1]+2),skipfooter= len(stacked_df)-separator_index[i])
                stack_list = stack_list + [df]
            else:
                df = xl.parse(0, skiprows=(separator_index[i-1]+2))
                stack_list = stack_list + [df]
        stack_list = [i for i in stack_list if len(i)>0] #removed df created as a result of multiple null rows
        if file == 'acf1120.xlsx':
            stack_formated = [format_acf(i,file) for i in stack_list] 
            df_list = df_list + stack_formated
        else: 
            stack_formated = [format_data(i,file) for i in stack_list]
            df_list = df_list + stack_formated
df=pd.concat(df_list).reset_index(drop=True)

In [343]:
#run all functions
df=fix_office(df)
df=split_candidate_party(df)
df=fix_writein(df)
df['party_simplified'] = df.party_detailed.apply(get_party_simplified)
df['DISTRICT'] = df.DISTRICT.apply(fix_district)
#retain info in "AREA" field and place in the district field (only for acf)
df['DISTRICT']= np.where(df['AREA'].notnull(),df['DISTRICT'].astype(str) + ', AREA ' + df['AREA'].astype(str).str.strip('\.0'),
                            df['DISTRICT'])
df = df.drop(columns = "AREA")
df['mode'] = df.MUNICIPALITY.apply(get_mode)
df = fix_county(df)
df['candidate'] = df.candidate.apply(fix_candidate)
df=df[~df['candidate'].str.contains('TBC')] #drop totals (double count)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [344]:
# add year, stage, state, date, office, jurisdiction,special
jurisdiction_fips = pd.read_csv('/Users/declanchin/Desktop/MEDSL/2020-precincts/help-files/jurisdiction-fips-codes.csv')
jurisdiction_fips = jurisdiction_fips[jurisdiction_fips['state']=='Maine'].drop(columns='state')
df['jurisdiction_name'] = df['precinct'].str.upper()
df=df.merge(jurisdiction_fips, on='jurisdiction_name', how='left')
df['jurisdiction_fips'] = df['jurisdiction_fips'].fillna('').astype(str).str.strip('\.0')

df['jurisdiction_fips'] = np.where(df['mode']=='UOCAVA', df['county_fips'],df['jurisdiction_fips'])
df['jurisdiction_name'] = np.where(df['mode']=='UOCAVA', df['county_name'],df['jurisdiction_name'])

df['year']= 2020
df['state'] = 'MAINE'
df['date']= '2020-11-03'
df['readme_check'] = 'FALSE'
df['writein'] = EC.series_r_bool((df['candidate'].str.contains('WRITE'))|(df['party_detailed'].str.contains('WRITE')))
df['stage']='GEN'
df['magnitude']=1
df['special'] = np.where(((df['office']=='COUNTY COMMISSIONER')&(df['district']=='002')&(df['county_name']=='ANDROSCOGGIN')),
                        'TRUE','FALSE')
df['candidate'] = df['candidate'].str.replace('  ', ' ')
# state codes
state_codes = pd.read_csv('/Users/declanchin/Desktop/MEDSL/2020-precincts/help-files/merge_on_statecodes.csv')
state_codes = state_codes[state_codes['state']=='Maine']
state_codes['state'] = state_codes['state'].str.upper()
df=df.merge(state_codes, on='state', how='left')

In [345]:
### Creates crosswalk for townships that do not match with our jurisdiction_fips.csv
### First utilizes string matching, then 2018-precincts matching to account for townships not found in 
### jurisdiction-fips file. Takes in df then returns df with converted township names for precincts with missing
### jurisdiction_fips information. 

def township_jurisdiction_crosswalk(df):
    # finds rows with no jurisdiction fips after initial merge
    to_crosswalk=df[df['jurisdiction_fips']==''][['county_fips','jurisdiction_name']]
    to_crosswalk=to_crosswalk[to_crosswalk['jurisdiction_name']!='']

    # performs merge again, but retains jurisdiction names of each respective file
    fips = pd.read_csv('/Users/declanchin/Desktop/MEDSL/2020-precincts/help-files/jurisdiction-fips-codes.csv')
    fips=fips[fips['state']=='Maine']
    fips['county_fips']=fips['jurisdiction_fips'].astype(str).str[:5].astype(int)
    crosswalk=to_crosswalk.merge(fips, on='county_fips',how='inner',suffixes=('_raw','_file'))
    # fips['county_fips']=fips['jurisdiction_fips'].astype(str).str[:5].astype(int)
    # crosswalk=to_crosswalk.merge(fips, on='county_fips',how='inner',suffixes=('_raw','_file'))

    # removes slashes and whitespace from each jurisdiction name from each file
    crosswalk['jurisdiction_first_raw']= [i[0] for i in crosswalk['jurisdiction_name_raw'].str.replace('/',' ').str.split(' ')]
    crosswalk['jurisdiction_first_file']= [i[0] for i in crosswalk['jurisdiction_name_file'].str.replace('/',' ').str.split(' ')]
    #loop to str match raw to file jurisdictions based on the first word of each 
    index_list=[]
    for i in crosswalk['jurisdiction_name_raw'].unique():
        sub = crosswalk[crosswalk['jurisdiction_name_raw']==i]
        if sum(sub['jurisdiction_first_raw']==sub['jurisdiction_first_file'])>0:
            index=list(sub[sub['jurisdiction_first_raw']==sub['jurisdiction_first_file']].index)
            index_list = index_list+index

    crosswalk_matched=crosswalk.iloc[index_list].drop_duplicates()
    # removes false positive matches, then retains the two columns needed to crosswalk
    crosswalk_matched=crosswalk_matched[~crosswalk_matched['jurisdiction_name_file'].isin(['FORT FAIRFIELD','EAST CENTRAL WASHINGTON UT'])]
    crosswalk_matched=crosswalk_matched.drop(crosswalk_matched[(crosswalk_matched['jurisdiction_name_raw'] == 'RANGELEY/ADAMSTOWN TWP') & (crosswalk_matched['jurisdiction_name_file'] == 'RANGELEY PLANTATION')].index)
    crosswalk_matched=crosswalk_matched.drop(crosswalk_matched[(crosswalk_matched['jurisdiction_name_raw'] == 'RANGELEY PLT') & (crosswalk_matched['jurisdiction_name_file'] == 'RANGELEY')].index).sort_values('jurisdiction_first_raw')
    crosswalk_matched=crosswalk_matched[['jurisdiction_name_raw','jurisdiction_name_file']]

    # this creates another crosswalk from the 2018 data to be used on any remaining blank jurisdiction fips
    # after merging with the above crosswalk.
    me_2018 = pd.read_csv('/Users/declanchin/Desktop/MEDSL/2020-precincts/precinct/ME/raw/2018-me-precinct.csv')
    me_2018 = me_2018[~((me_2018['precinct']=='County Totals')|(me_2018['precinct']=='STATE UOCAVA'))]
    j_p_mismatches=me_2018[(me_2018['jurisdiction']!=me_2018['precinct'])][['jurisdiction','precinct']]
    j_p_mismatches_unique = j_p_mismatches.drop_duplicates()
    j_p_mismatches_unique['jurisdiction']=j_p_mismatches_unique['jurisdiction'].str.upper()
    j_p_mismatches_unique['precinct']=j_p_mismatches_unique['precinct'].str.upper()
    j_p_mismatches_unique = j_p_mismatches_unique.rename(columns={'precinct':'jurisdiction_name_raw',
                                                                  'jurisdiction':'jurisdiction_name_file'})

    #this contains info from str matched precinct/jurisdiction combos (prioritized) and 2018 unique precinct/jurisdiction pairs
    complete_crosswalk=pd.concat([crosswalk_matched, j_p_mismatches_unique]).drop_duplicates('jurisdiction_name_raw', keep='first')
    complete_crosswalk=complete_crosswalk.rename(columns={'jurisdiction_name_raw':'jurisdiction_name'})
    saint_fixes=pd.DataFrame([['SAINT ALBANS','ST. ALBANS'],
                          ['SAINT AGATHA/SINCLAIR','ST. AGATHA'],
                          ['SAINT FRANCIS','ST. FRANCIS'],
                          ['SAINT JOHN PLT','ST. JOHN PLANTATION'],
                          ['SAINT GEORGE','ST. GEORGE']],columns=['jurisdiction_name','jurisdiction_name_file'])
    complete_crosswalk = pd.concat([saint_fixes,complete_crosswalk])
    complete_crosswalk=complete_crosswalk.drop_duplicates(subset = 'jurisdiction_name', keep = 'first')

    # left merger on original df using the complete crosswalk, but creates a new column to ensure no information is 
    # overided. Then only retain new jurisdiction names for rows with empty jurisdiction fips codes 
    df=df.merge(complete_crosswalk, how='left', on='jurisdiction_name')
    df['jurisdiction_name'] = np.where(df['jurisdiction_fips']=='',df['jurisdiction_name_file'],df['jurisdiction_name'])

    # now merges original fips file, and retains fips for blanks that were addressed by the two crosswalks.
    # Reassigned blanks to unmatched jurisdictions to county names. and assign blank juri-fips to county-fips
    fips_file = pd.read_csv('/Users/declanchin/Desktop/MEDSL/2020-precincts/help-files/jurisdiction-fips-codes.csv')
    fips_file['state'] = fips_file['state'].str.upper()
    fips_file['county_fips'] = fips_file['jurisdiction_fips'].apply(lambda fips: int(str(fips)[:5]))
    df = df.merge(fips_file, on=['state', 'county_fips', 'jurisdiction_name'], how="left")
    df = df.rename(columns={'jurisdiction_fips_y':'jurisdiction_fips'})
    df['jurisdiction_fips'] = df['jurisdiction_fips'].fillna('')
    df['jurisdiction_name'] = np.where(df['jurisdiction_fips']=='', df['county_name'], df['jurisdiction_name'])
    df['jurisdiction_fips'] = np.where(df['jurisdiction_fips']=='', df['county_fips'], df['jurisdiction_fips'])
    df['jurisdiction_fips'] = df['jurisdiction_fips'].astype(int)
    return df
df=township_jurisdiction_crosswalk(df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  j_p_mismatches_unique['jurisdiction']=j_p_mismatches_unique['jurisdiction'].str.upper()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  j_p_mismatches_unique['precinct']=j_p_mismatches_unique['precinct'].str.upper()


In [346]:
#updating readme
df['jurisdiction_fips']=df['jurisdiction_fips'].astype(str)
def readme_check(x):
    if len(x) == 5: return 'TRUE'
    else: return 'FALSE'
df['readme_check'] = df.jurisdiction_fips.apply(readme_check) 

In [347]:
# corrections


# revert UOCAVA total to county floating
df['precinct'] = np.where(df['candidate'].str.contains('UOCAVA'), 'COUNTY FLOATING', df['precinct'])
df['candidate'] = df['candidate'].str.replace(' - UOCAVA TOTAL','')
df['candidate'] = df['candidate'].replace('ROQUE DE LA FUENTE','ROQUE "ROCKY" DE LA FUENTE')
# President and US senate to statewide district
df['district'] = np.where((df['office']=='US PRESIDENT')|(df['office']=='US SENATE'),
                         'STATEWIDE', df['district'])

# fixing STATEWIDE UOCAVA for federal elections
federal_dic = {'precinct': 'STATEWIDE UOCAVA', 'county_name': 'STATEWIDE UOCAVA', 'county_fips': '23000',
              'jurisdiction_name': 'STATEWIDE UOCAVA','jurisdiction_fips': '23000'}
fed = ['US PRESIDENT', 'US HOUSE', 'US SENATE']
for office in fed:
    for field in federal_dic.keys():
        df[field] = np.where((df['office']==office)&(df['mode']=='UOCAVA'), federal_dic[field],df[field])

# relocating dataverse to fix mult problem
df['dataverse'] = df.office.apply(get_dataverse)

# fixing US President districts
district_map=df[df['office']=='US HOUSE'][['precinct','county_name','district']].drop_duplicates()
district_map = district_map[district_map['precinct']!='STATEWIDE UOCAVA']
district_map['office'] = 'US PRESIDENT'
district_map.columns = ['precinct','county_name','district_new','office']
district_map.loc[len(district_map.index)] = ['Milo/Ornveille Twp', 'PISCATAQUIS', '002','US PRESIDENT'] # piscataquis is dist 2
df = df.merge(district_map, on = ['precinct','county_name','office'],
             how = 'left')
df['district'] = np.where(df['district_new'].notnull(), df['district_new'], df['district'])

# replacing UOCAVA pres votes with the correct district groupings
# Originally used county sheet, so now manually replacing with the right breakdowns from dist. sheet

#get uocava pres df, and correct candidate names
uocava_pres=df[((df['office']=='US PRESIDENT')&(df['precinct']=='STATEWIDE UOCAVA'))].drop(columns=['votes','district'])
correct_names=list(uocava_pres['candidate'])
#read sheets with uocava by district info
pres_dist1 = pd.read_excel('/Users/declanchin/Desktop/MEDSL/2020-precincts/precinct/ME/officialresults/presandvice1120.xlsx')
pres_dist2 = pd.read_excel('/Users/declanchin/Desktop/MEDSL/2020-precincts/precinct/ME/officialresults/presandvice1120.xlsx', sheet_name = 1)
#get dist 1 uocava df
dist1=pres_dist1[pres_dist1['MUNICIPALITY'] == 'UOCAVA Dist 1']
dist1 = dist1[['Biden, Joseph R.','De La Fuente, Roque','Hawkins, Howard','Jorgensen, Jo','Trump, Donald J.','Others','Blank']].T.reset_index().rename(columns={'index':'candidate',131:'votes'})
dist1['candidate'] = dist1['candidate'].replace(dict(zip(list(dist1['candidate']), correct_names)))
dist1['votes'] = dist1['votes'].astype(int)
dist1['district'] = '001'
dist1=uocava_pres.merge(dist1, on ='candidate', how = 'left')
#get dist 1 uocava df
dist2=pres_dist2[pres_dist2['MUNICIPALITY'] == 'UOCAVA CG2 Total']
dist2 = dist2[['Biden, Joseph R.','De La Fuente, Roque','Hawkins, Howard','Jorgensen, Jo','Trump, Donald J.','Others','Blank']].T.reset_index().rename(columns={'index':'candidate',423:'votes'})
dist2['candidate'] = dist2['candidate'].replace(dict(zip(list(dist2['candidate']), correct_names)))
dist2['votes'] = dist2['votes'].astype(int)
dist2['district'] = '002'
dist2=uocava_pres.merge(dist2, on ='candidate', how = 'left')
#dropping uocava pres columns and replacing with ones with district breakdown
df = df[~((df['office']=='US PRESIDENT')&(df['precinct']=='STATEWIDE UOCAVA'))]
df = pd.concat([df,dist1,dist2]).reset_index(drop=True)

#reassigning mode to total to match previous years
df['mode'] = 'TOTAL'

#upper and trim precinct
df['precinct'] = df['precinct'].str.upper().str.strip()

#making non-partisan local offices
df['party_detailed'] = np.where(((df['party_detailed']=='')&(df['candidate']!='UNDERVOTES')&(df['candidate']!='WRITEIN')), 
                                'NONPARTISAN',df['party_detailed'])
#reapply
df['party_simplified'] = df.party_detailed.apply(get_party_simplified)

In [354]:
df = df[['precinct', 'office', 'party_detailed', 'party_simplified', 'mode',
       'votes', 'county_name', 'county_fips', 'jurisdiction_name',
       'jurisdiction_fips', 'candidate', 'district', 'magnitude', 'dataverse',
       'year', 'stage', 'state', 'special', 'writein', 'state_po',
       'state_fips', 'state_cen', 'state_ic', 'date', 'readme_check']]

df.to_csv('2020-me-precinct-general.csv',index=False, quoting=csv.QUOTE_NONNUMERIC)

In [None]:
#knox budget committee non-partisan,https://www.mainelegislature.org/legis/statutes/30-A/title30-Asec751.html
#aroostook finance committee non partisan, http://legislature.maine.gov/statutes/30-A/title30-Asec739.html
# CC nonpartisan