In [None]:
from __future__ import print_function, division
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import congress_tools as cong

%matplotlib inline

In [None]:
# Load the three JSON lookup tables used to translate between FIPS codes,
# state names and state abbreviations.
def _read_json(path):
    """Return the parsed contents of the JSON file at `path`."""
    with open(path, "rb") as handle:
        return json.load(handle)

fips2state = _read_json("../data/fips2State.json")
state2st = _read_json("../data/state2St.json")
st2fips = _read_json("../data/st2Fips.json")

In [None]:
# Abbreviations of every state name in fips2state that also has an entry in
# state2st. Names without an entry are skipped on purpose; only that case is
# ignored, so any other error surfaces instead of being silently swallowed.
states = [state2st[name] for name in fips2state.values() if name in state2st]

# Make sure you have downloaded the DIME data from http://data.stanford.edu/dime/ and placed the relevant CSV (candidate_cfscores_st_fed_1979_2012.csv) in ../data/

In [None]:
# read in the DIME CFscore data

dimeData = "../data/candidate_cfscores_st_fed_1979_2012.csv"

try:
    # dtype=str loads every column as text; numeric columns are converted later.
    main_df = pd.read_csv(dimeData, dtype=str)
except IOError:
    print("Unable to read DIME data. Make sure you have downloaded it and placed in ../data/")
    # Re-raise: without main_df the following cells cannot run, and continuing
    # would either hit a NameError or silently reuse a stale frame.
    raise

In [None]:
# Keep only the winning 2012 U.S. House candidates from the known states.
is_2012_cycle = main_df.cycle == '2012'
is_house_seat = main_df.seat == 'federal:house'
in_known_state = main_df.State.isin(states)
is_winner = main_df.winner == "W"

# .copy() makes the subset an independent frame, so later column assignments
# on it are unambiguous and do not trigger SettingWithCopyWarning.
election2012_df = main_df[is_2012_cycle & is_house_seat & in_known_state & is_winner].copy()

In [None]:
# Start the working frame from the columns we keep as-is. Label-based .loc
# replaces .ix, which was removed from pandas.
election2012 = election2012_df.loc[:, ['State', 'Incum.Chall', 'cfscore', 'cfscores.dyn']]

def split2fips(district,st2fips):
    """Swap the leading two-character state code of `district` for its FIPS code.

    district -- string whose first two characters are a key of `st2fips`
    st2fips  -- mapping from two-character state code to FIPS code

    Returns st2fips[<first two characters>] followed by the rest of `district`.
    Raises KeyError when the two-character prefix is not in `st2fips`.
    """
    state_code, district_suffix = district[:2], district[2:]
    fips_prefix = st2fips[state_code]
    return fips_prefix + district_suffix

# District key: FIPS state code followed by the district suffix.
election2012['ficd'] = election2012_df.District.apply(split2fips,args=(st2fips,))

# Recode the party codes to single letters; other values are left untouched.
# The recoded column is written back with assign() rather than chained
# indexing (df['Party'][mask] = ...), which warns and may not write through.
party_letters = {'100': 'D', '200': 'R', '328': 'I'}
election2012_df = election2012_df.assign(Party=election2012_df['Party'].replace(party_letters))
election2012['party'] = election2012_df.Party

# Split each candidate name into last/first using the project helper.
# Iterating the Series directly avoids Series.iteritems (removed in pandas 2.0).
lastName = []
firstName = []
for n in election2012_df.Name:
    lName, fName = cong.splitName(n)
    lastName.append(lName)
    firstName.append(fName)

election2012['lastName'] = lastName
election2012['firstName'] = firstName



In [None]:
# Change At-Large districts, stored as cd=01 in DIME, to cd=00

oneCD = ['AK', 'DE', 'MT', 'ND', 'SD', 'VT', 'WY']

# Rows of single-district states whose district suffix is '01'.
is_at_large = election2012['State'].isin(oneCD) & (election2012['ficd'].str[2:] == '01')

# Rewrite only the suffix (keep the two-character state prefix) in one .loc
# assignment; this avoids chained .iloc assignment and a str.replace that
# would also touch a '01' appearing elsewhere in the key.
election2012.loc[is_at_large, 'ficd'] = election2012.loc[is_at_large, 'ficd'].str[:2] + '00'


In [None]:
# Use the district key 'ficd' as the row index of the DIME frame.
election2012 = election2012.set_index(['ficd'])

In [None]:
# The CSV was read with dtype=str, so convert both score columns to float.
for score_col in ('cfscore', 'cfscores.dyn'):
    election2012[score_col] = election2012[score_col].astype(float)

In [None]:
# load district summary dataframe

# --- options -----------------------------------------------------------
includeThirdParty = False

methodReapportion = 'state'
# methodReapportion = 'country'

year = 2012
# year = 2014

# Congress number for the selected election year
if year == 2012:
    congNum = 113
elif year == 2014:
    congNum = 114

# --- load the district dataframe, saved by preprocess.ipynb -------------
# NOTE: read_pickle executes whatever the pickle contains; only load files
# produced locally by preprocess.ipynb.
filename = '../data/df_distSummary_%d_%sReapportion.pkl' % (year,methodReapportion)
df = pd.read_pickle(filename)

# --- turn votes into percentages ----------------------------------------
# The denominator is the two-party vote, plus "other" votes when requested.
totalVotes = df.nDemVotes_dist + df.nRepVotes_dist
if includeThirdParty:
    totalVotes = totalVotes + df.nOthVotes_dist

vote_columns = [('demVotes_perc', 'nDemVotes_dist'),
                ('repVotes_perc', 'nRepVotes_dist')]
if includeThirdParty:
    vote_columns.append(('othVotes_perc', 'nOthVotes_dist'))

for perc_col, count_col in vote_columns:
    df[perc_col] = (df[count_col] / totalVotes) * 100

# index by district key
df = df.set_index(['ficd'])

In [None]:
# see if there are any candidates in our data that aren't in the DIME data
#
# For each district row of df, look for a DIME row with the same ficd index
# whose lastName contains the district's lastName; c counts the districts
# for which no such row exists.
c = 0
for idx, row in df.iterrows():
    same_district = election2012.index == idx
    # regex=False: match the name as literal text, not as a regular expression.
    # na=False: a DIME row with a missing last name counts as a non-match.
    same_name = election2012.lastName.str.contains(row.lastName, regex=False, na=False)
    if not (same_district & same_name).any():
        c += 1
print(c)

In [None]:
# Al Green (TX09/4809) is missing from DIME data
ficd = '4809'

# Print this district's rows from the district summary and then from the DIME
# frame; a frame with no matching index prints as empty.
for frame in (df, election2012):
    print(frame[frame.index == ficd])

In [None]:
# Left-join the DIME columns onto the district summary by index (ficd), so
# every district row is kept even when DIME has no matching row.
dfjoin = df.merge(election2012, how='left', left_index=True, right_index=True)

In [None]:
# any duplicate indices?
#
# Print every row whose index label already appeared earlier in dfjoin.
# Index.duplicated catches repeats anywhere in the frame, not only when the
# two equal labels happen to sit on adjacent rows.
for i, is_repeat in enumerate(dfjoin.index.duplicated(keep='first')):
    if is_repeat:
        print(dfjoin.iloc[i])
        print('\n')

In [None]:
# save the new dataframe with all info
# The output name encodes the election year and the reapportionment method.
name_template = '../data/df_distSummary_DIME_%d_%sReapportion.pkl'
filename = name_template % (year, methodReapportion)
dfjoin.to_pickle(filename)