# Virginia Analysis 
### December 2019 
### Aaron Barden, Hannah Wheelan, Hope Johnson

In [8]:
# set up libraries
import pandas as pd
import numpy as np
import os
import gerrymetrics as g
import pathlib

# set up path
path = pathlib.Path.home() / "projects" / "VA" / "2019" / "citizens report"

# set up gerrymetrics variables
metric_dict = {'t_test_diff':            g.t_test_diff,
               'mean_median_diff':       g.mean_median,
               'declination':            g.declination,
               'efficiency_gap':         g.EG,
               'partisan_bias':          g.partisan_bias}
min_districts = 5
min_year=1972
competitiveness_threshold = .65 # needs to be above .5

Below I read in the data and re-shape it for data analysis. 
Note that the 2019 results are from the House of Representatives and represent the 2-party vote share. The 2017 results are from the House of Representatives and represent th

In [9]:
base_dat_path =  path / "2019 Hypothetical Results - Gerrymetrics data.csv"
dat = pd.read_csv(base_dat_path, header=1)
dat.head()
dat = dat.rename(columns={
    "District": "District",
    "D votes": "Dem Votes-2017",
    "Dshare": "Dshare-2017", 
    "Rvotes": "GOP Votes-2017",
    "Rshare": "Rshare-2017",
    "D votes.1": "Dem Votes-2019",
    "Dshare.1": "Dshare-2019",
    "R votes.1": "GOP Votes-2019",
    "Rshare.1": "Rshare-2019",
    "affected": "affected"})

dat['Dshare-2017'] = dat['Dshare-2017'].str.replace('%', '')
dat['Dshare-2019'] = dat['Dshare-2019'].str.replace('%', '')
dat['Rshare-2017'] = dat['Rshare-2017'].str.replace('%', '')
dat['Rshare-2019'] = dat['Rshare-2019'].str.replace('%', '')

dat.head()


Unnamed: 0,District,Dem Votes-2017,Dshare-2017,R votes,Rshare-2017,Dem Votes-2019,Dshare-2019,GOP Votes-2019,Rshare-2019,affected
0,1,4639,23.81,14848,76.19,0,0.0,16748,100.0,0
1,2,13366,63.14,7803,36.86,11828,61.0,7563,39.0,0
2,3,3759,21.69,13572,78.31,0,0.0,17099,100.0,0
3,4,0,0.0,15282,100.0,8461,37.03,14389,62.97,0
4,5,0,0.0,18402,100.0,0,0.0,18490,100.0,0


In [10]:
dat_long = pd.wide_to_long(dat, ['Dshare', 'Rshare', 'Dem Votes', 'GOP Votes'], i='District', j='Year', sep='-')
dat_long[['Dshare', 'Rshare']] = dat_long[['Dshare', 'Rshare']].apply(pd.to_numeric)
dat_long['D Voteshare'] = dat_long['Dshare']/100
dat_long['Rshare'] = dat_long['Rshare']/100
dat_long = dat_long.reset_index()
dat_long['Party'] = np.where(dat_long['Dshare'] > .5, 'D', 'R')
# dat_long['Dem Votes'] = 0
# dat_long['GOP Votes'] = 0
dat_long['State'] = 'VA'
dat_long.head()


Unnamed: 0,District,Year,R votes,affected,Dshare,Rshare,Dem Votes,GOP Votes,D Voteshare,Party,State
0,1,2017,14848,0,23.81,0.7619,4639,,0.2381,D,VA
1,1,2019,14848,0,0.0,1.0,0,16748.0,0.0,R,VA
2,2,2017,7803,0,63.14,0.3686,13366,,0.6314,D,VA
3,2,2019,7803,0,61.0,0.39,11828,7563.0,0.61,D,VA
4,3,2017,13572,0,21.69,0.7831,3759,,0.2169,D,VA


## Create data sets

In [12]:
affected = dat_long['affected'] == 1
unaffected = dat_long['affected'] == 0
year_2017 = dat_long['Year'] == 2017
year_2019 = dat_long['Year'] == 2019

dat_2017 = dat_long[year_2017]
dat_2017_affected = dat_long[year_2017 & affected]
dat_2017_unaffected = dat_long[year_2017 & unaffected]

dat_2019 = dat_long[year_2019]
dat_2019_affected = dat_long[year_2019 & affected]
dat_2019_unaffected = dat_long[year_2019 & unaffected]

dat_path_2017 = path / "dat_2017.csv"
dat_path_2017_affected = path / "dat_2017_affected.csv"
dat_path_2017_unaffected = path / "dat_2017_unaffected.csv"

dat_path_2019 = path / "dat_2019.csv"
dat_path_2019_affected = path / "dat_2019_affected.csv"
dat_path_2019_unaffected = path / "dat_2019_unaffected.csv"

dat_2017.to_csv(dat_path_2017)
dat_2017_affected.to_csv(dat_path_2017_affected)
dat_2017_unaffected.to_csv(dat_path_2017_unaffected)

dat_2019.to_csv(dat_path_2019)
dat_2019_affected.to_csv(dat_path_2019_affected)
dat_2019_unaffected.to_csv(dat_path_2019_unaffected)


In [13]:
def run_tests(data, title):
    parsed_res = g.parse_results(data)
    out_file = g.tests_df(g.run_all_tests(parsed_res, impute_val=1, metrics=metric_dict))
    file_name = title + ".csv"
    out_path = pathlib.Path.home() / "projects" / "VA" / "2019" / "citizens report" / file_name
    out_file.to_csv(out_path)

run_tests(dat_path_2017, "statewide_2017")
run_tests(dat_path_2017_affected, "affected_2017")
run_tests(dat_path_2017_unaffected, "unaffected_2017")

run_tests(dat_path_2019, "statewide_2019")
run_tests(dat_path_2019_affected, "affected_2019")
run_tests(dat_path_2019_unaffected, "unaffected_2019")


  if df.columns.contains('Dem Votes'):
100%|██████████| 1/1 [00:00<00:00, 132.05it/s]
  names=['Year', 'State'])
100%|██████████| 1/1 [00:00<00:00, 295.96it/s]
100%|██████████| 1/1 [00:00<00:00, 276.23it/s]
100%|██████████| 1/1 [00:00<00:00, 250.99it/s]
100%|██████████| 1/1 [00:00<00:00, 301.64it/s]
100%|██████████| 1/1 [00:00<00:00, 262.47it/s]
