# District distances

**Procedure**
1. create a DF of the congress with
    1. all twitter id pairs 
    2. all district pairs
2. use geopandas to calculate distance between districts

In [10]:
import pandas as pd
from congress import Congress
import numpy as np

In [11]:
congress = Congress('qa1qfTzGinGGLcNCbinp2OTlI2SvzBe4KFgxYobm')

### Load the congress data into DF

In [12]:
#load the congress data
df_house = pd.DataFrame(congress.members.filter('house', congress=115)[0]['members'])

In [13]:
#drop rows if twitter_account == None
house_twonly = df_house.dropna(subset=['twitter_account'])
#drop those who are not in office anymore
house_twonly = house_twonly[house_twonly.in_office].reset_index(drop=True)

### Create pairs for each congresswomen

- for each member a (1 row), take another member b and add her information (with concat)

In [14]:
lastcol = house_twonly.shape[0]
member_pairs = pd.DataFrame()
df_list = []

for i in range(lastcol):
    mem = pd.DataFrame([house_twonly.iloc[i,:]]*(lastcol)).reset_index(drop=True) #create df with member i 
    mem = pd.concat([mem, house_twonly], axis=1) #merge member and others
    mem = mem.drop(mem.index[i]).reset_index(drop=True) #remove relf-reference
    df_list.append(mem) #store in list containing dataframes
    
#merge them all together
member_pairs = pd.concat(df_list) 

In [15]:
member_pairs = member_pairs.reset_index(drop=True)

In [16]:
member_pairs.twitter_account.tail(3)

Unnamed: 0,twitter_account,twitter_account.1
178503,RepLeeZeldin,RepTedYoho
178504,RepLeeZeldin,RepDavidYoung
178505,RepLeeZeldin,RepDonYoung


In [17]:
member_pairs.to_csv('member_pairs.csv')

### Load voting districts into geopandas

In [1]:
import json
import os
import requests

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

# spatial stuff
import geopandas as gpd
import fiona
import folium
import shapely

- shapefiles from https://www.census.gov/geo/maps-data/data/cbf/cbf_cds.html

In [2]:
#os.chdir("..\")
os.getcwd()

'C:\\Users\\Philipp\\GDrive\\Coding\\Python\\SocialDataScience\\Project_tsds\\Philipp'

In [3]:
ppath = 'C:/Users/Philipp/GDrive/Coding/Python/SocialDataScience/Project_tsds/rawdata/congress_disticts2017_500k/'
districts = gpd.read_file(ppath+'cb_2017_us_cd115_500k.shp')

In [4]:
districts.head(3)
#GEOID or STATEFP gives congressional districts

Unnamed: 0,STATEFP,CD115FP,AFFGEOID,GEOID,LSAD,CDSESSN,ALAND,AWATER,geometry
0,13,9,5001500US1309,1309,C2,115,13497964615,411754565,"POLYGON ((-84.65622499999999 34.730984, -84.65..."
1,19,4,5001500US1904,1904,C2,115,58937921470,264842664,"POLYGON ((-96.63970399999999 42.737071, -96.63..."
2,17,10,5001500US1710,1710,C2,115,777307694,31695461,"POLYGON ((-88.19882 42.41557, -88.198601 42.41..."


### Match districts via geoid 

In [5]:
#generate centroids
def get_centroids(x):
    return x.centroid

districts['centroids'] = districts.geometry.apply(get_centroids)

In [6]:
districts.centroids[0].distance(districts.centroids[1])

13.634726788599925

In [7]:
districts.geometry[0].distance(districts.geometry[1])

10.771112182700213

>Do we better use centroids or polygons to calculate distances?

1. load data
1. take geoid of member A and member B
    1. there are two columns of geoid, the first is the reference member and the second all other matched members
2. insert centroids
2. calculate distances of centroids

In [73]:
member_pairs

Unnamed: 0,api_uri,at_large,contact_form,crp_id,cspan_id,date_of_birth,district,dw_nominate,facebook_account,fax,...,title,total_present,total_votes,twitter_account,url,votes_with_party_pct,votesmart_id,youtube_account,geo_a,geo_b
0,https://api.propublica.org/congress/v1/members...,False,,N00036633,76236,1954-09-16,5,0.497,CongressmanRalphAbraham,202-225-5639,...,Representative,0.0,940.0,RepAdams,https://adams.house.gov,95.62,5935,,POINT (-91.82491765269089 31.7677401747569),
1,https://api.propublica.org/congress/v1/members...,False,,N00036633,76236,1954-09-16,5,0.497,CongressmanRalphAbraham,202-225-5639,...,Representative,0.0,940.0,Robert_Aderholt,https://aderholt.house.gov,96.70,441,RobertAderholt,POINT (-91.82491765269089 31.7677401747569),
2,https://api.propublica.org/congress/v1/members...,False,,N00036633,76236,1954-09-16,5,0.497,CongressmanRalphAbraham,202-225-5639,...,Representative,0.0,940.0,reppeteaguilar,https://aguilar.house.gov,92.05,70114,,POINT (-91.82491765269089 31.7677401747569),
3,https://api.propublica.org/congress/v1/members...,False,,N00036633,76236,1954-09-16,5,0.497,CongressmanRalphAbraham,202-225-5639,...,Representative,0.0,940.0,reprickallen,https://allen.house.gov,97.75,136062,,POINT (-91.82491765269089 31.7677401747569),
4,https://api.propublica.org/congress/v1/members...,False,,N00036633,76236,1954-09-16,5,0.497,CongressmanRalphAbraham,202-225-5639,...,Representative,2.0,940.0,MarkAmodeiNV2,https://amodei.house.gov,95.61,12537,markamodeinv2,POINT (-91.82491765269089 31.7677401747569),
5,https://api.propublica.org/congress/v1/members...,False,,N00036633,76236,1954-09-16,5,0.497,CongressmanRalphAbraham,202-225-5639,...,Representative,0.0,940.0,RepArrington,https://arrington.house.gov,98.39,155685,,POINT (-91.82491765269089 31.7677401747569),
6,https://api.propublica.org/congress/v1/members...,False,,N00036633,76236,1954-09-16,5,0.497,CongressmanRalphAbraham,202-225-5639,...,Representative,0.0,940.0,RepBrianBabin,https://babin.house.gov,95.35,360,,POINT (-91.82491765269089 31.7677401747569),
7,https://api.propublica.org/congress/v1/members...,False,,N00036633,76236,1954-09-16,5,0.497,CongressmanRalphAbraham,202-225-5639,...,Representative,0.0,940.0,RepDonBacon,https://bacon.house.gov,96.06,166299,,POINT (-91.82491765269089 31.7677401747569),POINT (-96.15977113285999 41.22942767258883)
8,https://api.propublica.org/congress/v1/members...,False,,N00036633,76236,1954-09-16,5,0.497,CongressmanRalphAbraham,202-225-5639,...,Representative,0.0,940.0,RepJimBanks,https://banks.house.gov,96.91,116801,,POINT (-91.82491765269089 31.7677401747569),
9,https://api.propublica.org/congress/v1/members...,False,,N00036633,76236,1954-09-16,5,0.497,CongressmanRalphAbraham,202-225-5639,...,Representative,0.0,940.0,RepLouBarletta,https://barletta.house.gov,95.06,47143,reploubarletta,POINT (-91.82491765269089 31.7677401747569),


In [83]:
districts[districts.GEOID == '3202'].centroids.reset_index(drop=True)

0    POINT (-117.3060329020225 40.64564001854976)
Name: centroids, dtype: object

In [80]:
member_pairs.geoid.iloc[:5,1]

0    3712
1    0104
2    0631
3    1312
4    3202
Name: geoid, dtype: object

In [78]:
def insert_geoinfo(gid):
    #find geoinformation of A and of B in districts df, append to member_pairs df 
    geoinfo = districts[districts.GEOID == gid].centroids.reset_index(drop=True)
    return geoinfo

#df_list = [insert_geoinfo]

member_pairs['geo_a'] = member_pairs.geoid.iloc[:10,0].apply(insert_geoinfo) #reference member
member_pairs['geo_b'] = member_pairs.geoid.iloc[:5,1].apply(insert_geoinfo) #all other members

In [79]:
member_pairs.geo_b

0         POINT (-80.8553640882194 35.27409102666888)
1                                                 NaN
2                                                 NaN
3                                                 NaN
4                                                 NaN
5                                                 NaN
6                                                 NaN
7                                                 NaN
8                                                 NaN
9                                                 NaN
10                                                NaN
11                                                NaN
12                                                NaN
13                                                NaN
14                                                NaN
15                                                NaN
16                                                NaN
17                                                NaN
18                          

In [None]:
#calculate distance between geo_a and geo_b (both centroids)
def get_dist(centr_a, centr_b):
    dist = centr_a.distance(member_pairs.geo_b)
    return dist
member_pairs['distance'] = member_pairs.geo_a.apply(get_dist)

In [None]:
centroids