## Database set-up and additional EDA
In this notebook, I'll set up the database for use on the blog and do some extra exploratory analysis with Bokeh.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from bokeh.plotting import *
from bokeh.palettes import RdYlGn6 as palette
output_notebook()

In [13]:
# Read the three source tables in one pass; each file's first column
# becomes the row index of the resulting frame.
senate, votes, bills = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        '../data/senators114.csv',
        '../data/cleaned_votes.csv',
        '../data/all_bills.csv',
    )
)

In [15]:
# Preview the first rows of the senator table to sanity-check the load.
senate.head()

Unnamed: 0,lis_id,cspan_id,state,first_name,last_name,missed_votes,missed_votes_pct,next_election,party,senate_class,seniority,total_votes,votes_with_party_pct
0,S289,5,TN,Lamar,Alexander,14,2.79,2020,R,2,13,502,85.45
1,S340,95241,NH,Kelly,Ayotte,7,1.39,2016,R,3,5,502,79.39
2,S342,45465,MO,Roy,Blunt,22,4.38,2016,R,3,5,502,90.83
3,S223,2470,CA,Barbara,Boxer,67,13.35,2016,D,3,23,502,93.56
4,S307,5051,OH,Sherrod,Brown,11,2.19,2018,D,1,3,502,93.48


In [22]:
# Preview the last rows of the vote table: one column per senator plus a
# final 'outcome' column.
votes.tail()

Unnamed: 0,Alexander (R-TN),Ayotte (R-NH),Baldwin (D-WI),Barrasso (R-WY),Bennet (D-CO),Blumenthal (D-CT),Blunt (R-MO),Booker (D-NJ),Boozman (R-AR),Boxer (D-CA),...,Tillis (R-NC),Toomey (R-PA),Udall (D-NM),Vitter (R-LA),Warner (D-VA),Warren (D-MA),Whitehouse (D-RI),Wicker (R-MS),Wyden (D-OR),outcome
496,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,0,1,1,0,1
497,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,0,1
500,1,1,1,1,1,1,1,0,1,0,...,1,1,1,1,0,0,1,1,0,1
501,1,1,0,1,1,1,1,1,1,0,...,1,1,0,1,1,0,0,1,0,1
502,1,1,1,1,1,1,1,1,1,0,...,1,1,1,1,1,0,0,1,0,1


In [19]:
# Preview the first rows of the bill/roll-call table to sanity-check the load.
bills.head()

Unnamed: 0,issue,nay,question,result,title,vote_date,vote_number,yea
1,H.R. 26,66,On the Amendment,Rejected,Warren Amdt. No. 1; In the nature of a substit...,08-Jan,1,31
2,H.R. 26,4,On Passage of the Bill,Passed,H.R. 26; A bill to extend the termination date...,08-Jan,2,93
3,S. 1,32,On Cloture on the Motion to Proceed,Agreed to,Motion to Invoke Cloture on the Motion to Proc...,12-Jan,3,63
4,S. 1,42,On the Motion to Table,Agreed to,Motion to Table Markey Amdt. No. 13; To ensure...,20-Jan,4,57
5,S. 1,46,On the Motion to Table,Agreed to,Motion to Table Franken Amdt. No. 17; To requi...,20-Jan,5,53


Everything looks as I expect. 

In [16]:
# make a correlation map w Bokeh
#
# Pairwise correlation between the vote columns. The last column of `votes`
# is excluded: per the preview above it is 'outcome', not a senator.
correlations = votes.iloc[:, :-1].corr()

# Take the axis labels from the correlation matrix itself so the categorical
# ranges always match the cells that are actually plotted.
senators = list(correlations.columns)

# NOTE(review): RdYlGn6 is assumed to be ordered green -> red in Bokeh, so it
# is reversed here to map low correlation to red and high to green -- confirm
# against the installed Bokeh version.
colors_low_to_high = list(palette)[::-1]
n_bins = len(colors_low_to_high)
missing_color = '#cccccc'  # used where the correlation is undefined (NaN)

# Flatten the square matrix into parallel lists: one rectangle per
# (column senator, row senator) pair, each with its own fill colour.
# The previous version passed `senators` as both x and y, which only drew the
# diagonal and never used `correlations` or `palette`.
xs, ys, cell_colors = [], [], []
for row_name, row_values in zip(correlations.index, correlations.values):
    for col_name, value in zip(senators, row_values):
        xs.append(col_name)
        ys.append(row_name)
        if pd.isna(value):
            cell_colors.append(missing_color)
        else:
            # Map [-1, 1] onto bins 0..n_bins-1; clamp so that exactly +1.0
            # falls into the last bin instead of running off the palette.
            bin_index = min(int((value + 1) / 2 * n_bins), n_bins - 1)
            cell_colors.append(colors_low_to_high[bin_index])

hm = figure(title='114th US Senate, Voting Roll Call Correlation',
            tools='wheel_zoom,box_zoom, save, reset', x_range=senators, y_range=senators,
            height=900, width=1100,
           )
hm.rect(xs, ys, width=1, height=1, color=cell_colors)
# Tilt the x tick labels (about 0.8 rad) so the senator names do not overlap.
hm.xaxis.major_label_orientation = 3.14/3.9
show(hm)

In [18]:
# make another correlation map

import matplotlib.pyplot as plt
import seaborn as sns

def plot_correlation_map( df ):
    """Draw a seaborn heatmap of the pairwise correlations in ``df``.

    Only numeric and boolean columns take part in the correlation. Older
    pandas dropped other columns silently inside ``DataFrame.corr()``, while
    pandas >= 2.0 raises on string columns, so the selection is made
    explicitly here to behave the same on both.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are correlated against each other.

    Returns
    -------
    None
        The heatmap is drawn on a newly created 32x20 inch matplotlib figure,
        which becomes the current figure (so a following ``plt.savefig``
        writes it out).
    """
    numeric_columns = df.select_dtypes(include=['number', 'bool'])
    corr = numeric_columns.corr()
    _, ax = plt.subplots(figsize=(32, 20))
    cmap = sns.diverging_palette(220, 10, sep=90, as_cmap=True)
    sns.heatmap(
        corr,
        cmap=cmap,
        square=True,
        cbar_kws={'shrink': .9},
        ax=ax,
    )
    

# Draw the correlation heatmap for the `senate` table and write the current
# matplotlib figure to 'correlation.jpg' in the working directory.
# NOTE(review): this passes the senator metadata frame, not the vote matrix;
# if a senator-by-senator voting map was intended, the argument should
# probably be the senator columns of `votes` -- confirm.
plot_correlation_map(senate)
plt.savefig('correlation.jpg')

In [157]:
# One label per column of `votes`, keyed by the column name. The slice takes
# the single character five positions from the end of the label: for labels
# shaped like 'Alexander (R-TN)' that is the party letter, while for the
# 'outcome' column it yields 't'.
parties = pd.Series(
    [label[-5:-4] for label in votes.columns],
    index=list(votes.columns),
    name='party',
)

In [166]:
# Add the party labels as an extra row named 'party' below the vote rows.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# Series is turned into a one-row frame and concatenated instead.
df = pd.concat([votes, parties.to_frame().T])

In [173]:
# Senator column labels ordered by party letter, descending.
# 'outcome' is removed by name rather than by slicing off the first sorted
# entry: the old `[1:]` only worked because the placeholder label derived for
# 'outcome' happened to sort ahead of every party letter.
# A stable sort keeps the original column order within each party.
senators_by_party = (
    df.loc['party']
    .drop('outcome')
    .sort_values(ascending=False, kind='mergesort')
    .index
)

In [180]:
# Correlation matrix of the vote columns, with the columns taken in the
# order given by senators_by_party.
srt = votes[senators_by_party].corr()