In [109]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pymongo as pm
import pandas as pd
import numpy as np
from datetime import datetime
import urllib2
from bokeh.io import curdoc, output_file, show, vform, output_notebook
from bokeh.layouts import row, column
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import PreText, Select, MultiSelect, CheckboxButtonGroup
from bokeh.plotting import figure

### General Bokeh Workflow:

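- Prepare some data (in the Bokeh quickstart, plain Python lists; in this notebook, pandas DataFrames loaded from MongoDB).
- Tell Bokeh where to generate output (`output_file()` for a standalone HTML file such as "lines.html", or `output_notebook()` to render inline, which is what this notebook uses).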
- Call figure() to create a plot with some overall options like title, tools and axes labels.
- Add renderers (in this case, Figure.line) for our data, with visual customizations like colors, legends and widths to the plot.
- Ask Bokeh to show() or save() the results.

In [5]:
# Open a MongoDB client and select the 'prod' database; `db` is the handle
# used by every query in the cells and functions below.
# NOTE(review): the server host is hard-coded here; consider reading the URI
# from configuration or an environment variable instead.
mongo_uri = "mongodb://ec2-52-38-154-245.us-west-2.compute.amazonaws.com:27017"
client = pm.MongoClient(mongo_uri)
db = client['prod']

In [10]:
# Load every document of the `debates` collection into a DataFrame
# (empty filter {} = no restriction).
debates = pd.DataFrame(list(db.debates.find({})))

In [9]:
def lru_cache():
    """Return a pass-through decorator (no caching is performed).

    Used as ``@lru_cache()``. The decorated function is called on every
    invocation with exactly the arguments it was given and its result is
    returned unchanged. Presumably a placeholder for ``functools.lru_cache``
    on interpreters that lack it -- TODO confirm.

    Returns:
        A decorator that wraps a function in a transparent forwarding wrapper.
    """
    # Local import so this cell stays self-contained regardless of the
    # notebook's import cell.
    import functools

    def dec(f):
        # Preserve f's __name__/__doc__ so the wrapped function still
        # identifies itself correctly in tracebacks and help().
        @functools.wraps(f)
        def _(*args, **kws):
            return f(*args, **kws)
        return _
    return dec

In [11]:
# Load every document of the `performance` collection into a DataFrame
# (empty filter {} = no restriction).
performance = pd.DataFrame(list(db.performance.find({})))

In [12]:
# Preview the first rows of the performance frame.
performance.head()

Unnamed: 0,_id,approval_rates,date,debate_id,percent_change_sp500,speakers
0,57f9a3cf799a2f85174431ff,"{u'pre': {u'republican': 47, u'democrat': 47},...",1960-09-26,0,-0.226159,"{u'republican': {u'election_result': u'lose', ..."
1,57f9a3cf799a2f8517443200,"{u'pre': {u'republican': 47, u'democrat': 46},...",1960-10-07,1,0.020369,"{u'republican': {u'election_result': u'lose', ..."
2,57f9a3d0799a2f8517443201,"{u'pre': {u'republican': 46, u'democrat': 49},...",1960-10-13,2,0.531428,"{u'republican': {u'election_result': u'lose', ..."
3,57f9a3d0799a2f8517443202,"{u'pre': {u'republican': 40, u'democrat': 51},...",1976-09-23,3,-0.112233,"{u'republican': {u'election_result': u'lose', ..."
4,57f9a3d0799a2f8517443203,"{u'pre': {u'republican': 45, u'democrat': 47},...",1976-10-06,4,0.60241,"{u'republican': {u'election_result': u'lose', ..."


In [27]:
# Load the `performance` collection through the get_data() helper.
# NOTE(review): get_data() is defined in a later cell of this notebook, so this
# cell fails on a fresh top-to-bottom run unless that definition is moved above it.
data = get_data("performance")

In [28]:
# Display the whole frame (consider data.head() to keep the output short).
data

Unnamed: 0,_id,approval_rates,date,debate_id,percent_change_sp500,speakers
0,57f9a3cf799a2f85174431ff,"{u'pre': {u'republican': 47, u'democrat': 47},...",1960-09-26,0,-0.226159,"{u'republican': {u'election_result': u'lose', ..."
1,57f9a3cf799a2f8517443200,"{u'pre': {u'republican': 47, u'democrat': 46},...",1960-10-07,1,0.020369,"{u'republican': {u'election_result': u'lose', ..."
2,57f9a3d0799a2f8517443201,"{u'pre': {u'republican': 46, u'democrat': 49},...",1960-10-13,2,0.531428,"{u'republican': {u'election_result': u'lose', ..."
3,57f9a3d0799a2f8517443202,"{u'pre': {u'republican': 40, u'democrat': 51},...",1976-09-23,3,-0.112233,"{u'republican': {u'election_result': u'lose', ..."
4,57f9a3d0799a2f8517443203,"{u'pre': {u'republican': 45, u'democrat': 47},...",1976-10-06,4,0.60241,"{u'republican': {u'election_result': u'lose', ..."
5,57f9a3d0799a2f8517443204,"{u'pre': {u'republican': 41, u'democrat': 47},...",1976-10-22,5,0.110044,"{u'republican': {u'election_result': u'lose', ..."
6,57f9a3d0799a2f8517443205,"{u'pre': {u'republican': 38, u'democrat': 39},...",1980-09-21,6,0.889749,"{u'independent': {u'election_result': u'lose',..."
7,57f9a3d1799a2f8517443206,"{u'pre': {u'republican': 39, u'democrat': 45},...",1980-10-28,7,-0.109332,"{u'republican': {u'election_result': u'win', u..."
8,57f9a3d1799a2f8517443207,"{u'pre': {u'republican': 58, u'democrat': 38},...",1984-10-07,8,-0.338087,"{u'republican': {u'election_result': u'win', u..."
9,57f9a3d1799a2f8517443208,"{u'pre': {u'republican': 56, u'democrat': 39},...",1984-10-21,9,-0.357228,"{u'republican': {u'election_result': u'win', u..."


In [30]:
# Wrap the performance frame in a Bokeh ColumnDataSource.
performance_source = ColumnDataSource(data)

(1960, 1976, 1980, 1984, 1988, 1992, 1996, 2000, 2004, 2008, 2012, 2016, 1980)

In [99]:
# Pull all speaker documents, then derive the sorted option lists that label
# the filter widgets (names, states, election years).
speaker_queries = pd.DataFrame(list(db.speakers.find({})))
names = sorted(
    speaker_name.encode('utf-8', 'ignore')
    for speaker_name in speaker_queries["name"].unique()
)
states = sorted(
    speaker_state.encode('utf-8', 'ignore')
    for speaker_state in speaker_queries["state"].unique()
)
# NOTE(review): only every second row ([::2]) contributes to the year list;
# presumably rows come in pairs per debate -- confirm no year can be missed.
years = sorted(set(str(stamp.year) for stamp in speaker_queries[::2]['year']))

19
12
12


In [214]:
names

['Al Gore',
 'Barack Obama',
 'Bill Clinton',
 'Bob Dole',
 'Donald Trump',
 'George H.W. Bush',
 'George W. Bush',
 'Gerald Ford',
 'Hillary Clinton',
 'Jimmy Carter',
 'John B. Anderson',
 'John F. Kennedy',
 'John Kerry',
 'John McCain',
 'Michael Dukakis',
 'Mitt Romney',
 'Richard Nixon',
 'Ronald Reagan',
 'Walter Mondale']

In [205]:
# Filter widgets. Every button starts active, i.e. nothing is filtered out
# until the user deselects something. The `active` index lists for the
# data-driven groups are derived from the label lists so they stay correct
# when the number of names/states/years in the database changes.
party = CheckboxButtonGroup(labels=['Republican','Democrat','Independent'], active=[0,1,2])
name = CheckboxButtonGroup(labels=names, active=list(range(len(names))))
prior_experience = CheckboxButtonGroup(labels=["Incumbent", "Challenger"], active=[0,1])
election_result = CheckboxButtonGroup(labels=["Win","Lose"], active =[0,1])
state = CheckboxButtonGroup(labels=states, active=list(range(len(states))))
year = CheckboxButtonGroup(labels=years, active=list(range(len(years))))

In [112]:
# Render the year checkbox group.
show(year)

In [110]:
# Direct Bokeh output to the notebook.
# NOTE(review): this cell sits after show(year) in file order although its
# execution count (110) is lower than that cell's (112); move it above the
# first show() call so a top-to-bottom run works.
output_notebook()

In [216]:
# Restrict the name filter to the first two labels of `names`.
name.active = [0,1]

In [203]:
def speaker_filter(party_arr, prior_experience_arr, election_result_arr, name_arr, state_arr, date_max, date_min):
    """Return the speaker_id of every speaker matching all given filters.

    Queries ``db.speakers`` (module-level Mongo handle). A document matches
    when its ``party``, ``incumbent``, ``election_result``, ``name`` and
    ``state`` fields are each contained in the corresponding list and its
    ``year`` lies strictly between ``date_min`` and ``date_max``.

    Args:
        party_arr: accepted values for the ``party`` field.
        prior_experience_arr: accepted values for the ``incumbent`` field.
        election_result_arr: accepted values for the ``election_result`` field.
        name_arr: accepted values for the ``name`` field.
        state_arr: accepted values for the ``state`` field.
        date_max: exclusive upper bound for the ``year`` field.
        date_min: exclusive lower bound for the ``year`` field.

    Returns:
        list of UTF-8 encoded ``speaker_id`` values, one per matching document.
    """
    criteria = {
        'party': {'$in': party_arr},
        'incumbent': {'$in': prior_experience_arr},
        'election_result': {'$in': election_result_arr},
        'name': {'$in': name_arr},
        'state': {'$in': state_arr},
        # Both bounds are exclusive.
        'year': {'$lt': date_max, '$gt': date_min},
    }
    # Only the speaker_id field is needed from each matching document.
    matches = db.speakers.find(criteria, {'speaker_id': 1})
    return [doc['speaker_id'].encode('utf-8', 'ignore') for doc in matches]

In [211]:
def update_performance_data(selected=None):
    """Query the performance documents for the currently selected filters.

    Reads the active buttons of the module-level widgets (``party``,
    ``prior_experience``, ``election_result``, ``name``, ``state``, ``year``),
    resolves them to speaker ids via ``speaker_filter`` and returns every
    ``db.performance`` document in which one of those speakers appears as the
    republican, democrat or independent participant.

    Args:
        selected: unused; presumably kept so the function can serve as a
            widget callback -- TODO confirm.

    Returns:
        pandas.DataFrame of matching performance documents; empty when no
        year is selected or nothing matches.
    """
    party_arr = [party.labels[i] for i in party.active]
    # The speakers collection stores incumbency as a boolean.
    prior_experience_arr = [prior_experience.labels[i] == "Incumbent" for i in prior_experience.active]
    election_result_arr = [election_result.labels[i].lower() for i in election_result.active]
    name_arr = [name.labels[i] for i in name.active]
    state_arr = [state.labels[i] for i in state.active]

    selected_years = [int(year.labels[i]) for i in year.active]
    if not selected_years:
        # max()/min() raise ValueError on an empty selection; an empty frame
        # matches what every other empty filter already produces.
        return pd.DataFrame([])
    # Only the earliest and latest selected years bound the query, so
    # unselected years lying between them are still included.
    date_max = datetime(max(selected_years) + 1, 1, 1)
    date_min = datetime(min(selected_years) - 1, 12, 31)

    speaker_list = speaker_filter(party_arr, prior_experience_arr, election_result_arr, name_arr, state_arr, date_max, date_min)
    speaker_match = {'$in': speaker_list}
    performance_data = pd.DataFrame(list(db.performance.find({'$or': [
        {'speakers.republican.speaker_id': speaker_match},
        {'speakers.democrat.speaker_id': speaker_match},
        {'speakers.independent.speaker_id': speaker_match},
    ]})))
    return performance_data

In [None]:
def update_debate_data(selected=None):
    """Query the debate documents for the currently selected filters.

    Reads the active buttons of the module-level widgets (``party``,
    ``prior_experience``, ``election_result``, ``name``, ``state``, ``year``),
    resolves them to speaker ids via ``speaker_filter`` and returns every
    ``db.debates`` document in which one of those speakers appears as the
    republican, democrat or independent participant.

    Args:
        selected: unused; presumably kept so the function can serve as a
            widget callback -- TODO confirm.

    Returns:
        pandas.DataFrame of matching debate documents; empty when no year is
        selected or nothing matches.
    """
    party_arr = [party.labels[i] for i in party.active]
    # The speakers collection stores incumbency as a boolean.
    prior_experience_arr = [prior_experience.labels[i] == "Incumbent" for i in prior_experience.active]
    election_result_arr = [election_result.labels[i].lower() for i in election_result.active]
    name_arr = [name.labels[i] for i in name.active]
    state_arr = [state.labels[i] for i in state.active]

    selected_years = [int(year.labels[i]) for i in year.active]
    if not selected_years:
        # max()/min() raise ValueError on an empty selection; an empty frame
        # matches what every other empty filter already produces.
        return pd.DataFrame([])
    # Only the earliest and latest selected years bound the query, so
    # unselected years lying between them are still included.
    date_max = datetime(max(selected_years) + 1, 1, 1)
    date_min = datetime(min(selected_years) - 1, 12, 31)

    speaker_list = speaker_filter(party_arr, prior_experience_arr, election_result_arr, name_arr, state_arr, date_max, date_min)
    speaker_match = {'$in': speaker_list}
    debate_data = pd.DataFrame(list(db.debates.find({'$or': [
        {'speakers.republican.speaker_id': speaker_match},
        {'speakers.democrat.speaker_id': speaker_match},
        {'speakers.independent.speaker_id': speaker_match},
    ]})))
    return debate_data

In [213]:
# NOTE(review): `x` is not assigned in any visible cell; presumably it holds the
# result of update_performance_data() -- confirm and assign it explicitly.
x.head()

Unnamed: 0,_id,approval_rates,date,debate_id,percent_change_sp500,speakers
0,57f9a3cf799a2f85174431ff,"{u'pre': {u'republican': 47, u'democrat': 47},...",1960-09-26,0,-0.226159,"{u'republican': {u'election_result': u'lose', ..."
1,57f9a3cf799a2f8517443200,"{u'pre': {u'republican': 47, u'democrat': 46},...",1960-10-07,1,0.020369,"{u'republican': {u'election_result': u'lose', ..."
2,57f9a3d0799a2f8517443201,"{u'pre': {u'republican': 46, u'democrat': 49},...",1960-10-13,2,0.531428,"{u'republican': {u'election_result': u'lose', ..."
3,57f9a3d0799a2f8517443202,"{u'pre': {u'republican': 40, u'democrat': 51},...",1976-09-23,3,-0.112233,"{u'republican': {u'election_result': u'lose', ..."
4,57f9a3d0799a2f8517443203,"{u'pre': {u'republican': 45, u'democrat': 47},...",1976-10-06,4,0.60241,"{u'republican': {u'election_result': u'lose', ..."


In [75]:
# Display the whole frame again (consider data.head() to keep the output short).
data

Unnamed: 0,_id,approval_rates,date,debate_id,percent_change_sp500,speakers
0,57f9a3cf799a2f85174431ff,"{u'pre': {u'republican': 47, u'democrat': 47},...",1960-09-26,0,-0.226159,"{u'republican': {u'election_result': u'lose', ..."
1,57f9a3cf799a2f8517443200,"{u'pre': {u'republican': 47, u'democrat': 46},...",1960-10-07,1,0.020369,"{u'republican': {u'election_result': u'lose', ..."
2,57f9a3d0799a2f8517443201,"{u'pre': {u'republican': 46, u'democrat': 49},...",1960-10-13,2,0.531428,"{u'republican': {u'election_result': u'lose', ..."
3,57f9a3d0799a2f8517443202,"{u'pre': {u'republican': 40, u'democrat': 51},...",1976-09-23,3,-0.112233,"{u'republican': {u'election_result': u'lose', ..."
4,57f9a3d0799a2f8517443203,"{u'pre': {u'republican': 45, u'democrat': 47},...",1976-10-06,4,0.60241,"{u'republican': {u'election_result': u'lose', ..."
5,57f9a3d0799a2f8517443204,"{u'pre': {u'republican': 41, u'democrat': 47},...",1976-10-22,5,0.110044,"{u'republican': {u'election_result': u'lose', ..."
6,57f9a3d0799a2f8517443205,"{u'pre': {u'republican': 38, u'democrat': 39},...",1980-09-21,6,0.889749,"{u'independent': {u'election_result': u'lose',..."
7,57f9a3d1799a2f8517443206,"{u'pre': {u'republican': 39, u'democrat': 45},...",1980-10-28,7,-0.109332,"{u'republican': {u'election_result': u'win', u..."
8,57f9a3d1799a2f8517443207,"{u'pre': {u'republican': 58, u'democrat': 38},...",1984-10-07,8,-0.338087,"{u'republican': {u'election_result': u'win', u..."
9,57f9a3d1799a2f8517443208,"{u'pre': {u'republican': 56, u'democrat': 39},...",1984-10-21,9,-0.357228,"{u'republican': {u'election_result': u'win', u..."


In [None]:
def get_data(collection, query=None):
    """Fetch documents from a collection of ``db`` as a DataFrame.

    NOTE(review): an earlier cell already calls ``get_data("performance")``;
    move this definition above that cell so a top-to-bottom run works.

    Args:
        collection: name of the collection to read from the module-level ``db``.
        query: Mongo filter document; ``None`` (the default) selects every
            document.

    Returns:
        pandas.DataFrame with one row per matching document.
    """
    # A None sentinel avoids sharing one mutable default dict between calls.
    if query is None:
        query = {}
    coll = db[collection]
    data = pd.DataFrame(list(coll.find(query)))
    return data