In [8]:
import io
import os
import re
import pandas as pd

In [9]:
r_debates = []
d_debates = []

# get republicans
for f in os.listdir("debates/"):
    if f.endswith(".txt"):
        f_path = 'debates/{0}'.format(f)
        if '2015' in f or '2016' in f:
            if 'R_' in f:
                with io.open(f_path,'r',encoding='utf8') as f:
                    r_debates.append(f.read())
            elif 'D_' in f:
                with io.open(f_path,'r',encoding='utf8') as f:
                    d_debates.append(f.read())

In [10]:
len(r_debates)

12

In [12]:
d = r_debates[0]

In [13]:
d

u'PARTICIPANTS:\nFormer Governor Jeb Bush (FL);Ben Carson;Governor Chris Christie (NJ);Senator Ted Cruz (TX);Former Governor Mike Huckabee (AR);Governor John Kasich (OH);Senator Rand Paul (KY);Senator Marco Rubio (FL);Donald Trump;Governor Scott Walker (WI);\nMODERATORS:\nBret Baier (Fox News);Megyn Kelly (Fox News); and Chris Wallace (Fox News)\nKELLY:\nWelcome to the first debate night of the 2016 presidential campaign, live from Quicken Loans Arena in Cleveland, Ohio. I\'m Megyn Kelly... [applause]... along with my co-moderators, Brett Baier and Chris Wallace. Tonight... [applause] Nice. Tonight, thousands of people here in the Q, along with millions of voters at home will get their very first chance to see the candidates face off in a debate, answering the questions you want answered.\nBAIER:\nLess than a year from now, in this very arena, one of these 10 candidates or one of the seven on the previous debate tonight will accept the Republican party\'s nomination. [applause] Tonight

In [13]:
turns = []
debate_turns = re.split("[A-Z]+:",d)
turns += debate_turns[3:]
applause = []
for turn in turns:
    if '[applause]' in turn:
        applause.append('1')
    else:
        applause.append('0')
len(applause), len(turns)

(316, 316)

In [25]:
r_applause_dict = {'applause': applause, 'turn': turns}
r_applause_df = pd.DataFrame(r_applause_dict)
r_applause_df.to_csv('data/first_r_applause.csv', encoding='utf-8', index_col=False)

In [43]:
turns = []
for debate in r_debates:
    debate_turns = re.split("[A-Z]+:",debate)
    turns += debate_turns[3:]

In [46]:
applause = []
for turn in turns:
    if '[applause]' in turn:
        applause.append('1')
    else:
        applause.append('0')

In [47]:
len(applause), len(turns)

(4749, 4749)

In [48]:
r_applause_dict = {'applause': applause, 'turn': turns}
r_applause_df = pd.DataFrame(r_applause_dict)
r_applause_df.to_csv('data/r_applause.csv', encoding='utf-8')

In [97]:
import httplib2, argparse, os, sys, json
from oauth2client import tools, file, client
from oauth2client.service_account import ServiceAccountCredentials
from googleapiclient import discovery
from googleapiclient.errors import HttpError

#Project and model configuration
project_id = '176776777821'
model_id = 'first-republican-debate-model'
storageDataLocation = 'applause-prediction/first_r_applause.csv'
# model_id = 'republican-debate-model'
# storageDataLocation = 'applause-prediction/r_applause.csv'

#activity labels
labels = {
    '0': 'No Applause', '1': 'Applause' 
}

def main():
    """ Simple logic: train and make prediction """
    analysis = ''
    try:
        train_model()
        analysis = make_prediction()
    except HttpError as e: 
        if e.resp.status == 404: #model does not exist
            print("Model does not exist yet.")
            train_model()
            analysis = make_prediction()
        else: #real error
            print(e)
    return analysis


def make_prediction(model_id):
    """ Use trained model to generate a new prediction """

    api = get_prediction_api()

    print("Fetching model.")

    model = api.trainedmodels().get(project=project_id, id=model_id).execute()

    if model.get('trainingStatus') != 'DONE':
        print("Model is (still) training. \nPlease wait and run me again!") #no polling
        return None

    else:
        print("Model is ready.")

    
    #Optionally analyze model stats (big json!)
        analysis = api.trainedmodels().analyze(project=project_id, id=model_id).execute()
    #print(analysis)
    #exit()
    
        return analysis
    
    '''
    #read new record from local file
    with open('./data/records_100.csv') as f:
        record = f.readline().split(',') #csv

    #obtain new prediction
    prediction = api.trainedmodels().predict(project=project_id, id=model_id, body={
        'input': {
            'csvInstance': record
        },
    }).execute()

    #retrieve classified label and reliability measures for each class
    label = prediction.get('outputLabel')
    stats = prediction.get('outputMulti')

    #show results
    print("You are currently %s (class %s)." % (labels[label], label) ) 
    print(stats)
    '''


def train_model(model_id, storageDataLocation):
    """ Create new classification model """

    api = get_prediction_api()

    print("Creating new Model.")

    api.trainedmodels().insert(project=project_id, body={
        'id': model_id,
        'storageDataLocation': storageDataLocation,
        'modelType': 'CLASSIFICATION'
    }).execute()


def get_prediction_api(service_account=True):
    scope = [
        'https://www.googleapis.com/auth/prediction',
        'https://www.googleapis.com/auth/devstorage.read_only'
    ]
    return get_api('prediction', scope, service_account)


def get_api(api, scope, service_account=True):
    """ Build API client based on oAuth2 authentication """
    
    credentials = ServiceAccountCredentials.from_p12_keyfile(
        'presidents-1333@appspot.gserviceaccount.com',
        './presidents-9f2b47beb322.p12', 
        scopes=scope
    )

    #wrap http with credentials
    http = credentials.authorize(httplib2.Http())
    return discovery.build(api, "v1.6", http=http)


In [95]:
# model_id = 'first-republican-debate-model'
# storageDataLocation = 'applause-prediction/first_r_applause.csv'
model_id = 'republican-debate-model'
storageDataLocation = 'applause-prediction/r_applause.csv'

In [98]:
train_model(model_id, storageDataLocation)

Creating new Model.


In [112]:
model_id = 'republican-debate-model'
storageDataLocation = 'applause-prediction/r_applause.csv'
big_republican_analysis = make_prediction(model_id)

Fetching model.
Model is ready.


In [115]:
big_republican_analysis

{u'dataDescription': {u'features': [{u'index': u'0',
    u'text': {u'count': u'5681'}}],
  u'outputFeature': {u'numeric': {u'count': u'4748',
    u'mean': u'0.18',
    u'variance': u'0.15'},
   u'text': [{u'count': u'1', u'value': u'-- this campaign'},
    {u'count': u'1', u'value': u'... O.K.'},
    {u'count': u'1', u'value': u'... on defeating our enemies. So'},
    {u'count': u'1', u'value': u"... that's your line"},
    {u'count': u'1', u'value': u'... when we join together. Thank you'},
    {u'count': u'1', u'value': u'...I remember'},
    {u'count': u'3889', u'value': u'0'},
    {u'count': u'859', u'value': u'1'},
    {u'count': u'1', u'value': u'A complete disaster'},
    {u'count': u'1', u'value': u'A little of your own medicine there'},
    {u'count': u'2', u'value': u'Actually'},
    {u'count': u'9', u'value': u'All right'},
    {u'count': u'1',
     u'value': u"All right. Obviously there's a lot more to talk about this. We're going to have more questions for the candidates o

In [83]:
model_id = 'first-republican-debate-model'
storageDataLocation = 'applause-prediction/first_r_applause.csv'
analysis = make_prediction(model_id)

Fetching model.
Model is ready.


In [113]:
# analysis['modelDescription']['confusionMatrix']['0'].keys()

In [65]:
confusion_matrix = analysis['modelDescription']['confusionMatrix']

In [66]:
total_negative = float(confusion_matrix['0']['0']) + float(confusion_matrix['0']['1'])
total_positive = float(confusion_matrix['1']['0']) + float(confusion_matrix['1']['1'])

p_true_negative = float(confusion_matrix['0']['0']) / total_negative
p_false_positive = float(confusion_matrix['0']['1']) / total_negative
p_false_negative = float(confusion_matrix['1']['0']) / total_positive
p_true_positive = float(confusion_matrix['1']['1']) / total_positive

print('percentage true negative: {0}'.format(p_true_negative))
print('percentage false positive: {0}'.format(p_false_positive))
print('percentage false negative: {0}'.format(p_false_negative))
print('percentage true positive: {0}'.format(p_true_positive))

percentage true negative: 0.665399239544
percentage false positive: 0.334600760456
percentage false negative: 0.13768115942
percentage true positive: 0.86231884058


In [114]:
confusion_matrix = big_republican_analysis['modelDescription']['confusionMatrix']
total_negative = float(confusion_matrix['0']['0']) + float(confusion_matrix['0']['1'])
total_positive = float(confusion_matrix['1']['0']) + float(confusion_matrix['1']['1'])

p_true_negative = float(confusion_matrix['0']['0']) / total_negative
p_false_positive = float(confusion_matrix['0']['1']) / total_negative
p_false_negative = float(confusion_matrix['1']['0']) / total_positive
p_true_positive = float(confusion_matrix['1']['1']) / total_positive

print('percentage true negative: {0}'.format(p_true_negative))
print('percentage false positive: {0}'.format(p_false_positive))
print('percentage false negative: {0}'.format(p_false_negative))
print('percentage true positive: {0}'.format(p_true_positive))

KeyError: 'confusionMatrix'

In [37]:
def test_prediction(model_id):   
    
    """ Use trained model to generate a new prediction """

    api = get_prediction_api()

    print("Fetching model.")

    model = api.trainedmodels().get(project=project_id, id=model_id).execute()
    #read new record from local file
    with open('./data/whatif_sentences.csv') as f:
        record = f.readline().split(',') #csv

    #obtain new prediction
    prediction = api.trainedmodels().predict(project=project_id, id=model_id, body={
        'input': {
            'csvInstance': record
        },
    }).execute()

    #retrieve classified label and reliability measures for each class
    label = prediction.get('outputLabel')
    stats = prediction.get('outputMulti')

    #show results
    print("You are currently %s (class %s)." % (labels[label], label) ) 
    print(stats)

In [49]:
api = get_prediction_api()

print("Fetching model.")

model = api.trainedmodels().get(project=project_id, id=model_id).execute()

whatif_df = pd.read_csv('./data/whatif_sentences.csv', encoding='utf-8')

Fetching model.


In [57]:
#obtain new prediction
records = ['I am pro life', 'We have a responsibility to the poor', "The wall just got 10 feet taller"]
for record in records:
    prediction = api.trainedmodels().predict(project=project_id, id=model_id, body={
        'input': {
            'csvInstance': [record]
        },
    }).execute()

    print('{0}: {1}'.format(record, prediction['outputLabel']))
    print('\n')

I am pro life: 1


We have a responsibility to the poor: 0


The wall just got 10 feet taller: 1




In [89]:
model_id = 'first-republican-debate-model'
storageDataLocation = 'applause-prediction/first_r_applause.csv'

#obtain new prediction
records = [
    'I am pro life', 
    'We have a responsibility to the poor', 
    "The wall just got 10 feet taller",
    "I like pancakes",
    "Pancakes are horrible"]
for record in records:
    prediction = api.trainedmodels().predict(project=project_id, id=model_id, body={
        'input': {
            'csvInstance': [record]
        },
    }).execute()

    print('{0}: {1}'.format(record, prediction['outputLabel']))
    print('\n')

I am pro life: 1


We have a responsibility to the poor: 0


The wall just got 10 feet taller: 1


I like pancakes: 1


Pancakes are horrible: 1


Socialism is good: 1


