<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Random-Forest" data-toc-modified-id="Random-Forest-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Random Forest</a></span></li><li><span><a href="#Setup" data-toc-modified-id="Setup-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Setup</a></span></li><li><span><a href="#Complete-Model" data-toc-modified-id="Complete-Model-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Complete Model</a></span></li><li><span><a href="#Model-for-Demo" data-toc-modified-id="Model-for-Demo-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Model for Demo</a></span></li><li><span><a href="#Demo" data-toc-modified-id="Demo-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Demo</a></span></li><li><span><a href="#start-here" data-toc-modified-id="start-here-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>start here</a></span></li><li><span><a href="#Tkinter" data-toc-modified-id="Tkinter-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Tkinter</a></span></li></ul></div>

# Random Forest

# Setup

In [22]:
import pandas as pd
import numpy as np
import itertools
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 200)
pd.options.display.max_colwidth=300

In [23]:
# Candidate label sets, built up from the two-way race to the full four-way target.
target_two = ['Trump', 'Clinton']
target_three = target_two + ['other']
target_label = target_three + ['no vote']

In [24]:
#df_a = pd.read_csv('a_data.csv')
%store -r df_a
df = df_a
#dfn_a = pd.read_csv('a_data_labeled.csv')
%store -r dfn_a
dfn = dfn_a
labels = pd.read_csv("labels.csv", index_col=0).to_dict('dict')['0']

cces = pd.io.stata.StataReader('data/CCES16.dta')
dfo = cces.read(convert_categoricals=True)
value_labels_raw = cces.value_labels()
values = {x:{u:z for z,u in y.items()} for x,y in value_labels_raw.items()}

In [25]:
values

{'EDLOAN': {'Yes': 1,
  'No': 2,
  'Other': 97,
  "Don't know": 98,
  'None of these': 99,
  'Skipped': 998,
  'Not Asked': 999},
 'V547_A': {'A - Excellent': 1,
  'B - Above Average': 2,
  'C - Average': 3,
  'D - Below Average': 4,
  'F - Poor': 5,
  'Not applicable': 6,
  'Skipped': 8,
  'Not Asked': 9},
 'V546_A': {'A - Excellent': 1,
  'B - Above Average': 2,
  'C - Average': 3,
  'D - Below Average': 4,
  'F - Poor': 5,
  'Not applicable': 6,
  'Skipped': 8,
  'Not Asked': 9},
 'V545_A': {'A - Excellent': 1,
  'B - Above Average': 2,
  'C - Average': 3,
  'D - Below Average': 4,
  'F - Poor': 5,
  'Skipped': 8,
  'Not Asked': 9},
 'V544_A': {'A - Excellent': 1,
  'B - Above Average': 2,
  'C - Average': 3,
  'D - Below Average': 4,
  'F - Poor': 5,
  'Skipped': 8,
  'Not Asked': 9},
 'V543_A': {'A - Excellent': 1,
  'B - Above Average': 2,
  'C - Average': 3,
  'D - Below Average': 4,
  'F - Poor': 5,
  'Skipped': 8,
  'Not Asked': 9},
 'CC16_427': {'A - Excellent': 1,
  'B - Abo

In [26]:
# Column-name groups for the survey frames loaded above.
# The order inside each list matters: the lists are concatenated to build the feature
# matrix, so reordering them changes the column order the models are trained on.
# Groups used further down in this notebook: v_dem, v_media, v_perception, v_four
# (model features) and additionally v_opinion (category relabeling loop).
v_survey = ['commonweight_vv', 'commonweight_vv_post', 'tookpost']

v_other = ['comptype']

v_loc = ['lookupzip', 'inputstate', 'countyfips', 'countyname', 'inputzip',
         'lookupzip_post', 'inputstate_post', 'countyfips_post', 'countyname_post', 'inputzip_post']

# Respondent background columns (birth year, race, education, religion, ...).
v_dem = ['birthyr', 'gender', 'educ', 'race', 'immstat', 'CC16_361', 'citylength_1',
         'employ', 'hadjob', 'industryclass', 'ownhome', 'faminc', 'investor', 'edloan',
         'child18', 'child18num', 'marstat', 'lgbt',
         'pew_bornagain', 'pew_religimp', 'pew_churatd', 'pew_prayer', 'religpew',
         'milstat_1', 'milstat_2', 'milstat_3', 'milstat_4', 'milstat_5',
         'union', 'unionhh', 'newsint', 'internethome',
         'healthins_1', 'healthins_2', 'healthins_3', 'healthins_4', 'healthins_5', 'healthins_6']

# Only the CC16_300_* items are active; the follow-up items below are deliberately left out.
v_media = ['CC16_300_1', 'CC16_300_2', 'CC16_300_3', 'CC16_300_4', 'CC16_300_5', 'CC16_300_6']
#           'CC16_300b', 'CC16_300c', 'CC16_300d_1', 'CC16_300d_2', 'CC16_300d_3', 'CC16_300d_4',
#           'CC16_300d_5']

v_perception = ['CC16_302', 'CC16_304',
                'CC16_427_a', 'CC16_427_b', 'CC16_427_c', 'CC16_427_d', 'CC16_427_e', 'CC16_427_f']

v_four = ['CC16_303', 'CC16_305_1', 'CC16_305_2', 'CC16_305_3', 'CC16_305_4', 'CC16_305_5',
          'CC16_305_6', 'CC16_305_7', 'CC16_305_8', 'CC16_305_9', 'CC16_305_10', 'CC16_305_11']

v_opinion = ['CC16_307', 'CC16_330a', 'CC16_330b', 'CC16_330d', 'CC16_330e',
             'CC16_331_1', 'CC16_331_2', 'CC16_331_3', 'CC16_331_7', 'CC16_331_9',
             'CC16_332a', 'CC16_332b', 'CC16_332c', 'CC16_332d', 'CC16_332e', 'CC16_332f',
             'CC16_333a', 'CC16_333b', 'CC16_333c', 'CC16_333d',
             'CC16_334a', 'CC16_334b', 'CC16_334c', 'CC16_334d', 'CC16_335',
             'CC16_337_1', 'CC16_337_2', 'CC16_337_3',
             'CC16_351B', 'CC16_351E', 'CC16_351F', 'CC16_351G', 'CC16_351H', 'CC16_351I', 'CC16_351K',
             'CC16_414_1', 'CC16_414_2', 'CC16_414_3', 'CC16_414_4', 'CC16_414_5', 'CC16_414_6', 'CC16_414_7',
             'CC16_415r', 'CC16_416r', 'CC16_422c', 'CC16_422d', 'CC16_422e', 'CC16_422f',
             'CC16_426_1', 'CC16_426_2', 'CC16_426_3', 'CC16_426_4', 'CC16_426_5']

v_polpos = ['CC16_320a', 'CC16_320b', 'CC16_320c', 'CC16_316', 'CC16_326'] # political position

v_polac = ['CC16_417a_1', 'CC16_417a_2', 'CC16_417a_3', 'CC16_417a_4', 'CC16_417a_5', 'CC16_417a_6',
           'CC16_418a'] # political activity

# NOTE(review): 'CC416_25b_*' breaks the 'CC16_...' pattern of every other column and looks
# like a transposition of 'CC16_425b_*' — confirm against the data file before using this list.
v_campaign = ['CC16_425a', 'CC416_25b_1', 'CC416_25b_2', 'CC416_25b_3', 'CC416_25b_4']

v_party = ['CC16_360', 'pid3', 'pid7', 'pid7_post']

v_vote = ['votereg_post', 'CC16_327', 'CC16_328', 'CC16_403']

v_vote2 = ['early'] # exclude early if voter vs. non-voter

In [27]:
# Columns whose Stata value-label set has a different name than the column itself.
# Each entry maps a label-set name (key of `values`) to the columns that use it;
# the categories of those columns are renamed via the label text -> code mapping.
shared_label_sets = {
    # Yes / No
    'EDLOAN': [
        'pew_bornagain', 'investor', 'child18', 'hadjob',
        'milstat_1', 'milstat_2', 'milstat_3', 'milstat_4', 'milstat_5',
        'healthins_1', 'healthins_2', 'healthins_3', 'healthins_4', 'healthins_5', 'healthins_6',
        'CC16_331_1', 'CC16_331_2', 'CC16_331_3', 'CC16_331_7', 'CC16_331_9',
        'CC16_414_1', 'CC16_414_2', 'CC16_414_3', 'CC16_414_4', 'CC16_414_5', 'CC16_414_6',
        'CC16_414_7',
        'CC16_300_1', 'CC16_300_2', 'CC16_300_3', 'CC16_300_4', 'CC16_300_5', 'CC16_300_6',
        'CC16_305_1', 'CC16_305_2', 'CC16_305_3', 'CC16_305_4', 'CC16_305_5', 'CC16_305_6',
        'CC16_305_7', 'CC16_305_8', 'CC16_305_9', 'CC16_305_10', 'CC16_305_11',
    ],
    # grades
    'CC16_427': ['CC16_427_a', 'CC16_427_b', 'CC16_427_c', 'CC16_427_d', 'CC16_427_e', 'CC16_427_f'],
    # support
    'LABG': [
        'CC16_330a', 'CC16_330b', 'CC16_330d', 'CC16_330e',
        'CC16_332a', 'CC16_332b', 'CC16_332c', 'CC16_332d', 'CC16_332e', 'CC16_332f',
        'CC16_333a', 'CC16_333b', 'CC16_333c', 'CC16_333d',
        'CC16_334a', 'CC16_334b', 'CC16_334c', 'CC16_334d',
    ],
    # ranked first
    'LABH': ['CC16_337_1', 'CC16_337_2', 'CC16_337_3'],
    # for against
    'LABJ': ['CC16_351B', 'CC16_351E', 'CC16_351F', 'CC16_351G', 'CC16_351H', 'CC16_351I', 'CC16_351K'],
    # agree
    'LABU': ['CC16_422c', 'CC16_422d', 'CC16_422e', 'CC16_422f'],
    # increase
    'LABX': ['CC16_426_1', 'CC16_426_2', 'CC16_426_3', 'CC16_426_4', 'CC16_426_5'],
    # single columns with an individually named label set
    'V364_A': ['inputstate_post'],
    'INDUSTRY': ['industryclass'],
    'PEW_RELI': ['pew_religimp'],
    'PEW_CHUR': ['pew_churatd'],
    'PEW_PRAY': ['pew_prayer'],
    'LABO': ['internethome'],
}

for label_set, label_columns in shared_label_sets.items():
    for column in label_columns:
        dfo[column] = dfo[column].cat.rename_categories(values[label_set])



In [28]:
# Sort the model features into three buckets:
#   other_dtype - not categorical (or 'lgbt'), left untouched
#   no_label    - categorical, but no value-label set named after the column
#   the rest    - categories renamed using the label set named like the upper-cased column
other_dtype = ['citylength_1', 'child18num']
no_label = []
features = v_dem + v_perception + v_opinion + v_media + v_four
for column in features:
    label_set = column.upper()
    if column == 'lgbt' or dfo[column].dtype.name != 'category':
        other_dtype.append(column)
        continue
    if label_set not in values:
        no_label.append(column)
        continue
    dfo[column] = dfo[column].cat.rename_categories(values[label_set])

# Complete Model

In [9]:
# Keep only respondents whose recorded vote is 1 or 2 (the two candidates in vote_label order).
# Unanswered questions become their own category, coded 998.
# fillna() returns a new frame here instead of mutating the .loc selection in place,
# which is what raised SettingWithCopyWarning before.
voter_mask = df.voted.isin([1, 2])
df_m = df.loc[voter_mask].fillna(998)
y = df_m.voted
# The survey weight column travels with X through the split and is separated afterwards.
X = df_m.loc[:, ['commonweight_vv_post', 'inputstate_post'] + v_dem + v_media + v_perception + v_four]

In [10]:
# 70/30 split, stratified on the vote; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=19,
    stratify=y,
)

In [11]:
# Separate the survey weights from the features.
# drop() is used without inplace=True: the split frames are selections of X, and mutating
# them in place is what triggered the SettingWithCopyWarning recorded below this cell.
weight_column = 'commonweight_vv_post'
w_train = X_train[weight_column]
X_train = X_train.drop(columns=weight_column)
w_test = X_test[weight_column]
X_test = X_test.drop(columns=weight_column)
X = X.drop(columns=weight_column)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [12]:
# Weighted random forest on all selected features; fit() returns the estimator itself.
rnc = RandomForestClassifier(
    n_estimators=100,
    random_state=19,
    n_jobs=-1,
    verbose=1,
).fit(X_train, y_train, sample_weight=w_train)
pred = rnc.predict(X_test)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    1.6s finished
[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 100 out of 100 | elapsed:    0.1s finished


In [13]:
# Survey-weighted accuracy, confusion matrix and per-class report on the hold-out set.
weighted_accuracy = rnc.score(X_test, y_test, sample_weight=w_test)
print(weighted_accuracy)
weighted_confusion = confusion_matrix(y_test, pred, sample_weight=w_test)
display(weighted_confusion)
weighted_report = classification_report(y_test, pred, sample_weight=w_test)
print(weighted_report)

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.1s
[Parallel(n_jobs=4)]: Done 100 out of 100 | elapsed:    0.1s finished


0.8433043807529809


array([[5069.15250559, 1073.85577654],
       [ 917.57355127, 5648.32054295]])

              precision    recall  f1-score   support

         1.0       0.85      0.83      0.84 6143.008282129993
         2.0       0.84      0.86      0.85 6565.894094216816

    accuracy                           0.84 12708.902376346809
   macro avg       0.84      0.84      0.84 12708.902376346809
weighted avg       0.84      0.84      0.84 12708.902376346809



In [15]:
columns

array(['CC16_302', 'race', 'CC16_304', 'CC16_303', 'birthyr', 'religpew',
       'citylength_1', 'inputstate_post', 'industryclass', 'faminc',
       'pew_religimp', 'pew_prayer', 'CC16_427_b', 'educ', 'marstat',
       'pew_churatd', 'newsint', 'pew_bornagain', 'ownhome', 'CC16_427_c',
       'CC16_427_f', 'CC16_427_e', 'employ', 'CC16_427_a', 'CC16_427_d',
       'immstat', 'gender', 'CC16_361', 'child18num', 'unionhh',
       'milstat_5', 'union', 'CC16_300_3', 'CC16_300_4', 'investor',
       'lgbt', 'CC16_300_5', 'edloan', 'CC16_305_10', 'milstat_4',
       'CC16_300_2', 'healthins_2', 'healthins_1', 'CC16_305_11',
       'child18', 'CC16_300_1', 'CC16_305_7', 'milstat_3', 'hadjob',
       'healthins_4', 'CC16_305_2', 'CC16_305_8', 'CC16_305_1',
       'CC16_305_3', 'milstat_2', 'healthins_6', 'CC16_305_4',
       'CC16_305_6', 'internethome', 'CC16_305_9', 'CC16_300_6',
       'CC16_305_5', 'healthins_5', 'milstat_1', 'healthins_3'],
      dtype=object)

In [14]:
# Rank the features of the complete model by importance (highest first).
importances = rnc.feature_importances_
indices = np.argsort(importances)[::-1]
columns = X_train.columns.values[indices]
# Deliberately NOT named `values`: that name holds the value-label dictionary built in the
# Setup section, and overwriting it breaks those cells when they are re-run.
sorted_importances = importances[indices]
for name, importance in zip(columns, sorted_importances):
    description = labels.get(name)
    if description is None:
        # Columns without an entry in `labels` (e.g. 'lgbt') are printed without a description.
        print(importance, name)
    else:
        print(importance, name, description)

0.18905897370723143 CC16_302 National Economy
0.06669077925796381 race Race
0.062149909031478164 CC16_304 Next year - household income
0.044522541151303344 CC16_303 Past year - household income
0.039447657809580754 birthyr Birth year
0.02999523018080881 religpew Pew religion
0.029043437298984384 citylength_1 Current city residence length - Years
0.028100830482342277 inputstate_post State
0.027258447554263102 industryclass Industry classification
0.02491023151147862 faminc Family income
0.02449302296295402 pew_religimp Importance of religion (Pew version)
0.022438959314453236 pew_prayer Frequency of Prayer (Pew version)
0.020767335479149357 CC16_427_b Grade local community - The police
0.020666968574471638 educ Education
0.018625866237148455 marstat Marital Status
0.018383128026976656 pew_churatd Church attendance (Pew version)
0.018016474921557196 newsint Political Interest
0.017455634567479394 pew_bornagain Born Again (Pew version)
0.016247594943633353 ownhome Home ownership
0.0142909

In [20]:
columns

array(['CC16_302', 'race', 'CC16_304', 'CC16_303', 'birthyr', 'religpew',
       'citylength_1', 'inputstate_post', 'industryclass', 'faminc',
       'pew_religimp', 'pew_prayer', 'CC16_427_b', 'educ', 'marstat',
       'pew_churatd', 'newsint', 'pew_bornagain', 'ownhome', 'CC16_427_c',
       'CC16_427_f', 'CC16_427_e', 'employ', 'CC16_427_a', 'CC16_427_d',
       'immstat', 'gender', 'CC16_361', 'child18num', 'unionhh',
       'milstat_5', 'union', 'CC16_300_3', 'CC16_300_4', 'investor',
       'lgbt', 'CC16_300_5', 'edloan', 'CC16_305_10', 'milstat_4',
       'CC16_300_2', 'healthins_2', 'healthins_1', 'CC16_305_11',
       'child18', 'CC16_300_1', 'CC16_305_7', 'milstat_3', 'hadjob',
       'healthins_4', 'CC16_305_2', 'CC16_305_8', 'CC16_305_1',
       'CC16_305_3', 'milstat_2', 'healthins_6', 'CC16_305_4',
       'CC16_305_6', 'internethome', 'CC16_305_9', 'CC16_300_6',
       'CC16_305_5', 'healthins_5', 'milstat_1', 'healthins_3'],
      dtype=object)

# Model for Demo

In [29]:
# Keep only respondents whose recorded vote is 1 or 2 (the two candidates in vote_label order).
# Unanswered questions become their own category, coded 998.
# fillna() returns a new frame here instead of mutating the .loc selection in place,
# which is what raised the SettingWithCopyWarning recorded below this cell.
voter_mask = df.voted.isin([1, 2])
df_m = df.loc[voter_mask].fillna(998)
y = df_m.voted
# Reduced feature set for the interactive demo. After the weight column is removed, the
# remaining column order must match the key order of input_dict_start.
X = df_m.loc[:, ['commonweight_vv_post', 'birthyr', 'race', 'religpew', 'citylength_1', 'industryclass',
                 'CC16_302', 'CC16_304', 'inputstate_post']]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  **kwargs


In [30]:
# 70/30 split, stratified on the vote; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=19,
    stratify=y,
)

In [31]:
# Separate the survey weights from the demo features.
# drop() is used without inplace=True: the split frames are selections of X, and mutating
# them in place is what triggered the SettingWithCopyWarning recorded below this cell.
weight_column = 'commonweight_vv_post'
w_train = X_train[weight_column]
X_train = X_train.drop(columns=weight_column)
w_test = X_test[weight_column]
X_test = X_test.drop(columns=weight_column)

# Weighted random forest on the reduced demo feature set.
rnc000 = RandomForestClassifier(n_estimators=100, random_state=19, n_jobs=-1)
rnc000.fit(X_train, y_train, sample_weight=w_train)
pred000 = rnc000.predict(X_test)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [32]:
# Survey-weighted accuracy, confusion matrix and per-class report for the demo model.
weighted_accuracy = rnc000.score(X_test, y_test, sample_weight=w_test)
print(weighted_accuracy)
weighted_confusion = confusion_matrix(y_test, pred000, sample_weight=w_test)
display(weighted_confusion)
weighted_report = classification_report(y_test, pred000, sample_weight=w_test)
print(weighted_report)

0.8213825351877488


array([[4885.21936539, 1257.78891674],
       [1012.24300627, 5553.65108795]])

              precision    recall  f1-score   support

         1.0       0.83      0.80      0.81 6143.008282129993
         2.0       0.82      0.85      0.83 6565.894094216816

    accuracy                           0.82 12708.902376346809
   macro avg       0.82      0.82      0.82 12708.902376346809
weighted avg       0.82      0.82      0.82 12708.902376346809



# Demo

In [33]:
# Default answers for the demo: every question starts at 998, the code used for
# unanswered questions when the model data was prepared. Key order is the model's column order.
input_dict_start = dict.fromkeys(
    [
        'birthyr',
        'race',
        'religpew',
        'citylength_1',
        'industryclass',
        'CC16_302',
        'CC16_304',
        'inputstate_post',
    ],
    998,
)

In [34]:
# Questionnaire shown by the demo, keyed by model feature (column) name.
# Every entry starts with a 'text' key holding the question wording. The demo code relies
# on 'text' being the FIRST key: it drops the first row / first key to obtain the answers.
# Categorical questions then map answer label -> numeric code that is passed to the model;
# the free-entry questions ('birthyr', 'citylength_1') carry 'Minimum'/'Maximum' instead.
# NOTE(review): 'Minimum'/'Maximum' are not read by any code visible in this notebook, so
# typed values are not range-checked — confirm whether that is intended.
l_dict = {'birthyr': {'text': 'In what year were you born?',
              'Minimum': 1917,
              'Maximum': 2020},
          'race': {'text': 'What racial or ethnic group best describes you?',
              'White': 1,
              'Black': 2,
              'Hispanic': 3,
              'Asian': 4,
              'Native American': 5,
              'Mixed': 6,
              'Other': 7,
              'Middle Eastern': 8},
          'religpew': {'text': 'What is your present religion, if any?',
              'Protestant': 1,
              'Roman Catholic': 2,
              'Mormon': 3,
              'Eastern or Greek Orthodox': 4,
              'Jewish': 5,
              'Muslim': 6,
              'Buddhist': 7,
              'Hindu': 8,
              'Atheist': 9,
              'Agnostic': 10,
              'Nothing in particular': 11,
              'Something else': 12},
          'citylength_1': {'text': 'How long have you lived in your current city of residence? (Years)',
              'Minimum': 0,
              'Maximum': 100},
          'industryclass': {'text': 'In what industry do you work, if any?',
              'Agriculture': 1,
              'Forestry': 2,
              'Fishing and Hunting': 3,
              'Mining': 4,
              'Utilities': 5,
              'Construction': 6,
              'Manufacturing': 7,
              'Wholesale Trade': 8,
              'Retail Trade': 9,
              'Transportation  and Warehousing': 10,
              'Information': 11,
              'Finance  and Insurance': 12,
              'Real Estate and Rental and Leasing': 13,
              'Professional, Scientific, and Technical Services': 14,
              'Management of Companies and Enterprises': 15,
              'Administrative and Support': 16,
              'Waste Management and Remediation Services': 17,
              'Education Services': 18,
              'Health Care and Social Assistance': 19,
              'Arts, Entertainment, and Recreation': 20,
              'Hotel Accommodation  and  Food Services': 21,
              'Other Services (except Public Administration)': 22,
              'Public Administration': 23,
              'No Employment': 99},
          'CC16_302': {'text': 'OVER THE PAST YEAR the nation’s economy has ...?',
              'Gotten much better': 1,
              'Gotten better': 2,
              'Stayed about the same': 3,
              'Gotten worse': 4,
              'Gotten much worse': 5,
              'Not sure': 6},
          # NOTE(review): the `labels` lookup printed with the feature importances describes
          # CC16_304 as "Next year - household income", while the wording below asks about the
          # nation's economy — confirm which one matches the survey question.
          'CC16_304': {'text': 'OVER THE NEXT YEAR, do you think the nation’s economy will ...?',
              'Get much better': 1,
              'Get somewhat better': 2,
              'Stay about the same': 3,
              'Get somewhat worse': 4,
              'Get much worse': 5,
              'Not sure': 6},
          # State/region codes are not contiguous (e.g. no 3, 7, 14); the label is looked up
          # here to obtain the code for the entry picked in the Tk list box.
          'inputstate_post': {'text': 'In what state/ region do you live?',
              'Alabama': 1,
              'Alaska': 2,
              'Arizona': 4,
              'Arkansas': 5,
              'California': 6,
              'Colorado': 8,
              'Connecticut': 9,
              'Delaware': 10,
              'District of Columbia': 11,
              'Florida': 12,
              'Georgia': 13,
              'Hawaii': 15,
              'Idaho': 16,
              'Illinois': 17,
              'Indiana': 18,
              'Iowa': 19,
              'Kansas': 20,
              'Kentucky': 21,
              'Louisiana': 22,
              'Maine': 23,
              'Maryland': 24,
              'Massachusetts': 25,
              'Michigan': 26,
              'Minnesota': 27,
              'Mississippi': 28,
              'Missouri': 29,
              'Montana': 30,
              'Nebraska': 31,
              'Nevada': 32,
              'New Hampshire': 33,
              'New Jersey': 34,
              'New Mexico': 35,
              'New York': 36,
              'North Carolina': 37,
              'North Dakota': 38,
              'Ohio': 39,
              'Oklahoma': 40,
              'Oregon': 41,
              'Pennsylvania': 42,
              'Rhode Island': 44,
              'South Carolina': 45,
              'South Dakota': 46,
              'Tennessee': 47,
              'Texas': 48,
              'Utah': 49,
              'Vermont': 50,
              'Virginia': 51,
              'Washington': 53,
              'West Virginia': 54,
              'Wisconsin': 55,
              'Wyoming': 56,
              'American Samoa': 60,
              'Federated States of Micronesia': 64,
              'Guam': 66,
              'Marshall Islands': 68,
              'Northern Mariana Islands': 69,
              'Palau': 70,
              'Puerto Rico': 72,
              'U.S. Minor Outlying Islands': 74,
              'Virgin Islands': 78,
              'Alberta': 81,
              'British Columbia': 82,
              'Manitoba': 83,
              'New Brunswick': 84,
              'Newfoundland': 85,
              'Northwest Territories': 86,
              'Nova Scotia': 87,
              'Nunavut': 88,
              'Ontario': 89,
              'Prince Edward Island': 90,
              'Quebec': 91,
              'Saskatchewan': 92,
              'Yukon Territory': 93,
              'Not in the U.S. or Canada': 99}}

In [35]:
vote_label = ['Donald Trump', 'Hillary Clinton']

In [36]:
class color:
    """ANSI escape sequences for styling text printed to the console."""

    # foreground colors
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'

    # text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # resets every color/attribute set before
    END = '\033[0m'

In [16]:
def voting():
    """Run the console questionnaire and print the demo model's prediction.

    Asks every question listed in ``input_dict_start`` (wording and answer codes come
    from ``l_dict``), reads the numeric answer with ``input()``, and feeds the answers
    to ``rnc000``. A blank answer keeps the question's default code.

    Raises:
        ValueError: if a non-blank answer is not an integer.
    """
    # Copy the defaults: assigning the dict directly would alias it, and the answers of
    # one run would silently become the "defaults" of every later run.
    input_dict = input_dict_start.copy()
    for question_key in input_dict:
        question = l_dict[question_key]
        print(question['text'])
        if question_key not in ('birthyr', 'citylength_1'):
            # Show the answer codes; the first row is the question text itself, so skip it.
            display(pd.DataFrame.from_dict(question, orient='index', columns=['pick number'])[1:])
        answer = input('').strip()
        if answer:
            # The model was trained on numeric codes, so store numbers rather than strings.
            input_dict[question_key] = int(answer)
        print(' ')
        print('- - - - - - - - - - - - - - - -')
        print(' ')

    # Column order follows input_dict_start, which mirrors the demo model's feature order.
    X_input = pd.DataFrame(input_dict, index=['inputed'])
    vote = int(rnc000.predict(X_input)[0])
    v_prob = rnc000.predict_proba(X_input)
    # assumes the model's classes are ordered [1, 2] = [Trump, Clinton] — TODO confirm via rnc000.classes_
    vote_t = int(v_prob[0][0]*100)
    vote_c = int(v_prob[0][1]*100)

    for _ in range(5):
        print(' ')
    print(color.BOLD + 'You would have voted for ' + color.RED + vote_label[vote-1].upper() + '!' + color.END)
    for _ in range(3):
        print(' ')
    print('Probabilities: Trump ' + str(vote_t) + '%, Clinton ' + str(vote_c) + '%')

# start here

In [32]:
voting()

In what year were you born?
1986
 
- - - - - - - - - - - - - - - -
 
What racial or ethnic group best describes you?


Unnamed: 0,pick number
White,1
Black,2
Hispanic,3
Asian,4
Native American,5
Mixed,6
Other,7
Middle Eastern,8


1
 
- - - - - - - - - - - - - - - -
 
What is your present religion, if any?


Unnamed: 0,pick number
Protestant,1
Roman Catholic,2
Mormon,3
Eastern or Greek Orthodox,4
Jewish,5
Muslim,6
Buddhist,7
Hindu,8
Atheist,9
Agnostic,10


10
 
- - - - - - - - - - - - - - - -
 
How long have you lived in your current city of residence? (Years)
4
 
- - - - - - - - - - - - - - - -
 
In what industry do you work, if any?


Unnamed: 0,pick number
Agriculture,1
Forestry,2
Fishing and Hunting,3
Mining,4
Utilities,5
Construction,6
Manufacturing,7
Wholesale Trade,8
Retail Trade,9
Transportation and Warehousing,10


14
 
- - - - - - - - - - - - - - - -
 
OVER THE PAST YEAR the nation’s economy has ...?


Unnamed: 0,pick number
Gotten much better,1
Gotten better,2
Stayed about the same,3
Gotten worse,4
Gotten much worse,5
Not sure,6


4
 
- - - - - - - - - - - - - - - -
 
OVER THE NEXT YEAR, do you think the nation’s economy will ...?


Unnamed: 0,pick number
Get much better,1
Get somewhat better,2
Stay about the same,3
Get somewhat worse,4
Get much worse,5
Not sure,6


4
 
- - - - - - - - - - - - - - - -
 
In what state/ region do you live?


Unnamed: 0,pick number
Alabama,1
Alaska,2
Arizona,4
Arkansas,5
California,6
Colorado,8
Connecticut,9
Delaware,10
District of Columbia,11
Florida,12


99
 
- - - - - - - - - - - - - - - -
 
 
 
 
 
 
[1mYou would have voted for [91mDONALD TRUMP![0m
 
 
 
Probabilities: Trump 57%, Clinton 42%


# Tkinter

In [74]:
import tkinter as tk

In [None]:
input_dict

In [78]:
import tkinter.font as ft

In [79]:
default_font = ft.nametofont("TkDefaultFont")
default_font.configure(size=48)

In [100]:
# this should be in classes! but it's working anyway ;-)


def calculate_vote():
    """Predict the vote for the answers in the global ``input_dict``.

    Publishes two display strings as module-level globals for ``show_result``:
    ``result_vote`` (the predicted candidate) and ``result_prob`` (both class
    probabilities, truncated to whole percent).
    """
    global result_vote
    global result_prob
    # Column order follows the key order of input_dict.
    X_input = pd.DataFrame(input_dict, index=['inputed'])
    vote = int(rnc000.predict(X_input)[0])
    v_prob = rnc000.predict_proba(X_input)
    # assumes the model's classes are ordered [1, 2] = [Trump, Clinton] — TODO confirm via rnc000.classes_
    vote_t = int(v_prob[0][0]*100)
    vote_c = int(v_prob[0][1]*100)
    result_vote = 'You would have voted for \n' + vote_label[vote-1].upper() + '!'
    result_prob = 'Probabilities: Trump ' + str(vote_t) + '%, Clinton ' + str(vote_c) + '%'


def m_start():
    """Start a new questionnaire run: reset the answers and show the first question."""
    # Without `global`, the fresh copy would be bound to a local name and discarded,
    # so the other handlers would keep writing into whatever `input_dict` existed before.
    global input_dict
    input_dict = input_dict_start.copy()
    w_birthyr.pack()
    w_start.pack_forget()
    
def _store_and_advance(key, value, current_frame, next_frame):
    """Store one answer in ``input_dict`` and switch from the current page to the next."""
    input_dict[key] = value
    next_frame.pack()
    current_frame.pack_forget()


def _entry_as_int(variable):
    """Return the entry text as int, or None if it is empty or not a whole number."""
    try:
        return int(variable.get())
    except ValueError:
        return None


def m_birthyr():
    """'Next' handler of the birth-year page; stays on the page for invalid input."""
    year = _entry_as_int(v_birthyr)
    if year is None:
        return
    _store_and_advance('birthyr', year, w_birthyr, w_race)


def m_race():
    """'Next' handler of the race page."""
    _store_and_advance('race', v_race.get(), w_race, w_reli)


def m_reli():
    """'Next' handler of the religion page."""
    _store_and_advance('religpew', v_reli.get(), w_reli, w_city)


def m_city():
    """'Next' handler of the city-residence page; stays on the page for invalid input."""
    years = _entry_as_int(v_city)
    if years is None:
        return
    _store_and_advance('citylength_1', years, w_city, w_industry)


def m_industry():
    """'Next' handler of the industry page."""
    _store_and_advance('industryclass', v_industry.get(), w_industry, w_past)


def m_past():
    """'Next' handler of the past-year economy page."""
    _store_and_advance('CC16_302', v_past.get(), w_past, w_next)


def m_next():
    """'Next' handler of the next-year economy page."""
    _store_and_advance('CC16_304', v_next.get(), w_next, w_state)


def m_state():
    """'Commit' handler of the state page: store the state code, predict, show the result."""
    selection = state_list.curselection()
    if not selection:
        # Nothing picked yet: stay on the page instead of failing on an empty selection.
        return
    state_picked = state_list.get(selection[0])
    # The list box shows labels; the model needs the numeric code behind the label.
    input_dict['inputstate_post'] = l_dict['inputstate_post'][state_picked]
    calculate_vote()
    show_result()
    
def show_result():
    """Open a result window showing the prediction and a 'Start Over' button.

    Reads the globals ``result_vote`` and ``result_prob`` set by ``calculate_vote``.
    """
    w_result = tk.Toplevel()

    def m_over():
        # Close the result window and return from the state page to the start page.
        w_result.destroy()
        w_start.pack()
        w_state.pack_forget()

    tk.Label(w_result, text=result_vote).pack()
    tk.Label(w_result, text=result_prob).pack()
    tk.Button(w_result, text='Start Over', command=m_over).pack()
    

    
    
    

# Code pre-selected for every choice question; matches the defaults in input_dict_start.
NOT_ANSWERED = 998


def _add_entry_page(frame, key, variable, on_next):
    """Fill ``frame`` with the question text of ``key``, a text entry and a 'Next' button."""
    tk.Label(frame, text=l_dict[key]['text']).pack()
    tk.Entry(frame, textvariable=variable).pack()
    tk.Button(frame, text='Next', command=on_next).pack()


def _add_choice_page(frame, key, variable, on_next):
    """Fill ``frame`` with the question text of ``key``, one radio button per answer and 'Next'."""
    tk.Label(frame, text=l_dict[key]['text']).pack()
    for answer, code in l_dict[key].items():
        if answer == 'text':
            # the question wording, not an answer
            continue
        tk.Radiobutton(frame, text=answer, variable=variable, value=code).pack(anchor='w', padx=20)
    tk.Button(frame, text='Next', command=on_next).pack()


fenster = tk.Tk()
fenster.title('Trump or Clinton?')
fenster.geometry("800x800")

default_font = ft.nametofont("TkDefaultFont")
default_font.configure(size=18)

# One frame per page; the m_* handlers show/hide them to step through the questionnaire.
w_start = tk.Frame(fenster)
w_birthyr = tk.Frame(fenster)
w_race = tk.Frame(fenster)
w_reli = tk.Frame(fenster)
w_city = tk.Frame(fenster)
w_industry = tk.Frame(fenster)
w_past = tk.Frame(fenster)
w_next = tk.Frame(fenster)
w_state = tk.Frame(fenster)

# Widgets are packed directly: pack() returns None, so binding its result to a name
# (as the earlier version did) only created misleading None-valued variables.
tk.Label(w_start, text='Would you have voted for Trump or Clinton?').pack(anchor='center')
tk.Button(w_start, text='Start', command=m_start).pack(anchor='center')

v_birthyr = tk.StringVar()
_add_entry_page(w_birthyr, 'birthyr', v_birthyr, m_birthyr)

v_race = tk.IntVar(value=NOT_ANSWERED)
_add_choice_page(w_race, 'race', v_race, m_race)

v_reli = tk.IntVar(value=NOT_ANSWERED)
_add_choice_page(w_reli, 'religpew', v_reli, m_reli)

v_city = tk.StringVar()
_add_entry_page(w_city, 'citylength_1', v_city, m_city)

v_industry = tk.IntVar(value=NOT_ANSWERED)
_add_choice_page(w_industry, 'industryclass', v_industry, m_industry)

v_past = tk.IntVar(value=NOT_ANSWERED)
_add_choice_page(w_past, 'CC16_302', v_past, m_past)

v_next = tk.IntVar(value=NOT_ANSWERED)
_add_choice_page(w_next, 'CC16_304', v_next, m_next)

# State page: too many answers for radio buttons, so a list box is used instead.
states = list(l_dict['inputstate_post'].keys())[1:]  # skip the leading 'text' key
v_state = tk.StringVar(value=states)
tk.Label(w_state, text=l_dict['inputstate_post']['text']).pack()
state_list = tk.Listbox(w_state, listvariable=v_state, height=40, font=14)
state_list.pack(fill='both')
tk.Button(w_state, text='Commit', command=m_state).pack()
# TODO: attach a scrollbar to the list box
#s = tk.Scrollbar(w_state, orient='vertical', command=state_list.yview)
#state_list.configure(yscrollcommand=s.set)

w_start.pack(fill='both')
fenster.mainloop()

In [26]:
input_dict

{'birthyr': 998,
 'race': 998,
 'religpew': 998,
 'citylength_1': 998,
 'industryclass': 998,
 'CC16_302': 998,
 'CC16_304': 998,
 'inputstate_post': 99}

In [None]:
states = list(l_dict['inputstate_post'].keys())[1:]  # skip the leading 'text' key
v_state = tk.StringVar(value=states)
# Create and pack in two steps: pack() returns None, so chaining it onto the constructor
# would bind state_list to None and break m_state, which calls state_list.curselection().
state_list = tk.Listbox(w_state, listvariable=v_state, width=40, height=40)
state_list.pack()

In [68]:
# Experiment: build the state page from radio buttons plus a scrollbar instead of a list box.
# NOTE(review): the NameError recorded below this cell ('race_answer') refers to a name that
# does not occur in this code, so the output is stale and the cell has not been run as written.
# NOTE(review): m_state reads the selection from state_list.curselection(), so radio buttons
# created here would not feed the prediction unless that handler is changed as well.
for j in l_dict['inputstate_post']:
    if j == 'text':
        j_label = tk.Label(w_state, text=l_dict['inputstate_post']['text'])
        j_label.pack()
    else:
        # pack() returns None, so j_radio does not hold the widget.
        j_radio = tk.Radiobutton(w_state, text=j, variable=v_state, value=l_dict['inputstate_post'][j]
                                ).pack(anchor='w', padx=20)
# state_scroll / state_next are likewise bound to the None returned by pack().
state_scroll = tk.Scrollbar(w_state, orient='vertical').pack(fill='y', side='right')
state_next = tk.Button(w_state, text='Next', command=m_state).pack()
# NOTE(review): w_state is a tk.Frame; a plain Frame presumably has no yview method, so this
# line is expected to fail — a scrollable container (e.g. a Canvas) would be needed. Confirm.
state_scroll = tk.Scrollbar(w_state, orient='vertical', command=w_state.yview).pack(fill='y', side='right')

NameError: name 'race_answer' is not defined

In [39]:
# Experiment: show the race question and each of its answers as plain labels in the main window.
for entry in l_dict['race']:
    caption = l_dict['race']['text'] if entry == 'text' else entry
    j_label = tk.Label(fenster, text=caption)
    j_label.pack()

In [14]:
list(l_dict['inputstate_post'].keys())[1:]

['Alabama',
 'Alaska',
 'Arizona',
 'Arkansas',
 'California',
 'Colorado',
 'Connecticut',
 'Delaware',
 'District of Columbia',
 'Florida',
 'Georgia',
 'Hawaii',
 'Idaho',
 'Illinois',
 'Indiana',
 'Iowa',
 'Kansas',
 'Kentucky',
 'Louisiana',
 'Maine',
 'Maryland',
 'Massachusetts',
 'Michigan',
 'Minnesota',
 'Mississippi',
 'Missouri',
 'Montana',
 'Nebraska',
 'Nevada',
 'New Hampshire',
 'New Jersey',
 'New Mexico',
 'New York',
 'North Carolina',
 'North Dakota',
 'Ohio',
 'Oklahoma',
 'Oregon',
 'Pennsylvania',
 'Rhode Island',
 'South Carolina',
 'South Dakota',
 'Tennessee',
 'Texas',
 'Utah',
 'Vermont',
 'Virginia',
 'Washington',
 'West Virginia',
 'Wisconsin',
 'Wyoming',
 'American Samoa',
 'Federated States of Micronesia',
 'Guam',
 'Marshall Islands',
 'Northern Mariana Islands',
 'Palau',
 'Puerto Rico',
 'U.S. Minor Outlying Islands',
 'Virgin Islands',
 'Alberta',
 'British Columbia',
 'Manitoba',
 'New Brunswick',
 'Newfoundland',
 'Northwest Territories',
 'Nova

In [None]:
def button_action():
    """Placeholder button callback; does nothing yet.

    TODO: implement, or delete this unfinished cell.
    """

In [78]:
exit_button = tk.Button(fenster, text='Stop', command=fenster.quit)
exit_button.pack()

In [17]:
# The stray backslash after '\n' was removed: '\ ' is not a valid escape sequence, so it
# put a literal backslash into the label text (and triggers an invalid-escape warning).
question_label = tk.Label(fenster, text='Hier erscheint die Frage. \n Bitte beantworte sie.')

In [49]:
question_label.pack()
exit_button.pack()

In [53]:
fenster.mainloop()