In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
plt.style.use('ggplot')
import seaborn as sns
from scipy import stats
pd.set_option('display.max_columns', 100)
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn import svm

### Functions

In [9]:
#pull in function to dummify columns
def dummify_columns(dataframe,var_list):
    '''
    dummifies the given columns, merges the dummies with the dataframe, and
    drops the non-dummified columns

    Each column named in var_list is encoded with pd.get_dummies using the
    column name as prefix and drop_first=True, so the first category level
    of every column gets no dummy column. The dummy columns are appended to
    the right of the existing columns, in var_list order. A new dataframe is
    returned; the dataframe passed in is left unchanged.
    ------------
    dataframe: full dataframe
    var_list: list of column names as strings
    returns: dataframe with the var_list columns replaced by their dummies
    '''
    # Build every dummy block first so the full frame is concatenated once
    # rather than being re-copied for each encoded column.
    dummified_features = [
        pd.get_dummies(dataframe[vr], prefix=vr, drop_first=True)
        for vr in var_list
    ]
    # sort has to be the boolean False: the string 'False' is truthy, and
    # pandas 2.x raises ValueError when sort is not a boolean.
    dataframe = pd.concat([dataframe, *dummified_features], axis=1, sort=False)
    dataframe = dataframe.drop(var_list, axis=1)
    return dataframe

def xy_split(dataframe,target):
    '''
    separates the target column from the remaining (estimator) columns
    ------------
    dataframe: full dataframe containing the target column
    target: name of the target column
    returns: (X, y) where X is the dataframe without the target column and
             y is the target column itself
    '''
    target_values = dataframe[target]
    estimators = dataframe.drop(columns=target)
    return estimators, target_values

### All_Imputed Model

In [7]:
#load in dataframe and discard the 'Unnamed: 0' column (presumably an index saved with the csv)
cchd = pd.read_csv('cchd_all_imputed_colfixed.csv').drop(columns='Unnamed: 0')

In [8]:
#redefine variable dictionary: column names grouped by variable type
variables = {
    'nominal_categorical_ndummified': [
        'MBSTATE_REC', 'MRACEHISP', 'MAR_P', 'DMAR', 'MEDUC', 'FRACEHISP',
        'FEDUC', 'WIC', 'RF_PDIAB', 'RF_GDIAB', 'RF_PHYPE', 'RF_GHYPE',
        'RF_EHYPE', 'RF_PPTERM', 'RF_FEDRG', 'RF_ARTEC', 'DOB_MM',
        'IP_GON', 'IP_SYPH', 'IP_CHLAM', 'IP_HEPB', 'IP_HEPC', 'PAY', 'SEX',
    ],
    'nominal_categorical_dummified': ['lrg_miss_imp'],
    'continuous': [
        'PRECARE', 'MAGER', 'FAGECOMB', 'PRIORTERM', 'PRIORLIVE', 'PRIORDEAD',
        'ILLB_R', 'ILOP_R', 'PREVIS', 'CIG_0', 'CIG_1', 'M_Ht_In', 'BMI',
        'WTGAIN', 'RF_CESARN', 'OEGest_Comb',
    ],
    'target': ['CA_CCHD'],
}

In [12]:
#dummify the not-yet-dummified categorical columns on a copy of cchd,
#then split off the CA_CCHD target from the estimator columns
cchd2 = dummify_columns(
    cchd.copy(),
    variables['nominal_categorical_ndummified'],
)
X, y = xy_split(cchd2, 'CA_CCHD')

In [13]:
#grid search over SVC hyperparameters with 3-fold CV scored on accuracy:
#  - poly kernel, degree 1-3, C in {1, 10, 100, 1000}
#  - rbf kernel, gamma in {0.001, 0.0001}, C in {1, 10, 100, 1000}
#NOTE(review): X is fitted as-is here; no scaling step is applied in this cell,
#so confirm the features are standardized upstream before an overnight run

svm_model = svm.SVC()
grid_para_svm = [
    {
        'C': [1, 10, 100, 1000],
        'kernel': ['poly'],
        'degree': [1, 2, 3],
    },
    {
        'C': [1, 10, 100, 1000],
        'gamma': [0.001, 0.0001],
        'kernel': ['rbf'],
    },
]
grid_search_svm = GridSearchCV(
    estimator=svm_model,
    param_grid=grid_para_svm,
    scoring='accuracy',
    cv=3,
    return_train_score=True,
    n_jobs=-1,
)
grid_search_svm.fit(X, y)

KeyboardInterrupt: 