In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier

# Columns the author judged to be important, plus the `compliance` target.
selected_columns = ['agency_name', 'zip_code', 'violation_code', 'disposition',
                    'judgment_amount', 'compliance']
# Columns that are replaced below by integer category codes.
categorical_columns = ['agency_name', 'zip_code', 'violation_code', 'disposition']

# Load the data, keep only the selected columns and drop every row that has
# a missing value in any of them.  The explicit .copy() makes `df` an
# independent frame, so the column assignments below never write into a
# view/slice of the raw data (avoids SettingWithCopyWarning).
df = (
    pd.read_csv('train.csv', encoding='ISO-8859-1')[selected_columns]
    .dropna(axis=0, how='any')
    .copy()
)

# Creating categorical labels: each categorical column gets a companion
# `<name>_d` column holding its category code shifted by one, so the codes
# start at 1 instead of 0.
for column in categorical_columns:
    df[column + '_d'] = pd.Categorical(df[column]).codes + 1

# To keep only processed columns
df = df.drop(columns=categorical_columns)

# Feature matrix (everything except the target) and target vector.
X = df.drop(columns=['compliance'])
y = df['compliance']


In [None]:
# Do not run all at once
# NOTE(review): presumably because fitting every grid below in one go is
# expensive -- comment out entries to search a subset of the models.

# Maps a display name to the estimator to tune ('model') and the
# hyper-parameter grid to search for it ('param').
model_param = {
    'DecisionTreeClassifier': {
        'model': DecisionTreeClassifier(),
        'param': {
            'criterion': ['gini', 'entropy'],
        },
    },
    'KNeighborsClassifier': {
        'model': KNeighborsClassifier(),
        'param': {
            'n_neighbors': [25, 30, 35, 40],
        },
    },
    'SVC': {
        'model': SVC(),
        'param': {
            'kernel': ['rbf', 'linear', 'sigmoid'],
            'C': [0.1, 1, 10, 100],
        },
    },
    'RandomForestClassifier': {
        'model': RandomForestClassifier(n_jobs=-1, criterion='gini'),
        'param': {
            # 'auto' was dropped from this grid: for classifiers it was an
            # alias of 'sqrt', and scikit-learn >= 1.3 rejects it.
            'max_features': ['sqrt', 'log2'],
        },
    },
    'GradientBoostingClassifier': {
        'model': GradientBoostingClassifier(learning_rate=0.1),
        'param': {
            'n_estimators': [10000],
        },
    },
}

In [None]:
# Running Grid Search
# For every configured model, search its parameter grid with 5-fold
# cross-validation scored by ROC AUC, and record the winning configuration.

scores = []
for model_name, mp in model_param.items():
    model_selection = GridSearchCV(
        estimator=mp['model'],
        param_grid=mp['param'],
        scoring='roc_auc',
        cv=5,
    )
    model_selection.fit(X, y)

    # One summary record per model: its name, best CV score and best params.
    best_result = {
        'model': model_name,
        'best_score': model_selection.best_score_,
        'best_params': model_selection.best_params_,
    }
    scores.append(best_result)


In [None]:
# Visualising results
# Tabulate the per-model grid-search records with a fixed column order.
result_columns = ['model', 'best_score', 'best_params']
df_model_score = pd.DataFrame(scores, columns=result_columns)
df_model_score