In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.cross_validation import cross_val_score, StratifiedKFold, train_test_split, KFold
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, BaggingClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.grid_search import GridSearchCV


In [None]:
# Feature matrix: every column except 'acceptability', expanded into
# dummy/indicator columns by pandas.
X = pd.get_dummies(df.drop('acceptability', axis=1))

# Target vector: the 'acceptability' column mapped to integer class codes.
# The fitted encoder is kept in `le` so the codes can be mapped back later.
le = LabelEncoder()
y = le.fit_transform(df['acceptability'])

In [None]:
def get_cv(target, n_folds=3, random_state=41):
    """Build a shuffled, stratified k-fold splitter for ``target``.

    Parameters
    ----------
    target : array-like
        Class labels, handed to ``StratifiedKFold`` as its first argument.
    n_folds : int, optional
        Number of folds. Defaults to 3, the value that used to be hard-coded.
    random_state : int or None, optional
        Seed for the shuffle. Defaults to 41, the value that used to be
        hard-coded, so existing one-argument calls behave exactly as before.

    Returns
    -------
    The ``StratifiedKFold`` object, ready to be passed as a ``cv=`` argument.
    """
    return StratifiedKFold(
        target,
        n_folds=n_folds,
        shuffle=True,
        random_state=random_state,
    )

In [None]:

def score(model, data, target):
    """Fit ``model`` on ``data``/``target`` and return its score on that same data.

    Because the estimator is scored on exactly the samples it was fitted on,
    the returned value is a training score, not a held-out estimate.

    Parameters
    ----------
    model : object exposing ``fit(data, target)`` and ``score(data, target)``
    data, target : passed unchanged to both calls.

    Returns
    -------
    Whatever ``model.score(data, target)`` returns.
    """
    model.fit(data, target)
    training_score = model.score(data, target)
    return training_score

In [None]:

def grid_search(model, params, cv):
    """Create (without fitting) a ``GridSearchCV`` for ``model``.

    Parameters
    ----------
    model : passed as ``estimator``.
    params : passed as ``param_grid``.
    cv : passed as ``cv``.

    Returns
    -------
    The new ``GridSearchCV`` object; the caller is responsible for ``fit``.
    """
    search_options = {
        'estimator': model,
        'param_grid': params,
        'cv': cv,
    }
    return GridSearchCV(**search_options)

In [None]:

def evaluate_model(model, data, target, params=None, random_state=None):
    """Fit ``model`` on a stratified training split and print an evaluation.

    The data is split with ``train_test_split`` (stratified on ``target``).
    When ``params`` is given, a grid search over it selects the estimator;
    otherwise ``model`` itself is fitted on the training split. The mean
    cross-validated score on the training split is printed, followed by the
    confusion matrix and classification report for the held-out test split.

    Parameters
    ----------
    model : estimator exposing ``fit`` and ``predict``
    data, target : features and labels handed to ``train_test_split``.
    params : dict or None, optional
        Parameter grid. When empty or None no grid search is performed.
    random_state : int or None, optional
        Seed forwarded to ``train_test_split``. The default (None) keeps the
        previous unseeded behaviour; pass an int to make the split, and hence
        every printed number, reproducible.

    Returns
    -------
    The fitted estimator: ``grid.best_estimator_`` when a grid search ran,
    otherwise ``model``.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, stratify=target, random_state=random_state)
    cv = get_cv(y_train)

    if params:
        grid = grid_search(model, params, cv)
        grid.fit(x_train, y_train)
        model = grid.best_estimator_
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Best Model after Grid Search:\n{}".format(model))
    else:
        model.fit(x_train, y_train)

    # Cross-validation only ever sees the training split; the test split is
    # reserved for the confusion matrix and report below.
    scores = cross_val_score(model, x_train, y_train, cv=cv, n_jobs=-1)
    print("Mean score of the model is: {}".format(scores.mean()))

    predictions = model.predict(x_test)
    print("Confusion Matrix:\n{}\n".format(
        confusion_matrix(y_test, predictions)))
    print("Classification Report:\n{}".format(
        classification_report(y_test, predictions)))
    return model

In [None]:

# Grid-search a k-nearest-neighbours classifier over the neighbour count and
# the weighting scheme, then evaluate the selected estimator.
knn = KNeighborsClassifier()
params = dict(
    # Candidate k values: 1 up to (number of columns in X) - 1.
    # NOTE(review): the upper bound follows the feature count, not the sample
    # count -- confirm this is the intended search range.
    n_neighbors=range(1, len(X.columns)),
    weights=['uniform', 'distance'],
)
# Rebinds `knn` to whatever evaluate_model returns.
knn = evaluate_model(knn, X, y, params)