# In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, log_loss
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.gaussian_process.kernels import RBF
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
import time 

def warn(*args, **kwargs):
    """Accept any arguments and do nothing.

    Assigned over ``warnings.warn`` right after this definition so that
    calls made through that function are silently discarded.
    """
    return None
import warnings
warnings.warn = warn

from sklearn.preprocessing import LabelEncoder
from sklearn.cross_validation import StratifiedShuffleSplit
import itertools
# Read the training table and the test table from CSV files in the working directory.
train, test = pd.read_csv('mytrain.csv'), pd.read_csv('mytest.csv')

# Helper that label-encodes the target column and separates features, labels and ids.

def encode(train, test):
    """Split the raw frames into features, integer labels and test ids.

    The ``appliances`` column of ``train`` is label-encoded. ``appliances``
    and ``id`` are then dropped from ``train``, and ``id`` from ``test``.

    Returns a tuple ``(train, labels, test, test_ids, classes)``:
    ``labels`` are the encoded ``appliances`` values, ``test_ids`` is the
    ``id`` column of the incoming ``test`` frame and ``classes`` is the list
    of original label values as stored by the encoder.
    """
    encoder = LabelEncoder()
    encoder.fit(train.appliances)
    labels = encoder.transform(train.appliances)
    classes = list(encoder.classes_)

    # Keep the ids before the column is removed from the test frame.
    test_ids = test.id

    train = train.drop(['appliances', 'id'], axis=1)
    test = test.drop(['id'], axis=1)
    return train, labels, test, test_ids, classes



# Alternative settings kept for reference (presumably for MLPClassifier; not used below):
#   solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(300, 7), random_state=1
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it on the current matplotlib figure.

    Parameters
    ----------
    cm : numpy array, 2-D
        The confusion matrix. Without normalization every entry is rendered
        with the ``'d'`` format, so the entries must be integers.
    classes : sequence
        Tick labels used on both axes; one tick is placed per element.
    normalize : bool
        When true, each row is divided by its row sum before printing and
        plotting. Rows whose sum is zero are left as all zeros.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap handed to ``plt.imshow``.

    Returns
    -------
    None. The function prints the matrix and draws through ``pyplot``.
    """
    if normalize:
        # Divide each row by its total. A row without any samples would
        # otherwise become NaN, which also turns cm.max() into NaN and
        # disables the text-colour threshold below.
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so that it accounts for the axis labels.
    plt.tight_layout()
    
# Turn the raw frames into a feature table, encoded labels, test ids and class names.
train, labels, test, test_ids, classes = encode(train, test)
# Peek at the first row of the feature table.
train.head(1)

# Stratified shuffle split on the labels, holding out 40% of the rows.
sss = StratifiedShuffleSplit(labels, 1, test_size=0.4, random_state=23)
max_iter = 1000
for train_index, test_index in sss:
    # Training part of the split.
    X_train, y_train = train.values[train_index], labels[train_index]
    # Held-out part of the split.
    X_test, y_test = train.values[test_index], labels[test_index]
    
    
# Candidate models; every entry is fitted and scored by the loop further down.
classifiers = [
    KNeighborsClassifier(n_neighbors=7),
    MLPClassifier(
        hidden_layer_sizes=(3500,),
        activation='logistic',
        solver='adam',
        alpha=1e-5,
        learning_rate_init=0.000001,
        max_iter=1000,
        random_state=0,
        verbose=True,
    ),
    DecisionTreeClassifier(max_depth=5, max_leaf_nodes=7, random_state=0),
    RandomForestClassifier(n_estimators=10, max_depth=5, max_features=3000),
    AdaBoostClassifier(n_estimators=600, learning_rate=0.01),
    GradientBoostingClassifier(),
    GaussianNB(),
    LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto'),
    QuadraticDiscriminantAnalysis(),
    GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0)),
    SVC(kernel="linear", C=0.025, probability=True),
    SVC(gamma=2, C=1, probability=True),
    NuSVC(),
]

# Empty results table for comparing the classifiers side by side:
# one column per reported quantity, no rows yet.
log_cols = [
    "Classifier",
    "Accuracy",
    "Log Loss",
]
log = pd.DataFrame(data=None, columns=log_cols)

# Fit every candidate on the training split, then report accuracy, the
# classification report and both confusion-matrix plots for the held-out split.
from sklearn import metrics

# Two decimals for numpy arrays printed below (the confusion matrices).
np.set_printoptions(precision=2)

for clf in classifiers:
    name = clf.__class__.__name__

    print("="*30)
    print(name)

    print('   Results   ')
    # Train once and predict the held-out rows.
    y_pred = clf.fit(X_train, y_train).predict(X_test)

    print('   accuracy   ')
    acc = accuracy_score(y_test, y_pred)
    print("Accuracy: {:.4%}".format(acc))

    # Compute confusion matrix
    cnf_matrix = confusion_matrix(y_test, y_pred)
    print("Classification report for classifier %s:\n%s\n"
          % (clf, metrics.classification_report(y_test, y_pred)))

    # Plot non-normalized confusion matrix, titled with the model actually evaluated
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=classes, cmap=plt.cm.Greens,
                          title='Confusion matrix, %s' % name)

    # Plot normalized confusion matrix
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=classes, normalize=True,
                          title='Normalized confusion matrix')

    plt.show()