In [60]:
########################################################################
#	Loading and inspecting the dataset
########################################################################

from sklearn.datasets import load_digits
digits = load_digits()
#print(digits.data.shape)
#print(digits.DESCR)

In [62]:
########################################################################
# 	Visualization of data
########################################################################
# just the first
%matplotlib inline  
import matplotlib.pyplot as plt 
#plt.gray() 
#plt.matshow(digits.images[0]) 
#plt.show() 

# top 5
#import numpy as np 
#plt.figure(figsize=(20,4))
#for index, (image, label) in enumerate(zip(digits.data[0:5], digits.target[0:5])):
#    plt.subplot(1, 5, index + 1)
#    plt.imshow(np.reshape(image, (8,8)), cmap=plt.cm.gray)
#    plt.title('Training: %i\n' % label, fontsize = 20)


In [4]:
########################################################################
#	Decision Tree
########################################################################

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc

data, test_data, target, test_target = train_test_split(digits.data, digits.target, test_size = 0.05)

parameters = {'max_depth': [10, 15, 20, 25, 30],
              'min_samples_split': [2, 3, 4],
              'min_samples_leaf': [1, 2, 3],
              'max_leaf_nodes': [300, 400, 500],
              #'min_weight_fraction_leaf': [],
              #'max_features': [],
              #'min_impurity_decrease': [] 
             }

dt = DecisionTreeClassifier()
clf = GridSearchCV(dt, parameters, cv=5)
clf.fit(data, target)
sorted(clf.cv_results_.keys())
print('1. Best Parameters: \n', clf.best_params_, '\n')
print('2. Best Estimator: \n',clf.best_estimator_, '\n')
print('3. Best Score: \n',clf.best_score_, '\n')

predictions = clf.predict(test_data)
print('4. Confusion Matrix: \n', metrics.confusion_matrix(test_target, predictions), '\n')
print('5. Accuracy: \n', np.mean(predictions == test_target), '\n')
print('6. Classification Report: \n', metrics.classification_report(test_target, predictions), '\n')

1. Best Parameters: 
 {'max_depth': 10, 'max_leaf_nodes': 500, 'min_samples_leaf': 1, 'min_samples_split': 2} 

2. Best Estimator: 
 DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=10,
            max_features=None, max_leaf_nodes=500,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best') 

3. Best Score: 
 0.849443468073 

4. Confusion Matrix: 
 [[ 7  0  1  0  0  0  0  0  0  0]
 [ 0  7  0  0  0  0  0  0  1  0]
 [ 0  1  7  0  0  0  0  0  0  0]
 [ 0  0  1 10  0  0  0  0  0  0]
 [ 0  0  1  0  6  0  0  0  0  0]
 [ 0  0  0  0  0  9  0  0  0  1]
 [ 0  0  1  1  0  0  5  0  0  0]
 [ 0  0  0  0  0  0  0  8  0  0]
 [ 0  1  1  0  0  0  0  0  9  1]
 [ 0  0  0  0  0  1  0  0  0 10]] 

5. Accuracy: 
 0.866666666667 

6. Classification Report: 
              precision    recall  f1-score   support

          0   

In [None]:
########################################################################
#	Neural Net
########################################################################

from sklearn.neural_network import MLPClassifier

parameters = {'hidden_layer_sizes': [50, 75, 100],
              'activation': ['identity', 'logistic','relu'],
              'max_iter': [100, 200],
              'alpha': [0.0001, 0.0002],
              #'learning_rate': ['constant', 'invscaling', 'adaptive'],
              #'tol': [],
              #'momentum': [] 
              #'early_stopping': [] 
             }

mlp = MLPClassifier()
clf = GridSearchCV(mlp, parameters, cv=5)
clf.fit(data, target)
sorted(clf.cv_results_.keys())
print('1. Best Parameters: \n', clf.best_params_, '\n')
print('2. Best Estimator: \n',clf.best_estimator_, '\n')
print('3. Best Score: \n',clf.best_score_, '\n')

predictions = clf.predict(test_data)
print('4. Confusion Matrix: \n', metrics.confusion_matrix(test_target, predictions), '\n')
print('5. Accuracy: \n', np.mean(predictions == test_target), '\n')
print('6. Classification Report: \n', metrics.classification_report(test_target, predictions), '\n')









1. Best Parameters: 
 {'activation': 'logistic', 'alpha': 0.0001, 'hidden_layer_sizes': 100, 'max_iter': 200} 

2. Best Estimator: 
 MLPClassifier(activation='logistic', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=100, learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False) 

3. Best Score: 
 0.978910369069 

4. Confusion Matrix: 
 [[ 8  0  0  0  0  0  0  0  0  0]
 [ 0  8  0  0  0  0  0  0  0  0]
 [ 0  0  8  0  0  0  0  0  0  0]
 [ 0  0  0 11  0  0  0  0  0  0]
 [ 0  0  0  0  7  0  0  0  0  0]
 [ 0  0  0  0  0  9  0  0  0  1]
 [ 0  0  0  0  0  0  7  0  0  0]
 [ 0  0  0  0  0  0  0  8  0  0]
 [ 0  1  0  0  0  0  0  0 11  0]
 [ 0  0  0  0  0  0  0  0  0 11]] 

5. Accuracy: 
 0.977777777778 

6. Class

In [22]:
########################################################################
#	SVM
########################################################################

from sklearn.svm import SVC

parameters = {'C': [1.0, 1.5, 2],
              'kernel': ['rbf', 'poly'],
              'gamma': ['auto', 1, 10], 
              'degree': [2, 3, 4], 
              'max_iter': [-1, 1000],
              #'random_state': [],
             }

svc = SVC()
clf = GridSearchCV(svc, parameters, cv=5)
clf.fit(data, target)
sorted(clf.cv_results_.keys())
print('1. Best Parameters: \n', clf.best_params_, '\n')
print('2. Best Estimator: \n',clf.best_estimator_, '\n')
print('3. Best Score: \n',clf.best_score_, '\n')

predictions = clf.predict(test_data)
print('4. Confusion Matrix: \n', metrics.confusion_matrix(test_target, predictions), '\n')
print('5. Accuracy: \n', np.mean(predictions == test_target), '\n')
print('6. Classification Report: \n', metrics.classification_report(test_target, predictions), '\n')

1. Best Parameters: 
 {'C': 1.0, 'degree': 3, 'gamma': 'auto', 'kernel': 'poly', 'max_iter': -1} 

2. Best Estimator: 
 SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False) 

3. Best Score: 
 0.988283538371 

4. Confusion Matrix: 
 [[ 8  0  0  0  0  0  0  0  0  0]
 [ 0  8  0  0  0  0  0  0  0  0]
 [ 0  0  8  0  0  0  0  0  0  0]
 [ 0  0  0 11  0  0  0  0  0  0]
 [ 0  0  0  0  7  0  0  0  0  0]
 [ 0  0  0  0  0  9  0  0  0  1]
 [ 0  0  0  0  0  0  7  0  0  0]
 [ 0  0  0  0  0  0  0  8  0  0]
 [ 0  0  0  0  0  0  0  0 12  0]
 [ 0  0  0  0  0  0  0  0  0 11]] 

5. Accuracy: 
 0.988888888889 

6. Classification Report: 
              precision    recall  f1-score   support

          0       1.00      1.00      1.00         8
          1       1.00      1.00      1.00         8
          2       1.00      1.00      1.00

In [58]:
########################################################################
#	Gaussian Naive Bayes
########################################################################

from sklearn.naive_bayes import GaussianNB

parameters = {'priors': [None,[0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1]]}

gnb = GaussianNB()
clf = GridSearchCV(gnb, parameters, cv=5)
clf.fit(data, target)
sorted(clf.cv_results_.keys())
print('1. Best Parameters: \n', clf.best_params_, '\n')
print('2. Best Estimator: \n', clf.best_estimator_, '\n')
print('3. Best Score: \n', clf.best_score_, '\n')

predictions = clf.predict(test_data)
print('4. Confusion Matrix: \n', metrics.confusion_matrix(test_target, predictions), '\n')
print('5. Accuracy: \n', np.mean(predictions == test_target), '\n')
print('6. Classification Report: \n', metrics.classification_report(test_target, predictions), '\n')

1. Best Parameters: 
 {'priors': None} 

2. Best Estimator: 
 GaussianNB(priors=None) 

3. Best Score: 
 0.81956649092 

4. Confusion Matrix: 
 [[ 8  0  0  0  0  0  0  0  0  0]
 [ 0  8  0  0  0  0  0  0  0  0]
 [ 0  1  5  0  0  0  0  0  2  0]
 [ 0  0  0  9  0  0  0  0  2  0]
 [ 0  0  0  0  6  0  0  1  0  0]
 [ 0  0  0  0  0  9  0  0  0  1]
 [ 0  0  0  0  0  0  7  0  0  0]
 [ 0  0  0  0  0  0  0  8  0  0]
 [ 0  1  0  0  0  0  0  0 11  0]
 [ 0  1  0  1  0  0  0  0  0  9]] 

5. Accuracy: 
 0.888888888889 

6. Classification Report: 
              precision    recall  f1-score   support

          0       1.00      1.00      1.00         8
          1       0.73      1.00      0.84         8
          2       1.00      0.62      0.77         8
          3       0.90      0.82      0.86        11
          4       1.00      0.86      0.92         7
          5       1.00      0.90      0.95        10
          6       1.00      1.00      1.00         7
          7       0.89      1.00      

In [44]:
########################################################################
#	Logistic Regression
########################################################################

from sklearn.linear_model import LogisticRegression

parameters = {'penalty': ['l1', 'l2'],
              'tol': [1e-4, 1e-5],
              'C': [0.5, 1.0, 1.5],
              'fit_intercept': ['True', 'False'],
              'class_weight': [None, 'balanced'],
              #'max_iter': [],
              #'multi_class': [] 
             }

logisticRegr = LogisticRegression()
clf = GridSearchCV(logisticRegr, parameters, cv=5)
clf.fit(data, target)
sorted(clf.cv_results_.keys())
print('1. Best Parameters: \n', clf.best_params_, '\n')
print('2. Best Estimator: \n',clf.best_estimator_, '\n')
print('3. Best Score: \n',clf.best_score_, '\n')

predictions = clf.predict(test_data)
print('4. Confusion Matrix: \n', metrics.confusion_matrix(test_target, predictions), '\n')
print('5. Accuracy: \n', np.mean(predictions == test_target), '\n')
print('6. Classification Report: \n', metrics.classification_report(test_target, predictions), '\n')

1. Best Parameters: 
 {'C': 0.5, 'class_weight': None, 'fit_intercept': 'True', 'penalty': 'l1', 'tol': 0.0001} 

2. Best Estimator: 
 LogisticRegression(C=0.5, class_weight=None, dual=False, fit_intercept='True',
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l1', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False) 

3. Best Score: 
 0.961335676626 

4. Confusion Matrix: 
 [[ 8  0  0  0  0  0  0  0  0  0]
 [ 0  8  0  0  0  0  0  0  0  0]
 [ 0  0  8  0  0  0  0  0  0  0]
 [ 0  0  0 11  0  0  0  0  0  0]
 [ 0  0  0  0  7  0  0  0  0  0]
 [ 0  0  0  0  0  9  0  0  0  1]
 [ 0  0  0  0  0  0  7  0  0  0]
 [ 0  0  0  0  0  0  0  8  0  0]
 [ 0  0  0  0  0  0  0  0 12  0]
 [ 0  0  0  0  0  0  0  0  0 11]] 

5. Accuracy: 
 0.988888888889 

6. Classification Report: 
              precision    recall  f1-score   support

          0       1.00      1.00      1.00         8
          1       1.00      1.00      1

In [32]:
########################################################################
#	k-NN
########################################################################

from sklearn.neighbors import KNeighborsClassifier

parameters = {'n_neighbors': [3, 5, 7],
              'weights': ['uniform', 'distance'],
              'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
              'p': [1, 2, 3]
             }

knn = KNeighborsClassifier()
clf = GridSearchCV(knn, parameters, cv=5)
clf.fit(data, target)
sorted(clf.cv_results_.keys())
print('1. Best Parameters: \n', clf.best_params_, '\n')
print('2. Best Estimator: \n',clf.best_estimator_, '\n')
print('3. Best Score: \n',clf.best_score_, '\n')

predictions = clf.predict(test_data)
print('4. Confusion Matrix: \n', metrics.confusion_matrix(test_target, predictions), '\n')
print('5. Accuracy: \n', np.mean(predictions == test_target), '\n')
print('6. Classification Report: \n', metrics.classification_report(test_target, predictions), '\n')

1. Best Parameters: 
 {'algorithm': 'auto', 'n_neighbors': 3, 'p': 2, 'weights': 'distance'} 

2. Best Estimator: 
 KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=2,
           weights='distance') 

3. Best Score: 
 0.986526069127 

4. Confusion Matrix: 
 [[ 8  0  0  0  0  0  0  0  0  0]
 [ 0  8  0  0  0  0  0  0  0  0]
 [ 0  0  8  0  0  0  0  0  0  0]
 [ 0  0  0 11  0  0  0  0  0  0]
 [ 0  0  0  0  7  0  0  0  0  0]
 [ 0  0  0  0  0  9  0  0  0  1]
 [ 0  0  0  0  0  0  7  0  0  0]
 [ 0  0  0  0  0  0  0  8  0  0]
 [ 0  0  0  0  0  0  0  0 12  0]
 [ 0  0  0  0  0  0  0  0  0 11]] 

5. Accuracy: 
 0.988888888889 

6. Classification Report: 
              precision    recall  f1-score   support

          0       1.00      1.00      1.00         8
          1       1.00      1.00      1.00         8
          2       1.00      1.00      1.00         8
          3       1.00      1.00      1.00        11
 