In [1]:
from sklearn import datasets

#Loading digits dataset
digits = datasets.load_digits()

print(digits.images.shape)

n_samples = len(digits.images)


print("total no. of samples:", n_samples )

(1797, 8, 8)
total no. of samples: 1797


In [2]:
#dividing data
data, target = digits.data, digits.target
digits.data.shape   


(1797, 64)

In [3]:
digits.target.shape

(1797,)

In [4]:
#Reshaping the data
#To apply the classifier, data must be in the form of features * samples

data = digits.images.reshape(n_samples,-1)

In [5]:
data.shape  

(1797, 64)

In [6]:
#Split data
from sklearn.model_selection import train_test_split

#Here we stratify targets as we need same proportions of class labels as input dataset
#Random state = integer means the data will be of same order for a particular random_state number
X_train,X_test,y_train,y_test = train_test_split(data,target,test_size = 0.2, random_state = 42, stratify = target)

In [7]:
#SVM classifier
from sklearn import svm
classifier = svm.SVC(kernel = 'rbf', gamma = 'auto', C = 1)
classifier.fit(X_train,y_train)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [8]:
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv= 5)

In [9]:
print(accuracies)

[0.60137457 0.62283737 0.60627178 0.54545455 0.57042254]


In [10]:
print(accuracies.mean())


0.5892721596716494


In [11]:
grid_parameters = {'gamma' : [10**-i for i in range(-5,6)],'C' : [10**-i for i in range(-5,6)]  }

In [12]:
from sklearn.model_selection import GridSearchCV
grid_search = GridSearchCV(estimator = classifier,
                          param_grid = grid_parameters,
                          scoring = 'accuracy',
                          cv = 5, n_jobs = -1)

In [13]:
print(grid_search)

GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=-1,
       param_grid={'gamma': [100000, 10000, 1000, 100, 10, 1, 0.1, 0.01, 0.001, 0.0001, 1e-05], 'C': [100000, 10000, 1000, 100, 10, 1, 0.1, 0.01, 0.001, 0.0001, 1e-05]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=0)


In [14]:
grid_search.fit(X_train,y_train)



GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=-1,
       param_grid={'gamma': [100000, 10000, 1000, 100, 10, 1, 0.1, 0.01, 0.001, 0.0001, 1e-05], 'C': [100000, 10000, 1000, 100, 10, 1, 0.1, 0.01, 0.001, 0.0001, 1e-05]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=0)

In [15]:
best_parameters = grid_search.best_params_
print(best_parameters)

{'C': 100000, 'gamma': 0.001}


In [16]:
best_score = grid_search.best_score_

In [17]:
print(best_score)

0.9895615866388309


In [18]:
#accuract of test data
classifier_main = svm.SVC(kernel = 'rbf', gamma = 0.001, C = 100000)
classifier_main.fit(X_train,y_train)

SVC(C=100000, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [19]:
y_hat = classifier_main.predict(X_test)

In [20]:
from sklearn import metrics
print("final accuracy:", metrics.accuracy_score(y_hat, y_test))

final accuracy: 0.9916666666666667


# K Nearest Neighbors

In [21]:
from sklearn import preprocessing
X_train = preprocessing.StandardScaler().fit(X_train).transform(X_train.astype(float))
X_test = preprocessing.StandardScaler().fit(X_test).transform(X_test.astype(float))

In [22]:
from sklearn.neighbors import KNeighborsClassifier
Neighbor = KNeighborsClassifier(n_neighbors = 2)
Neighbor.fit(X_train,y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=2, p=2,
           weights='uniform')

In [23]:
accuracies = cross_val_score(estimator = Neighbor , X = X_train, y = y_train, cv= 5)

In [24]:
print(accuracies)

[0.9862543  0.97923875 0.95818815 0.97202797 0.94366197]


In [25]:
print(accuracies.mean())

0.9678742294053937


In [26]:
k_values = {'n_neighbors': [1,3,5,7,9]}
from sklearn.model_selection import GridSearchCV
KNN_grid_search = GridSearchCV(estimator = Neighbor,cv = 5, param_grid = k_values)

In [27]:
KNN_grid_search.fit(X_train,y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=2, p=2,
           weights='uniform'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'n_neighbors': [1, 3, 5, 7, 9]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [28]:
best_parameters = KNN_grid_search.best_params_
print(best_parameters)

{'n_neighbors': 1}


In [29]:
best_score = KNN_grid_search.best_score_
print(best_score)

0.9756437021572721


In [30]:
Neighbor_main = KNeighborsClassifier(n_neighbors = 1)
Neighbor_main.fit(X_train,y_train)
KNN_test = Neighbor_main.predict(X_test)
print("final accuracy:", metrics.accuracy_score(KNN_test, y_test))

final accuracy: 0.9694444444444444


# Logistic regression

In [31]:
from sklearn.linear_model import LogisticRegression
LR = LogisticRegression(penalty  = 'l1', C=0.01, solver='liblinear')
LR.fit(X_train,y_train)
# smaller the c more will be regularization



LogisticRegression(C=0.01, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l1', random_state=None, solver='liblinear',
          tol=0.0001, verbose=0, warm_start=False)

In [32]:
from sklearn.model_selection import cross_val_score
accuracies_LR = cross_val_score(estimator = LR, X = X_train, y = y_train, cv= 5)



In [33]:
print("accuracies of logistic regression",accuracies_LR)

accuracies of logistic regression [0.64948454 0.66089965 0.67944251 0.64685315 0.66901408]


In [34]:
print(accuracies_LR.mean())

0.6611387860265406


In [35]:
grid_parameters = {'C' : [10**i for i in range(-5,6)]  }

In [36]:
from sklearn.model_selection import GridSearchCV
LR_grid_search = GridSearchCV(estimator = LR,
                          param_grid = grid_parameters,
                          scoring = 'accuracy',
                          cv = 5, n_jobs = -1)

In [37]:
LR_grid_search.fit(X_train,y_train)



GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=LogisticRegression(C=0.01, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l1', random_state=None, solver='liblinear',
          tol=0.0001, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=-1,
       param_grid={'C': [1e-05, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=0)

In [38]:
best_parameters = LR_grid_search.best_params_
print(best_parameters)

{'C': 1}


In [39]:
best_score = LR_grid_search.best_score_
print(best_score)

0.9624217118997912


In [40]:
LR = LogisticRegression(penalty = 'l1',C=1, solver='liblinear')
LR.fit(X_train,y_train)



LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l1', random_state=None, solver='liblinear',
          tol=0.0001, verbose=0, warm_start=False)

In [41]:
Y_LR = LR.predict(X_test)

In [42]:
print("final accuracy:", metrics.accuracy_score(Y_LR, y_test))

final accuracy: 0.9722222222222222
