# Import the Dataset

In [None]:
# fetch_mldata has been removed from scikit-learn (mldata.org is offline);
# fetch_openml is its replacement and serves the same 70,000-image MNIST data.
import numpy as np
from sklearn.datasets import fetch_openml

# as_frame=False returns plain NumPy arrays, so X[i] / X[:60000] index rows.
mnist = fetch_openml("mnist_784", version=1, as_frame=False)

# OpenML returns the labels as strings; cast them to floats so that numeric
# comparisons such as `y == 5` keep working.
mnist["target"] = mnist["target"].astype(np.float64)

# The mldata copy this notebook was written against was ordered by label inside
# the training part (first 60,000) and the test part (last 10,000). Reproduce
# that order so fixed indices used later (e.g. X[36000]) still hit a 5.
train_order = np.argsort(mnist["target"][:60000], kind="stable")
test_order = np.argsort(mnist["target"][60000:], kind="stable") + 60000
sorted_order = np.concatenate([train_order, test_order])
mnist["data"] = mnist["data"][sorted_order]
mnist["target"] = mnist["target"][sorted_order]
mnist

In [None]:
# Feature matrix (one row per image) and the matching label vector.
X = mnist["data"]
y = mnist["target"]



Output dataset
X.shape (70000,784) 
y.shape (70000,)

Explanation
There are 70,000 images, and each image has 784 features. This is because each image is 28×28 pixels, and each feature simply represents one pixel’s intensity, from 0 (white) to 255 (black).

In [1]:
#To take a peek at one digit from the dataset
%matplotlib	inline
import matplotlib
import matplotlib.pyplot as plt
some_digit = X[36000] some_digit_image = some_digit.reshape(28,28)
plt.imshow(some_digit_image,cmap = matplotlib.cm.binary,
           interpolation="nearest")
plt.axis("off") 
plt.show()

#Output looks like 5
y[36000] 
#OUTPUT 
#5.0


SyntaxError: invalid syntax (<ipython-input-1-ec70ca924430>, line 5)

# ✔Split  Train and Test

In [None]:
# Set a test set aside before inspecting the data any further.
# MNIST already comes split: the first 60,000 images form the training set
# and the last 10,000 images form the test set.
X_train, y_train = X[:60000], y[:60000]
X_test, y_test = X[60000:], y[60000:]

# ✔Shuffle the training set

In [None]:
# Shuffling guarantees that all cross-validation folds are similar (no fold
# should be missing some digits). Some learning algorithms are also sensitive
# to the order of the training instances and perform poorly when they get many
# similar instances in a row; shuffling the dataset prevents that.
import numpy as np

# Seeded generator so the shuffle (and every score derived from it) is reproducible.
shuffle_rng = np.random.RandomState(42)
shuffle_index = shuffle_rng.permutation(len(X_train))

# Index both arrays with the same permutation so images and labels stay aligned.
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]


# Training a Binary Classifier

In [None]:
# Build the target vectors for a "5-detector": a binary classifier that only
# distinguishes between two classes, 5 and not-5.
# Each vector is True where the digit is a 5 and False for every other digit.
y_train_5 = y_train == 5
y_test_5 = y_test == 5

# Stochastic Gradient Descent (SGD) classifier

Picking a classifier and training it
✔ A good place to start is with a Stochastic Gradient Descent (SGD) classifier, using Scikit-Learn’s SGDClassifier class. This classifier has the advantage of being capable of handling very large datasets efficiently. ✔ This is in part because SGD deals with training instances independently, one at a time (which also makes SGD well suited for online learning), as we will see later. Let’s create an SGDClassifier and train it on the whole training set:
✔ The SGDClassifier relies on randomness during training (hence the name “stochastic”). If you want reproducible results, you should set the random_state parameter.


In [None]:
from sklearn.linear_model import SGDClassifier

# random_state pins the randomness SGD relies on, making the fit reproducible.
sgd_clf = SGDClassifier(random_state=42)
# Train the 5-detector on the whole training set.
sgd_clf.fit(X_train, y_train_5)


In [None]:
# Use the trained model to check whether an image shows the number 5.
# predict works on a batch of samples, so the digit is passed as a one-row 2-D array.
sgd_clf.predict(some_digit.reshape(1, -1))

# Output:
# array([ True], dtype=bool)

# Evaluate this Model’s Performance

In [None]:

from sklearn.base import clone
from sklearn.model_selection import StratifiedKFold, cross_val_score

# StratifiedKFold performs stratified sampling to produce folds that contain a
# representative ratio of each class. random_state is deliberately not passed:
# it only has an effect together with shuffle=True, and recent scikit-learn
# versions raise a ValueError when it is set without shuffling. The training
# set was already shuffled in an earlier cell.
skfolds = StratifiedKFold(n_splits=3)

for train_index, test_index in skfolds.split(X_train, y_train_5):
    # At each iteration: create an unfitted clone of the classifier, ...
    clone_clf = clone(sgd_clf)
    X_train_folds = X_train[train_index]
    y_train_folds = y_train_5[train_index]
    X_test_fold = X_train[test_index]
    y_test_fold = y_train_5[test_index]

    # ... train that clone on the training folds and predict on the test fold, ...
    clone_clf.fit(X_train_folds, y_train_folds)
    y_pred = clone_clf.predict(X_test_fold)

    # ... then count the correct predictions and output their ratio.
    n_correct = (y_pred == y_test_fold).sum()
    print(n_correct / len(y_pred))  # prints 0.9502, 0.96565 and 0.96495

# Alternative (and the most convenient) way to do the same thing:
# measuring accuracy using cross-validation.
cross_val_score(sgd_clf, X_train, y_train_5, cv=3, scoring="accuracy")
# array([0.9502, 0.96565, 0.96495])


In [None]:
# Let's look at a very dumb classifier that just classifies every single image
# in the "not-5" class.
from sklearn.base import BaseEstimator


class Never5Classifier(BaseEstimator):
    """Baseline classifier that predicts "not-5" (False) for every sample."""

    def fit(self, X, y=None):
        """Learn nothing; return self, as scikit-learn estimators conventionally do."""
        return self

    def predict(self, X):
        """Return a (len(X), 1) boolean array filled with False."""
        return np.zeros((len(X), 1), dtype=bool)


In [None]:
# Accuracy of the always-"not-5" baseline, measured with 3-fold cross-validation.
never_5_clf = Never5Classifier()
cross_val_score(
    never_5_clf,
    X_train,
    y_train_5,
    cv=3,
    scoring="accuracy",
)
# Output:
# array([0.909, 0.90715, 0.9128])

In [None]:
# Confusion matrix.
# To compute it you first need a set of predictions, so that they can be
# compared to the actual targets.
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict

y_train_pred = cross_val_predict(sgd_clf, X_train, y_train_5, cv=3)

confusion_matrix(y_train_5, y_train_pred)
# Output:
# array([[53272,  1307],
#        [ 1077,  4344]])


In [None]:
from sklearn.metrics import precision_score, recall_score

# A notebook cell only displays its last expression, so print both scores
# explicitly instead of silently discarding the first one.
print(precision_score(y_train_5, y_train_pred))  # == 4344 / (4344 + 1307)
# 0.76871350203503808
print(recall_score(y_train_5, y_train_pred))  # == 4344 / (4344 + 1077)
# 0.80132816823464303

In [None]:
# It is often convenient to combine precision and recall into a single metric
# called the F1 score.
from sklearn.metrics import f1_score

f1_score(y_true=y_train_5, y_pred=y_train_pred)
# 0.78468208092485547
