# Libraries 

## Models 

In [None]:
from sklearn.linear_model import SGDClassifier


## Evaluation 

In [None]:
from sklearn.model_selection import cross_val_score


# Visualization 

In [None]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

# Preview a single MNIST sample. Per the author's note each digit is a
# 28x28-pixel image, so the first entry of X is reshaped into that grid.
some_digit = X[0]
some_digit_image = some_digit.reshape((28, 28))

# The "binary" colormap draws the pixels in greyscale; axes are hidden because
# tick marks carry no meaning for an image preview.
plt.imshow(some_digit_image, cmap="binary")
plt.axis("off")

# NOTE(review): save_fig is defined outside this cell — presumably it writes
# the current figure to disk under the given name; confirm against its definition.
save_fig("some_digit_plot")
plt.show()

# Analysis 

## Confusion Matrix

In [None]:
# Confusion matrix: shows which kinds of errors the classifier makes.
# Rows are the actual classes, columns are the predicted classes:
# array([[no. of True Neg, no. of False Pos],
#        [no. of False Neg, no. of True Pos]])

In [2]:
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix

# NOTE(review): sgd_clf is created in the "SGD Classifier" section further down
# this notebook, so that cell must be run before this one on a fresh kernel.
# cross_val_predict is run with cv=3 on the training set; the predictions it
# returns are compared against the true labels y_train_5 below.
y_train_pred = cross_val_predict(sgd_clf, X_train, y_train_5, cv=3)

# Output recorded by the author for the predictions above:
# array([[53892,   687],
#        [ 1891,  3530]])
#
# For comparison, a classifier that made no errors would give a matrix with
# non-zero entries on the diagonal only:
# array([[54579,     0],
#        [    0,  5421]])

# Use of the confusion matrix: it is deliberately the last expression of the
# cell so that Jupyter displays it (a trailing string literal would be shown
# instead and the matrix would be silently dropped).
confusion_matrix(y_train_5, y_train_pred)

# Models

## SGD Classifier 

In [None]:
# Advantages of SGDClassifier:
# 1. Handles very large datasets efficiently
# 2. Processes training samples independently, one at a time
# 3. Well suited to online learning

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import cross_val_score

# Step 1: create a model instance and fit it on the training data.
# random_state is fixed so that repeated runs give the same model.
sgd_clf = SGDClassifier(max_iter=1000, tol=1e-3, random_state=42)
sgd_clf.fit(X_train, y_train_5)

# Step 2: make a prediction for the sample shown in the Visualization section.
# The result is stored and printed explicitly: an expression in the middle of
# a cell is not displayed by Jupyter, so a bare call here would be discarded.
some_digit_pred = sgd_clf.predict([some_digit])
print("Prediction for some_digit:", some_digit_pred)  # author recorded: array([ True])

# Step 3: score the model with 3-fold cross-validation on accuracy.
# `score` holds the values returned by cross_val_score for cv=3; leaving it as
# the last expression makes Jupyter display them.
score = cross_val_score(sgd_clf, X_train, y_train_5, cv=3, scoring="accuracy")
score

In [None]:
# Step 4: manual cross-validation with StratifiedKFold.
# StratifiedKFold is a splitter rather than a scoring method: this loop
# fits and evaluates the model on each split by hand, printing the accuracy
# of every fold.

from sklearn.model_selection import StratifiedKFold
from sklearn.base import clone

# Shuffled 3-way split with a fixed seed so the folds are reproducible.
skfolds = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

for train_index, test_index in skfolds.split(X_train, y_train_5):
    # Fit a fresh, unfitted copy per fold so no state carries over from
    # sgd_clf or from a previous iteration.
    clone_clf = clone(sgd_clf)
    X_train_folds = X_train[train_index]
    y_train_folds = y_train_5[train_index]
    X_test_fold = X_train[test_index]
    y_test_fold = y_train_5[test_index]

    clone_clf.fit(X_train_folds, y_train_folds)
    y_pred = clone_clf.predict(X_test_fold)
    # Vectorized count of matches; the built-in sum() would iterate over the
    # boolean array element by element in Python.
    n_correct = (y_pred == y_test_fold).sum()
    print(n_correct / len(y_pred))

# Output recorded by the author:
# 0.9669
# 0.91625
# 0.96785