# MNIST
The objective of this project is to use different algorithms from Sklearn and Tensorflow to identify hand-written digits, training our models using the MNIST dataset.

## 1. Read Data

In [1]:
#Before all, lets get the dataset from sklean dataset db.
from sklearn.datasets import fetch_openml
import numpy as np
mnist = fetch_openml('mnist_784')

In [2]:
#to know how is structured the dataset
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

## 2. Data Preprocessing

In [2]:
#First, is necessary to separate the image from the target
X, y = mnist['data'], mnist['target']

In [3]:
#know, let's separate the training set from the validation set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = X[:6000], X[6000:], y[:6000], y[6000:]

In [4]:
#For some algorithms, it is recommended to scale first the data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

In [24]:
import matplotlib as plt
i = 0
some_x, some_y= X.transpose()[i], y[i]
some_x.rescale(28, 28)
print(some_y)
plt.imshow(some_x)

AttributeError: 'Series' object has no attribute 'rescale'

## 3. Model Creation
For this project, we will be comparing the next algorithms:

In [5]:
#Logistic Regression
#extrated from https://scikit-learn.org/stable/auto_examples/linear_model/plot_sparse_logistic_regression_mnist.html
from sklearn.linear_model import LogisticRegression
#Let's create a scaled model from our scaled dataset
log_reg = LogisticRegression(C=50.0 / 5000, penalty='l1', solver='saga', tol=0.1, max_iter=1000)

In [6]:
#SVM
#extracted from https://dmkothari.github.io/Machine-Learning-Projects/SVM_with_MNIST.html
from sklearn.svm import SVC
svm = SVC()

In [8]:
#Random Forest
#extracted from https://www.kaggle.com/ashwani07/mnist-classification-using-random-forest
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=100)

In [9]:
#Neural Network (MPL)
#exctracted from https://dmkothari.github.io/Machine-Learning-Projects/MLP_with_MNIST.html
from sklearn.neural_network import MLPClassifier
mpl = MLPClassifier()

In [None]:
#CNN


## 4. Adjust Model with Historic Data

In [10]:
#Logistic Regression
log_reg.fit(X_train_scaled, y_train)

LogisticRegression(C=0.01, max_iter=1000, penalty='l1', solver='saga', tol=0.1)

In [11]:
#SVM
svm.fit(X_train, y_train)

SVC()

In [12]:
#Random Forest
rf.fit(X_train, y_train)

RandomForestClassifier()

In [13]:
#MPL
mpl.fit(X_train, y_train)

MLPClassifier()

In [None]:
#CNN

## 5. Prediction for new Data

In [None]:
#things i will use on this section
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

In [15]:
#Logistic Regression
log_reg_score = log_reg.score(X_test_scaled, y_test)
print(f"Logistic Regression score: {log_reg_score}")

Logistic Regression score: 0.831765625


In [19]:
#SVM
svm_pred = svm.predict(X_test)
svm_score = classification_report(y_test, svm_pred)
print(svm_score)

              precision    recall  f1-score   support

           0       0.97      0.98      0.98      6311
           1       0.97      0.98      0.98      7206
           2       0.94      0.95      0.95      6409
           3       0.95      0.93      0.94      6533
           4       0.93      0.96      0.95      6201
           5       0.94      0.93      0.94      5799
           6       0.97      0.97      0.97      6268
           7       0.96      0.95      0.96      6642
           8       0.95      0.93      0.94      6274
           9       0.93      0.93      0.93      6357

    accuracy                           0.95     64000
   macro avg       0.95      0.95      0.95     64000
weighted avg       0.95      0.95      0.95     64000



In [17]:
#Random Forest
rd_pred = rf.predict(X_test)
rd_score = classification_report(y_test, rd_pred)
print(rd_score)

              precision    recall  f1-score   support

           0       0.96      0.97      0.97      6311
           1       0.97      0.98      0.97      7206
           2       0.93      0.93      0.93      6409
           3       0.93      0.91      0.92      6533
           4       0.93      0.94      0.94      6201
           5       0.94      0.91      0.92      5799
           6       0.95      0.96      0.96      6268
           7       0.94      0.94      0.94      6642
           8       0.93      0.89      0.91      6274
           9       0.90      0.92      0.91      6357

    accuracy                           0.94     64000
   macro avg       0.94      0.94      0.94     64000
weighted avg       0.94      0.94      0.94     64000



In [20]:
#MPL
mpl_pred = mpl.predict(X_test)
mpl_score = classification_report(y_test, rd_pred)
print(rd_score)

              precision    recall  f1-score   support

           0       0.96      0.97      0.97      6311
           1       0.97      0.98      0.97      7206
           2       0.93      0.93      0.93      6409
           3       0.93      0.91      0.92      6533
           4       0.93      0.94      0.94      6201
           5       0.94      0.91      0.92      5799
           6       0.95      0.96      0.96      6268
           7       0.94      0.94      0.94      6642
           8       0.93      0.89      0.91      6274
           9       0.90      0.92      0.91      6357

    accuracy                           0.94     64000
   macro avg       0.94      0.94      0.94     64000
weighted avg       0.94      0.94      0.94     64000



In [None]:
#CNN

## 6. Visualization of Results