<a href="https://colab.research.google.com/github/Dr-Carlos-Villasenor/TRSeminar/blob/main/TRS12_Dim_Red.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Machine Learning Seminar
## Dr. Carlos Villaseñor
## Dimensionality Reduction



# Prepare data

Import modules

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

Fetch the MNIST data set from OpenML


In [None]:
from sklearn.datasets import fetch_openml

# Download the 'mnist_784' data set (version 1) from OpenML as (features, labels).
# as_frame=True pins the return types to pandas objects (DataFrame / Series)
# instead of relying on the default, which differs between scikit-learn
# releases, so later cells see the same types everywhere.
x, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=True)

Keep only the first `n_samples` samples (just for speed)

In [None]:
n_samples = 5000

# Keep the first n_samples rows as plain NumPy arrays.
# Plain slicing works both on the pandas objects returned by fetch_openml and
# on the arrays left behind by a previous run of this cell, so the cell can be
# re-executed safely. The former `.iloc` / `Series.ravel()` version raised
# AttributeError on a second run, and `Series.ravel()` is deprecated in
# recent pandas.
x = np.array(x[:n_samples])
y = np.array(y[:n_samples], dtype=int)  # labels are converted to integers
print(x.shape, y.shape)

Draw an example (run this many times)

In [None]:
# Pick one row at random; re-running the cell shows a different image.
sample = np.random.randint(x.shape[0])
digit_pixels = x[sample].reshape((28, 28))  # flat row -> 28x28 image
digit_label = y[sample]

plt.imshow(digit_pixels, cmap=plt.cm.gray)
plt.title('Target: %s' % digit_label)
plt.show()

## Digit classification with PCA and SVM

In [14]:
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

In [15]:
# Hold out 10% of the samples for testing.
# random_state makes the split (and therefore every score computed from it)
# reproducible between runs; stratify=y keeps the class proportions close to
# identical in the train and test subsets.
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.1, random_state=42, stratify=y
)

In [16]:
 # Create model
# Three stages applied in order: standardize the features, keep 50 principal
# components, then classify with an SVM (gamma fixed at 1e-4).
pipeline_steps = [
    ('scaler', StandardScaler()),
    ('PCA', PCA(n_components=50)),
    ('SVM', svm.SVC(gamma=0.0001)),
]
model = Pipeline(pipeline_steps)

In [None]:
 # Fit model
# Train every pipeline stage on the training split
model.fit(X=xtrain, y=ytrain)

In [None]:
# Report the model score on both splits (train first, then test)
for split_label, split_x, split_y in (('Train: ', xtrain, ytrain),
                                      ('Test: ', xtest, ytest)):
    print(split_label, model.score(split_x, split_y))

In [19]:
# Predict a label for every sample of the test split
ypred = model.predict(X=xtest)

In [None]:
# Text summary comparing the true test labels with the predictions
report_text = classification_report(ytest, ypred)
print('Classification report: \n', report_text)

In [None]:
# Confusion matrix of true vs. predicted test labels, with rows and columns
# ordered like the classes known to the fitted model
class_labels = model.classes_
cm = confusion_matrix(ytest, ypred, labels=class_labels)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
disp.plot()

In [None]:
# Draw a random test image together with the label predicted for it
sample = np.random.randint(xtest.shape[0])
test_image = xtest[sample].reshape((28, 28))  # flat row -> 28x28 image

plt.imshow(test_image, cmap=plt.cm.gray)
plt.title('Prediction: %i' % ypred[sample])
plt.show()

## Save the model

In [None]:
# Save the model to disk
from joblib import dump

model_file = 'mnist_classifier.joblib'
dump(model, model_file)

In [27]:
from joblib import load

# Read the model back from the joblib file.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
model2 = load(filename='mnist_classifier.joblib')

In [None]:
# Draw a random test image and label it with the reloaded model
sample = np.random.randint(xtest.shape[0])
plt.imshow(xtest[sample].reshape((28, 28)), cmap=plt.cm.gray)

# The single image is reshaped to one row (1, n_features) for predict(), and
# element 0 of the returned array is the scalar label for that row.
# A separate name is used so the full test-set predictions stored in `ypred`
# (read by the report / confusion-matrix cells) are not overwritten, and so
# '%i' formats a scalar rather than a 1-element array.
sample_pred = model2.predict(xtest[sample].reshape(1, -1))[0]
plt.title('Prediction: %i' % sample_pred)
plt.show()

## DR with t-SNE for data visualization

In [33]:
from sklearn.manifold import TSNE

In [34]:
 # t-SNE estimator that embeds the data in 2 dimensions for plotting
# NOTE: this rebinds `model`, which previously held the classification pipeline.
# random_state makes the embedding reproducible between runs.
tsne_options = dict(n_components=2, n_jobs=-1, random_state=0)
try:
    # scikit-learn >= 1.5 calls the iteration limit `max_iter`
    model = TSNE(max_iter=2000, **tsne_options)
except TypeError:
    # older releases only accept the former keyword name `n_iter`
    model = TSNE(n_iter=2000, **tsne_options)

In [None]:
# Fit the estimator on all samples and keep the transformed coordinates
x_2d = model.fit_transform(X=x)
np.shape(x_2d)

In [None]:
# Scatter plot of the 2-D embedding, one point per sample, coloured by label
fig, ax = plt.subplots(figsize=(8, 8))
points = ax.scatter(x_2d[:, 0], x_2d[:, 1], c=y, cmap=plt.cm.tab10, s=5)

# num=None requests one legend entry per distinct value in `y`; the legend is
# anchored outside the axes so it does not cover any points.
ax.legend(*points.legend_elements(num=None), title='Digit',
          loc='upper left', bbox_to_anchor=(1.02, 1.0))
ax.set_title('t-SNE embedding')
ax.set_xlabel('t-SNE dimension 1')
ax.set_ylabel('t-SNE dimension 2')
plt.show()