# Maestría en Ciencia de Datos e Inteligencia Artificial
#### 8. Machine Learning and Deep Learning
#### Docente: Msc. Renzo Claure Aracena.

### Espacios complejos para separar

In [None]:
import matplotlib.pyplot as plt
import time
from sklearn import datasets, manifold
# Draw 1500 swiss-roll samples with a fixed seed so the draw is reproducible.
# The second return value is used further down to colour the scatter plot.
sr_points, sr_color = datasets.make_swiss_roll(
    n_samples=1500,
    random_state=0,
)

In [None]:
%matplotlib inline
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection="3d")
fig.add_axes(ax)
ax.scatter(
    sr_points[:, 0], sr_points[:, 1], sr_points[:, 2], c=sr_color, s=50, alpha=0.8
)
ax.set_title("Swiss Roll in Ambient Space")
ax.view_init(azim=-66, elev=12)
_ = ax.text2D(0.8, 0.05, s="n_samples=1500", transform=ax.transAxes)

### Implementación de t-SNE

In [None]:
from sklearn.datasets import fetch_openml
from sklearn.decomposition import PCA
from sklearn.datasets import fetch_openml
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
import time

In [None]:
# Fetch the 'mnist_784' dataset (version 1) from OpenML and split it into
# features and labels.
mnist = fetch_openml('mnist_784', version=1)
X, y_train = mnist.data, mnist.target  # pandas DataFrame, pandas Series

In [None]:
# Preview the first rows of the feature table.
X.head()

In [None]:
# Preview the first entries of the label series.
y_train.head()

![Mi Imagen](mnist_digit_5.png)

In [None]:
# Show the dimensions of the feature table.
X.shape

In [None]:
# Scale the features with StandardScaler; the fitted scaler stays available
# as `scaler` for later cells.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)  # This returns a NumPy array

In [None]:
# Take a reproducible random sample of 3000 distinct rows.
np.random.seed(0)
indices = np.random.choice(len(X_scaled), size=3000, replace=False)
x_sample = X_scaled[indices]
# .iloc selects by position, keeping the labels aligned with the sampled rows.
y_sample = y_train.iloc[indices]

In [None]:
# Cast the sampled labels to int; they are later passed as numeric colour
# values to the scatter plot.
# NOTE(review): presumably the OpenML labels are not already integers — confirm.
y_sample = y_sample.astype(int)

In [None]:
# t-SNE: embed the sample into 2 components with a fixed seed.
tsne = TSNE(n_components=2, random_state=0) # optional knobs: perplexity=perp, n_iter=3000
# Perplexity controls the number of neighbours; per the author's note, high
# values leave the result overly split up.
x_tsne = tsne.fit_transform(x_sample)

In [None]:
def plotting(X_scaled, labels,  title = 'Gráfico 3D', xlabel = 'Componente 1', ylabel='Componente 2'):
    """Scatter-plot the first two columns of an embedding, coloured by label.

    Args:
        X_scaled: 2-D array supporting ``[:, 0]`` / ``[:, 1]`` indexing; only
            the first two columns are drawn.
        labels: One numeric class label per row (pandas Series, NumPy array
            or list). The values are mapped onto the ``tab10`` colormap.
        title: Figure title. NOTE(review): the default text says "3D" although
            the plot is 2-D; it is kept unchanged for backward compatibility.
        xlabel: Label for the horizontal axis.
        ylabel: Label for the vertical axis.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(10, 8))
    scatter = plt.scatter(X_scaled[:, 0], X_scaled[:, 1], c=labels, cmap='tab10', s=5, alpha=0.6)

    # Build the legend from the classes that are actually present, so the
    # number of handles always matches the number of legend labels.
    classes = sorted(set(labels))
    # Ask the scatter itself for each class colour: it applies the same
    # normalisation and colormap as the plotted points, so legend and points
    # agree even when the labels are not exactly 0..9.
    handles = [
        plt.Line2D([0], [0], marker='o', color='w',
                   markerfacecolor=scatter.to_rgba(cls), markersize=10)
        for cls in classes
    ]
    legend_labels = [str(cls) for cls in classes]
    plt.legend(handles, legend_labels, title='Digit Label', loc='upper right')

    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()

In [None]:
# Plot the t-SNE embedding coloured by the sampled labels.
plotting(x_tsne, y_sample)

In [None]:
# t-SNE with perplexity=100 and 3000 iterations, timing the fit.
import time
# perf_counter is a monotonic, high-resolution clock intended for measuring
# durations; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
# NOTE(review): newer scikit-learn releases rename `n_iter` to `max_iter` —
# confirm against the installed version.
tsne = TSNE(n_components=2, random_state=0, perplexity=100, n_iter=3000 )
# Perplexity controls the number of neighbours; per the author's note, high
# values leave the result overly split up.
x_tsne = tsne.fit_transform(x_sample)
total_time = time.perf_counter() - start_time
print(f'El tSNE tomó: {total_time :.2f} segundos')

# Plot the new embedding.
plotting(x_tsne, y_sample)

### UMAP

In [None]:
#!pip install umap-learn

In [None]:
import time
import umap
import numpy as np
from sklearn.datasets import fetch_openml
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Fetch the 'mnist_784' dataset (version 1) from OpenML again and split it
# into features and labels.
mnist = fetch_openml('mnist_784', version=1)
X, y_train = mnist.data, mnist.target  # pandas DataFrame, pandas Series

# Scale the features; fit_transform returns a NumPy array.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# Take a reproducible random sample of 3000 distinct rows.
np.random.seed(0)
indices = np.random.choice(len(X_scaled), size=3000, replace=False)
x_sample = X_scaled[indices]
# .iloc selects by position so the labels stay aligned with the sampled rows;
# the cast to int happens in the same step.
y_sample = y_train.iloc[indices].astype(int)

In [None]:
# Fit UMAP on the sample and measure how long it takes.
# perf_counter is a monotonic, high-resolution clock intended for measuring
# durations; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
umap_m = umap.UMAP(n_components=2, random_state=0)
x_umap = umap_m.fit_transform(x_sample)
total_time = time.perf_counter() - start_time

print(f'El UMAP tomó: {total_time :.2f} segundos')

In [None]:
# Preview the first sampled labels.
y_sample.head()

In [None]:
# Plot the UMAP embedding coloured by the sampled labels.
plotting(x_umap, y_sample)

### Volvamos a la rueda

In [None]:
import matplotlib.pyplot as plt
from sklearn import datasets, manifold
import pandas as pd
import numpy as np

In [None]:
# Draw 1500 swiss-roll samples with a fixed seed so the draw is reproducible.
# The second return value is used in the next cell to colour the scatter plot.
sr_points, sr_color = datasets.make_swiss_roll(
    n_samples=1500,
    random_state=0,
)

In [None]:
%matplotlib inline
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection="3d")
fig.add_axes(ax)
ax.scatter(
    sr_points[:, 0], sr_points[:, 1], sr_points[:, 2], c=sr_color, s=50, alpha=0.8
)
ax.set_title("Swiss Roll in Ambient Space")
ax.view_init(azim=-66, elev=12)
_ = ax.text2D(0.8, 0.05, s="n_samples=1500", transform=ax.transAxes)