<a href="https://colab.research.google.com/github/Dr-Carlos-Villasenor/TRSeminar/blob/main/TRS11_Clustering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Machine Learning Seminar
## Dr. Carlos Villaseñor
## Clustering algorithms



Import modules

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import cluster, datasets, mixture, metrics
from sklearn.preprocessing import StandardScaler

Create synthetic data

In [3]:
## Build the six standardized 2-D toy datasets
np.random.seed(0)
n_samples = 1500

# Generate the raw point clouds first, in the original order.
# NOTE(review): np.random.rand draws from the global generator seeded above,
# and the sklearn generators called without random_state presumably do too,
# so confirm before reordering any of these calls.
circles, _ = datasets.make_circles(n_samples=n_samples, factor=0.5, noise=0.05)
moons, _ = datasets.make_moons(n_samples=n_samples, noise=0.05)
blobs, _ = datasets.make_blobs(n_samples=n_samples, random_state=8)
plane = np.random.rand(n_samples, 2)  # uniformly scattered points

# Anisotropic blobs: ordinary blobs pushed through a fixed linear map
shear = [[0.6, -0.6], [-0.4, 0.8]]
stretched, _ = datasets.make_blobs(n_samples=n_samples, random_state=170)
stretched = np.dot(stretched, shear)

# Blobs whose three clusters have different standard deviations
varied, _ = datasets.make_blobs(
    n_samples=n_samples,
    cluster_std=[1.0, 2.5, 0.5],
    random_state=142,
)

# Standardize every dataset with its own freshly fitted scaler
X = [
    StandardScaler().fit_transform(raw)
    for raw in (circles, moons, blobs, plane, stretched, varied)
]

# Number of clusters requested for each dataset, in the same order as X
clusters = [2, 2, 3, 3, 3, 3]

Draw data

In [None]:
## Draw
# One figure with a 2x3 grid of panels; each dataset in X gets its own
# panel, filled row by row.
fig, axes = plt.subplots(2, 3, figsize=(27, 9))
for ax, points in zip(axes.flat, X):
    ax.scatter(points[:, 0], points[:, 1])

# Kmeans

In [None]:
# Fit one K-means model per dataset, using the matching entry of `clusters`
# as the number of clusters, and keep the integer label of every point.
y = []
for n_groups, points in zip(clusters, X):
    model = cluster.KMeans(n_clusters=n_groups, n_init='auto')
    y.append(model.fit(points).labels_.astype(int))

# Show the six datasets in a 2x3 grid, coloring each point by its label.
fig, axes = plt.subplots(2, 3, figsize=(27, 9))
fig.suptitle('Kmeans', fontsize=48)
for ax, points, labels in zip(axes.flat, X, y):
    ax.scatter(points[:, 0], points[:, 1], c=labels)

# Spectral clustering

In [None]:
# Fit one spectral-clustering model per dataset with a nearest-neighbors
# affinity, using the matching entry of `clusters` as the number of clusters.
y = []
for n_groups, points in zip(clusters, X):
    model = cluster.SpectralClustering(
        n_clusters=n_groups,
        affinity="nearest_neighbors",
    )
    y.append(model.fit(points).labels_.astype(int))

# Show the six datasets in a 2x3 grid, coloring each point by its label.
fig, axes = plt.subplots(2, 3, figsize=(27, 9))
fig.suptitle('SpectralClustering', fontsize=48)
for ax, points, labels in zip(axes.flat, X, y):
    ax.scatter(points[:, 0], points[:, 1], c=labels)

# DBSCAN

In [None]:
# Fit one DBSCAN model per dataset; `eps` holds the neighborhood radius
# chosen for each dataset, in the same order as X.
y = []
eps = [0.3, 0.3, 0.3, 0.3, 0.15, 0.18]
for radius, points in zip(eps, X):
    model = cluster.DBSCAN(eps=radius)
    y.append(model.fit(points).labels_.astype(int))

# Show the six datasets in a 2x3 grid, coloring each point by its label.
fig, axes = plt.subplots(2, 3, figsize=(27, 9))
fig.suptitle('DBSCAN', fontsize=48)
for ax, points, labels in zip(axes.flat, X, y):
    ax.scatter(points[:, 0], points[:, 1], c=labels)