## Unsupervised Methods

In [1]:
!pip install hdbscan --quiet

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs, make_swiss_roll, make_circles, make_moons
from sklearn.mixture import GaussianMixture
from sklearn.manifold import MDS, TSNE
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering,  Birch, OPTICS
from sklearn.neighbors import LocalOutlierFactor
from sklearn.covariance import EmpiricalCovariance, ShrunkCovariance
from sklearn.ensemble import IsolationForest

In [3]:
def plot_scatter(
    X,
    labels,
    title,
    title_fsize=14,
    lab_font_size=12,
    cmap='winter',
    axis1=0,
    axis2=1,
    scatter_edge_color='k',
):
    """Show a scatter plot of two columns of ``X``, coloured by ``labels``.

    Parameters
    ----------
    X : 2-D array
        Data matrix; column ``axis1`` goes on the x axis and column
        ``axis2`` on the y axis.
    labels : array-like
        One value per row of ``X``, mapped to colours through ``cmap``.
    title : str
        Figure title.
    title_fsize : int, default 14
        Font size of the title.
    lab_font_size : int, default 12
        Font size of both axis labels.
    cmap : str, default 'winter'
        Matplotlib colormap name used for the points and the colorbar.
    axis1, axis2 : int, default 0 and 1
        Zero-based column indices of ``X`` to plot; the axis labels show
        them one-based ("Feature 1", "Feature 2", ...).
    scatter_edge_color : str, default 'k'
        Edge colour of the markers.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    points = ax.scatter(
        X[:, axis1],
        X[:, axis2],
        c=labels,
        cmap=cmap,
        s=40,
        edgecolor=scatter_edge_color,
        alpha=0.7,
    )
    fig.colorbar(points, ax=ax, label='Cluster/Outlier Labels')

    ax.set_title(title, fontsize=title_fsize)
    ax.set_xlabel(f"Feature {axis1+1}", fontsize=lab_font_size)
    ax.set_ylabel(f"Feature {axis2+1}", fontsize=lab_font_size)
    ax.grid(True, linestyle='--', alpha=0.6)

    plt.show()

## Generate data

In [4]:
# Shared settings for every synthetic dataset and estimator in the notebook.
n_samples = 900      # points per generated dataset
n_features = 9       # dimensionality of the blob dataset
n_clusters = 4       # number of blob centres / requested clusters
random_state = 42    # seed passed to generators and estimators
cmap = 'winter'

# Isotropic Gaussian blobs used by the clustering section.
X_cluster, y_cluster = make_blobs(
    n_samples=n_samples,
    n_features=n_features,
    centers=n_clusters,
    random_state=random_state,
)

In [5]:
# Swiss roll; the second return value is used later to colour the embeddings.
X_swiss_roll, swiss_roll_labels = make_swiss_roll(
    n_samples=n_samples,
    noise=0.05,
    random_state=random_state,
)


In [6]:
# Two concentric circles, generated with factor=0.5 and a little noise.
X_circles, circles_labels = make_circles(
    n_samples=n_samples,
    factor=0.5,
    noise=0.05,
    random_state=random_state,
)


In [7]:
# Two interleaving half-moons with a little noise.
X_moons, moons_labels = make_moons(
    n_samples=n_samples,
    noise=0.05,
    random_state=random_state,
)

In [8]:
# Inlier data: Gaussian blobs with make_blobs' default number of features.
X_outliers, _ = make_blobs(
    n_samples=n_samples,
    centers=n_clusters,
    cluster_std=1.0,
    random_state=random_state,
)

# Inject 50 uniformly scattered points as synthetic outliers. A dedicated,
# seeded generator is used (instead of the unseeded global np.random state)
# so the outliers are identical on every Restart & Run All, like the rest of
# the seeded data in this notebook.
outlier_rng = np.random.default_rng(random_state)
outlier_points = outlier_rng.uniform(low=-10, high=10, size=(50, X_outliers.shape[1]))

# Outliers are appended after the inliers, so they occupy the last 50 rows.
X_outliers = np.vstack([X_outliers, outlier_points])

## Clustering methods

### Gaussian mixture models

In [None]:
# One mixture component per expected cluster.
gmm = GaussianMixture(
    n_components=n_clusters,
    random_state=random_state,
)
gmm_labels = gmm.fit_predict(X_cluster)

# plot_scatter's default axes show only the first two columns of X_cluster.
plot_scatter(X=X_cluster, labels=gmm_labels, title='Gaussian Mixture Model')

### K-Means clustering

In [None]:
# K-Means with as many centroids as blob centres were generated.
kmeans = KMeans(
    n_clusters=n_clusters,
    random_state=random_state,
)
kmeans_labels = kmeans.fit_predict(X_cluster)

plot_scatter(X=X_cluster, labels=kmeans_labels, title='K-Means')

### Agglomerative Clustering

In [None]:
# Agglomerative clustering with estimator defaults, asked for n_clusters groups.
agglomerative = AgglomerativeClustering(n_clusters=n_clusters)
agglomerative_labels = agglomerative.fit_predict(X_cluster)

plot_scatter(
    X=X_cluster,
    labels=agglomerative_labels,
    title='Hierarchical Clustering (Agglomerative)',
)

### OPTICS clustering

In [None]:
# OPTICS does not take a cluster count; clusters are extracted from the
# reachability ordering using the xi / min_cluster_size settings below.
optics = OPTICS(
    min_samples=5,
    xi=0.05,
    min_cluster_size=0.1,
)
optics_labels = optics.fit_predict(X_cluster)

plot_scatter(X=X_cluster, labels=optics_labels, title='OPTICS')

### BIRCH Clustering

In [None]:
# BIRCH with the final clustering step asked for n_clusters groups.
birch = Birch(n_clusters=n_clusters)
birch_labels = birch.fit_predict(X_cluster)

plot_scatter(X=X_cluster, labels=birch_labels, title='BIRCH')

## Manifold Learning

### Multidimensional scaling

In [None]:
# Two-component MDS estimator; the same `mds` object is reused by the
# following cells for the other datasets.
mds = MDS(
    n_components=2,
    random_state=random_state,
)
fitted_mds = mds.fit_transform(X_swiss_roll)

plot_scatter(X=fitted_mds, labels=swiss_roll_labels, title="MDS on Swiss Roll")

In [None]:
# Re-fit the `mds` estimator created in an earlier cell on the circles data.
mds_circles = mds.fit_transform(X_circles)

plot_scatter(X=mds_circles, labels=circles_labels, title="MDS on Circles")

In [None]:
# Re-fit the `mds` estimator created in an earlier cell on the moons data.
mds_moons = mds.fit_transform(X_moons)

plot_scatter(X=mds_moons, labels=moons_labels, title="MDS on Moons")

### t-SNE (t-distributed Stochastic Neighbor Embedding)

In [None]:
# Two-component t-SNE estimator; the same `tsne` object is reused by the
# following cells for the other datasets.
tsne = TSNE(
    n_components=2,
    random_state=random_state,
)
fitted_tsne = tsne.fit_transform(X_swiss_roll)

plot_scatter(X=fitted_tsne, labels=swiss_roll_labels, title="t-SNE on Swiss Roll")

In [None]:
# Re-fit the `tsne` estimator created in an earlier cell on the circles data.
tsne_circles = tsne.fit_transform(X_circles)

plot_scatter(X=tsne_circles, labels=circles_labels, title="t-SNE on Circles")

In [None]:
# Re-fit the `tsne` estimator created in an earlier cell on the moons data.
tsne_moons = tsne.fit_transform(X_moons)

plot_scatter(X=tsne_moons, labels=moons_labels, title="t-SNE on Moons")

### DBSCAN

In [None]:
# Density-based clustering of the raw swiss-roll coordinates.
dbscan_swiss_roll = DBSCAN(
    eps=2.2,
    min_samples=5,
)
dbscan_labels_swiss_roll = dbscan_swiss_roll.fit_predict(X_swiss_roll)

# Plot columns 0 and 2 of the data instead of the default 0 and 1.
plot_scatter(
    X=X_swiss_roll,
    labels=dbscan_labels_swiss_roll,
    title="DBSCAN on Swiss Roll",
    axis1=0,
    axis2=2,
)

In [None]:
# Density-based clustering of the concentric-circles data.
dbscan_circles = DBSCAN(
    eps=0.2,
    min_samples=5,
)
dbscan_labels_circles = dbscan_circles.fit_predict(X_circles)

plot_scatter(X=X_circles, labels=dbscan_labels_circles, title="DBSCAN on Circles")

In [None]:
# Density-based clustering of the two-moons data.
dbscan_moons = DBSCAN(
    eps=0.2,
    min_samples=5,
)
dbscan_labels_moons = dbscan_moons.fit_predict(X_moons)

plot_scatter(X=X_moons, labels=dbscan_labels_moons, title="DBSCAN on Moons")

## Outlier detection methods

### Local Outlier Factor (LOF)

In [None]:
# Local Outlier Factor over 20-neighbour neighbourhoods; fit_predict labels
# inliers 1 and outliers -1.
lof = LocalOutlierFactor(n_neighbors=20)
lof_labels = lof.fit_predict(X_outliers)

plot_scatter(X=X_outliers, labels=lof_labels, title="Local Outlier Factor")

### Empirical covariance

In [None]:
# Fit location (mean) and covariance of the contaminated dataset.
emp_cov = EmpiricalCovariance()
emp_cov.fit(X_outliers)

# `mahalanobis` measures (squared) distances from the fitted location itself,
# so the raw observations are passed in. Subtracting the mean beforehand would
# centre the data twice and yield distances to the wrong point.
mahal_distances = emp_cov.mahalanobis(X_outliers)

# Flag the 2.5% of points with the largest distances as outliers.
outlier_mask = mahal_distances > np.percentile(mahal_distances, 97.5)

# Use the same convention as the other detectors in this section:
# 1 for inliers, -1 for outliers.
emp_cov_labels = np.where(outlier_mask, -1, 1)
plot_scatter(X_outliers, emp_cov_labels, "Empirical Covariance Outlier Detection")

### Isolation Forests

In [None]:
# Isolation Forest told to expect 5% contamination; fit_predict labels
# inliers 1 and outliers -1.
isolation_forest = IsolationForest(
    random_state=random_state,
    contamination=0.05,
)
isolation_labels = isolation_forest.fit_predict(X_outliers)

plot_scatter(
    X=X_outliers,
    labels=isolation_labels,
    title="Isolation Forest Outlier Detection",
)