In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# clustering models
from sklearn.cluster import KMeans
from sklearn.cluster import AffinityPropagation
from sklearn.cluster import DBSCAN
from sklearn.mixture import GaussianMixture
from sklearn.cluster import Birch
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import MeanShift

In [2]:
# Set the seed for reproducibility
np.random.seed(42)

# Step 2: Generate the toy datasets
dataset1, _ = make_blobs(n_samples=1000, n_features=2, centers=3)
dataset2, _ = make_classification(n_samples=1000, n_features=2, n_informative=2, n_redundant=0, n_repeated=0, n_clusters_per_class=1)
dataset3, _ = make_circles(n_samples=1000, noise=0.3)

# Step 3: Initialize the clustering algorithms
kmeans = KMeans(n_clusters=3)
aff_prop = AffinityPropagation()
dbscan = DBSCAN()
gmm = GaussianMixture(n_components=3)
birch = Birch(n_clusters=3)
agg_clust = AgglomerativeClustering(n_clusters=3)
mean_shift = MeanShift()

# Step 4: Fit each model on the datasets and make predictions
datasets = [dataset1, dataset2, dataset3]
clustering_algorithms = [kmeans, aff_prop, dbscan, gmm, birch, agg_clust, mean_shift]

cluster_assignments = []
for algo in clustering_algorithms:
    for dataset_idx, dataset in enumerate(datasets):
        algo_clusters = []
        if algo.__class__.__name__ == 'GaussianMixture':
            algo.fit(dataset)
            cluster_assignments.append(algo.predict(dataset))
        elif algo.__class__.__name__ == 'DBSCAN':
            labels = algo.fit_predict(dataset)
            unique_labels = np.unique(labels)
            cluster_assignments.append(np.where(labels == -1, len(unique_labels), labels))
        else:
            algo.fit(dataset)
            if hasattr(algo, 'labels_'):  # Some algorithms have 'labels_', others have 'predict'
                cluster_assignments.append(algo.labels_)
            else:
                cluster_assignments.append(algo.predict(dataset))
        algo_clusters.append(cluster_assignments[-1])
        plt.figure(figsize=(15, 5))
        plt.subplot(1, 3, dataset_idx + 1)
        plt.scatter(dataset[:, 0], dataset[:, 1], c=algo_clusters[-1], cmap='rainbow')
        plt.title(f"{algo.__class__.__name__} Clustering - Dataset {dataset_idx + 1}")
    plt.show()

NameError: name 'make_blobs' is not defined