### Unsupervised Learning: Cluster Assignment

#### Paper: Alternatives to the k-means algorithm that find better clusterings

#### Author: Joaquim Marset Alsina

### Imports

In [None]:
from algorithms.kmeans import KMeans
from algorithms.fuzzy_c_means import FuzzyCMeans
from algorithms.k_harmonic_means import KHarmonicMeans
from algorithms.hybrid import Hybrid1, Hybrid2
from algorithms.gaussian_EM import GaussianEM

from datasets.birch_data import generate_birch_data
from datasets.pelleg_moore_data import generate_pelleg_moore_data
from datasets.hand_image_data import generate_hand_image_data
from datasets.clean_adult import preprocess_adult

from experiments import experiment_1, experiment_2, experiment_4
from constants import *

import os
import numpy as np
import time

### Create required folders

In [None]:
# Output folder layout: a 'results' folder under root_path that holds one
# sub-folder per experiment, named 'experiment_1' through 'experiment_4'.
root_path = './'
results_path = os.path.join(root_path, 'results')
(
    experiment_1_results_path,
    experiment_2_results_path,
    experiment_3_results_path,
    experiment_4_results_path,
) = (
    os.path.join(results_path, f'experiment_{number}')
    for number in range(1, 5)
)

In [None]:
# Create the results folder and every per-experiment sub-folder.
# exist_ok=True makes this cell safe to re-run when the folders already exist.
for directory in (
    results_path,
    experiment_1_results_path,
    experiment_2_results_path,
    experiment_3_results_path,
    experiment_4_results_path,
):
    os.makedirs(directory, exist_ok=True)

### Experiment 1: Paper's first experiment

In [None]:
# --- Data generation (arguments of generate_birch_data) ---
grid_size = 10
points_per_cluster = 100
center_distance = np.sqrt(2) * 4

# Number of clusters requested from every algorithm: grid_size x grid_size.
num_clusters = grid_size ** 2

# --- Settings shared by several algorithm constructors ---
iterations = 100
repetitions = 1
threshold = 0.001

# --- Algorithm-specific settings ---
fuzzy_degree = 1.3        # forwarded to FuzzyCMeans
harmonic_p = 3.5          # forwarded to KHarmonicMeans, Hybrid1 and Hybrid2
gem_cov_diagonal = 0.2    # forwarded to GaussianEM

In [None]:
# Build one instance of each clustering algorithm from the parameters set in
# the previous cell.

# KHarmonicMeans and both hybrids are constructed with identical arguments.
harmonic_args = (num_clusters, iterations, harmonic_p, threshold, repetitions)

k_means = KMeans(num_clusters, iterations, repetitions)
fuzzy_c_means = FuzzyCMeans(
    num_clusters, iterations, fuzzy_degree, threshold, repetitions
)
k_harmonic_means = KHarmonicMeans(*harmonic_args)
# NOTE(review): unlike the others, GaussianEM receives no `repetitions` and
# takes `threshold` before its own setting - confirm against its signature.
gaussian_EM = GaussianEM(num_clusters, iterations, threshold, gem_cov_diagonal)
hybrid_1 = Hybrid1(*harmonic_args)
hybrid_2 = Hybrid2(*harmonic_args)

In [None]:
# Time the whole of experiment 1, data generation included.
start_time = time.time()

# The second return value of generate_birch_data is not needed here.
birch_data, _, true_cluster_centers = generate_birch_data(
    grid_size, center_distance, points_per_cluster
)

# The cluster radius is a quarter of the distance between the first two
# centers (assumes these two are neighbouring centers - TODO confirm).
dist = np.linalg.norm(true_cluster_centers[1] - true_cluster_centers[0])
cluster_radius = dist / 4

# Run both variants of the experiment on the same data, in the same order.
for run_experiment in (
    experiment_1.perform_experiment_forgy,
    experiment_1.perform_experiment_random_partition,
):
    run_experiment(birch_data, true_cluster_centers, cluster_radius)

elapsed_seconds = time.time() - start_time
print(f'Time to run experiment 1: {elapsed_seconds:.2f} seconds')

### Experiment 2: Paper's second experiment

### Experiment 3: Paper's hand semantic segmentation experiment

### Experiment 4: Compare K-Harmonic Means with some scikit-learn algorithms