In [None]:
from typing import List
from numpy import np

(a) Implement the PSO algorithm for clustering described in: Van der Merwe, D. W.,
and Engelbrecht, A. P., "Data clustering using particle swarm optimization,"
The 2003 Congress on Evolutionary Computation (CEC '03), Vol. 1, IEEE, 2003
(see also the lecture's slides on swarm intelligence).

In [None]:
def init_population(size:int, K:int):
    """Create the initial swarm.

    Returns a ``(size, K)`` float array whose entries are drawn
    independently from the uniform distribution on ``[-1, 1)`` using
    NumPy's global random state. Each row is one particle.
    """
    # A single vectorized draw consumes the global random stream in the same
    # row-major order as filling the matrix one row at a time.
    return np.random.uniform(-1, 1, (size, K))

def calc_fitness(assignments, dataset, particle):
    """Return the quantization error of a particle's clustering.

    For every cluster the mean distance between its assigned points and the
    cluster's centroid is computed; the fitness is the average of those
    per-cluster means over all ``len(particle)`` clusters (lower is better).

    Args:
        assignments: Sequence (list or array) holding, for each data point,
            the index of the cluster it is assigned to.
        dataset: Data points, shape ``(n,)`` for scalar data or ``(n, d)``
            for d-dimensional data.
        particle: Cluster centroids, shape ``(K,)`` for scalar data or
            ``(K, d)`` for d-dimensional data.

    Returns:
        The quantization error as a float. A cluster with no assigned points
        contributes 0 to the sum but is still counted in the divisor.

    Raises:
        ZeroDivisionError: If ``particle`` contains no centroids.
    """
    # Convert up front: `list == int` is a plain bool, not an element-wise
    # mask, so list inputs must become arrays before masking.
    labels = np.asarray(assignments)
    data = np.asarray(dataset, dtype=float)
    centroids = np.asarray(particle, dtype=float)
    num_clusters = len(centroids)

    fitness = 0.0
    for cluster in range(num_clusters):
        members = data[labels == cluster]
        if len(members) == 0:
            # Skip empty clusters instead of dividing 0 by 0 (which gives nan).
            continue
        # Distance is measured to the centroid, not to the cluster index.
        offsets = (members - centroids[cluster]).reshape(len(members), -1)
        # Euclidean norm per point; for scalar data this is the absolute value.
        fitness += np.linalg.norm(offsets, axis=1).mean()
    return fitness / num_clusters

def update_velocity(vel, local_best, global_best, current, omega, alpha1, alpha2, r1, r2):
    """Compute a particle's next velocity.

    The result is the sum of three terms: the previous velocity scaled by
    ``omega``, the offset towards ``local_best`` scaled by ``alpha1 * r1``,
    and the offset towards ``global_best`` scaled by ``alpha2 * r2``.
    """
    inertia = omega * vel
    cognitive = alpha1 * r1 * (local_best - current)
    social = alpha2 * r2 * (global_best - current)
    return inertia + cognitive + social

def update_position(current, velocity):
    """Return the position reached by moving ``current`` by ``velocity``."""
    return current + velocity



def k_means_PSO(K:int, dataset:np.ndarray, pop_size:int, max_iter:int, omega:float, alpha1:float, alpha2:float, r1:float, r2:float):
    """
    Clusters the dataset into K clusters using a PSO algorithm.

    Each particle encodes K candidate centroids. In every iteration each
    data point is assigned to its nearest centroid, the particle is scored
    with ``calc_fitness`` (lower is better), personal and global bests are
    updated, and finally all particles are moved with ``update_velocity``
    and ``update_position``.

    Args:
        K: Number of clusters (must be >= 1).
        dataset: Data points, shape ``(n,)`` for scalar data or ``(n, d)``
            for d-dimensional data; must contain at least one point.
        pop_size: Number of particles in the swarm (must be >= 1).
        max_iter: Number of PSO iterations.
        omega: Inertia weight applied to the previous velocity.
        alpha1: Weight of the pull towards a particle's personal best.
        alpha2: Weight of the pull towards the global best.
        r1: Factor multiplied with ``alpha1``.
        r2: Factor multiplied with ``alpha2``.

    Returns:
        The best centroids found, shape ``(K,)`` for scalar data or
        ``(K, d)`` for d-dimensional data. If ``max_iter`` is 0 no particle
        is evaluated and the first initial particle is returned.

    Raises:
        ValueError: If ``K`` or ``pop_size`` is smaller than 1, or if the
            dataset is empty.
    """
    # NOTE(review): r1/r2 are caller-supplied constants here; PSO formulations
    # usually draw them at random for every update. Kept as parameters so the
    # existing interface does not change.
    if K < 1 or pop_size < 1:
        raise ValueError("K and pop_size must both be at least 1")
    data = np.asarray(dataset, dtype=float)
    if data.size == 0:
        raise ValueError("dataset must contain at least one point")

    scalar_data = data.ndim == 1
    # Work on an (n, d) view so scalar and vector data share one code path.
    points = data.reshape(len(data), -1)
    n_features = points.shape[1]

    # Particles are stored flat: K centroids of n_features values each.
    population = init_population(pop_size, K * n_features)
    velocities = np.zeros_like(population)
    local_best = population.copy()
    # Cache personal-best scores so they never have to be recomputed.
    local_best_fitness = np.full(pop_size, np.inf)
    # Defined up front so max_iter == 0 still returns a valid particle.
    global_best = local_best[0].copy()

    for _ in range(max_iter):
        for idx, particle in enumerate(population):
            centroids = particle.reshape(K, n_features)
            # Distance of every point to every centroid, shape (n, K).
            distances = np.linalg.norm(
                points[:, np.newaxis, :] - centroids[np.newaxis, :, :], axis=2
            )
            assignments = np.argmin(distances, axis=1)
            if scalar_data:
                fitness = calc_fitness(assignments, data, particle)
            else:
                fitness = calc_fitness(assignments, points, centroids)
            # A nan fitness never compares as smaller, so it cannot replace a best.
            if fitness < local_best_fitness[idx]:
                local_best_fitness[idx] = fitness
                local_best[idx] = particle

        # Copy so later personal-best updates cannot alter the global best.
        global_best = local_best[np.argmin(local_best_fitness)].copy()

        # The update rules are plain arithmetic, so the whole swarm is moved
        # at once; global_best broadcasts across the particle axis.
        velocities = update_velocity(
            velocities, local_best, global_best, population,
            omega, alpha1, alpha2, r1, r2,
        )
        population = update_position(population, velocities)

    if scalar_data:
        return global_best
    return global_best.reshape(K, n_features)
            




(b) Implement the k-means clustering method.

(c) Generate artificial dataset 1 using the description given in the
above-mentioned paper.
