# Natural Computing - Assignment 2 - Swarm Intelligence
## Exercise 3 - PSO
#### Submission by group 25 (Chihab Amghane, Max Driessen, Jordy Naus)

This file contains our solution to exercise 3 of the "Swarm Intelligence" assignment of the Natural Computing course.

**NOTE:** Remarks and TODOs in this notebook are marked with "!!!".

### Imports

In [1]:
from sklearn.cluster import KMeans # !!! Temporary (?), see K-means clustering algorithm below
from sklearn import datasets
import numpy as np
import random

### PSO clustering algorithm

In [2]:
def PSO_clustering(n_clusters, n_particles, n_iterations, data, omega=0.7298, alpha_1=1.49618, alpha_2=1.49618, verbose=False):
    """Cluster `data` with a global-best particle swarm.

    Each particle is a full candidate solution: an array holding one centroid
    per cluster. Particles are scored with `quantization_error` (lower is
    better) and moved towards their own best position and the swarm's best
    position found so far.

    Args:
        n_clusters: Number of centroids in every particle (at least 1).
        n_particles: Number of particles in the swarm (at least 1).
        n_iterations: Number of evaluate-and-move rounds to perform.
        data: 2-D NumPy array with one datapoint per row.
        omega: Weight applied to a particle's previous speed.
        alpha_1: Weight of the pull towards the particle's own best position.
        alpha_2: Weight of the pull towards the swarm's best position.
        verbose: If True, print the iteration number at every iteration.

    Returns:
        The best particle found, an array of shape (n_clusters, n_dims).
        If `n_iterations` is 0 no particle is ever evaluated and an empty
        list is returned.

    Raises:
        ValueError: If `n_clusters` or `n_particles` is smaller than 1.
    """
    # Fail early with a clear message instead of deep inside np.argmin
    if n_clusters < 1 or n_particles < 1:
        raise ValueError("n_clusters and n_particles must both be at least 1")

    # Compute number of dimensions
    n_dims = data.shape[1]

    # Compute range of input data in every dimension (to ensure reasonable initial particles)
    lows, highs = data.min(axis=0), data.max(axis=0)
    ranges = [(lows[dim], highs[dim]) for dim in range(n_dims)]

    # Initialize particles with random centroids
    particles, speeds = [], []
    for _ in range(n_particles):
        # Every centroid coordinate is drawn from the data range widened by 10 on both sides
        particle = np.array([[random.uniform(low - 10, high + 10) for low, high in ranges]
                             for _ in range(n_clusters)])
        particles.append(particle)
        # Particles start at rest (a random initial speed would be an alternative)
        speeds.append(np.zeros(particle.shape))

    # Initialize local & global bests as (position, fitness) pairs
    local_bests = [([], float('inf')) for _ in particles]
    global_best = ([], float('inf'))

    # Perform iterations until termination
    for iteration in range(n_iterations):
        if verbose:
            print(f"iteration {iteration}")

        # Evaluate every particle and update its local best
        for i, particle in enumerate(particles):
            fitness = quantization_error(particle, data)
            if fitness < local_bests[i][1]:
                local_bests[i] = (particle, fitness)

        # Update global best
        best_in_iteration = np.argmin([local_best[1] for local_best in local_bests])
        if local_bests[best_in_iteration][1] < global_best[1]:
            global_best = local_bests[best_in_iteration]

        # Update particles
        for i in range(n_particles):
            # Take the shape from the particle being updated, not from a variable
            # left over from the evaluation loop above
            shape = particles[i].shape
            r_1 = np.random.uniform(0, 1, shape)
            r_2 = np.random.uniform(0, 1, shape)
            speeds[i] = (omega * speeds[i]
                         + alpha_1 * r_1 * (local_bests[i][0] - particles[i])
                         + alpha_2 * r_2 * (global_best[0] - particles[i]))
            # Rebind instead of using "+=": the stored best positions reference
            # these arrays, so an in-place update would silently change them
            particles[i] = particles[i] + speeds[i]

    # Return the global best
    return global_best[0]

### Quantization error

In [3]:
def quantization_error(particle, X):
    """Return the quantization error of the centroids in `particle` on `X`.

    Every datapoint is attributed to the centroid nearest to it (Euclidean
    distance). The distances are averaged per centroid, and those averages
    are summed and divided by the number of centroids. A centroid without
    any datapoints contributes 0 to the sum but still counts in the divisor.
    """
    # Retrieve number of clusters
    n_clusters = len(particle)

    # One list of distances per centroid, indexed by the centroid's position
    buckets = [[] for _ in range(n_clusters)]
    for datapoint in X:
        # Euclidean distance between this datapoint and every centroid
        distances = [np.sqrt(np.sum((centroid - datapoint) ** 2)) for centroid in particle]
        # Record the smallest distance under the centroid that achieves it
        nearest = np.argmin(distances)
        buckets[nearest].append(min(distances))

    # Average distance per non-empty cluster, in centroid order
    mean_distances = (sum(bucket) / len(bucket) for bucket in buckets if bucket)

    # Sum of the averages divided by the total number of centroids
    return sum(mean_distances) / n_clusters

### K-means clustering algorithm

In [4]:
# !!! We should probably implement this ourselves...
def KMeans_clustering(n_clusters, n_init, max_iter, data):
    """Fit scikit-learn's KMeans (random initialisation) on `data`.

    `n_clusters`, `n_init` and `max_iter` are passed straight through to the
    KMeans constructor. Returns the `cluster_centers_` of the fitted model.
    """
    settings = {
        "init": "random",
        "n_clusters": n_clusters,
        "n_init": n_init,
        "max_iter": max_iter,
    }
    estimator = KMeans(**settings)
    estimator.fit(data)
    return estimator.cluster_centers_

### Loading datasets

#### Artificial dataset I 
(as described [here](https://scholar.google.nl/scholar?hl=nl&as_sdt=0%2C5&q=Van+der+Merwe%2C+D.+W.%2C+and+Andries+Petrus+Engelbrecht.+%22Data+clustering+using+particle+swarm+optimization%22&btnG=))

In [5]:
# Number of datapoints to generate
artificial_dataset_size = 400

# Every datapoint (z_1, z_2) is drawn uniformly from the square [-1, 1] x [-1, 1]
artificial_X = np.array([[random.uniform(-1, 1) for _ in range(2)]
                         for _ in range(artificial_dataset_size)])

# Class 1 if z_1 >= 0.7, or if z_1 <= 0.3 and z_2 >= -0.2 - z_1; class 0 otherwise
artificial_Y = np.array([
    1 if z_1 >= 0.7 or (z_1 <= 0.3 and z_2 >= -0.2 - z_1) else 0
    for z_1, z_2 in artificial_X
])

# The number of clusters equals the number of distinct class labels
artificial_n_clusters = np.unique(artificial_Y).size

#### Iris dataset

In [6]:
# Load the Iris dataset through sklearn.datasets
iris = datasets.load_iris()

# Feature matrix and class labels
iris_X, iris_Y = iris.data, iris.target

# The number of clusters equals the number of distinct class labels
iris_n_clusters = np.unique(iris_Y).size

### Testing

!!! TODO: We do not know yet whether this amount of testing is sufficient.

#### Artificial dataset I

In [7]:
# Run both algorithms on the artificial dataset with matching budgets
artificial_solution_PSO = PSO_clustering(
    n_clusters=artificial_n_clusters,
    n_particles=5,
    n_iterations=100,
    data=artificial_X,
)
artificial_solution_KMeans = KMeans_clustering(
    n_clusters=artificial_n_clusters,
    n_init=5,
    max_iter=100,
    data=artificial_X,
)

In [8]:
# Score both sets of centroids with the same quantization error and report them
artificial_error_PSO = quantization_error(particle=artificial_solution_PSO, X=artificial_X)
artificial_error_KMeans = quantization_error(particle=artificial_solution_KMeans, X=artificial_X)

print(f"PSO quantization error on artificial dataset I: {artificial_error_PSO}")
print(f"KMeans quantization error on artificial dataset I: {artificial_error_KMeans}")

PSO quantization error on artificial dataset I: 0.37508839492160023
KMeans quantization error on artificial dataset I: 0.5676370503289512


#### Iris dataset

In [9]:
# Run both algorithms on the Iris dataset with matching budgets
iris_solution_PSO = PSO_clustering(
    n_clusters=iris_n_clusters,
    n_particles=5,
    n_iterations=100,
    data=iris_X,
)
iris_solution_KMeans = KMeans_clustering(
    n_clusters=iris_n_clusters,
    n_init=5,
    max_iter=100,
    data=iris_X,
)

In [10]:
# Score both sets of centroids with the same quantization error and report them
iris_error_PSO = quantization_error(particle=iris_solution_PSO, X=iris_X)
iris_error_KMeans = quantization_error(particle=iris_solution_KMeans, X=iris_X)

print(f"PSO quantization error on iris dataset: {iris_error_PSO}")
print(f"KMeans quantization error on iris dataset: {iris_error_KMeans}")

PSO quantization error on iris dataset: 0.6295904619016034
KMeans quantization error on iris dataset: 0.6465653848597094
