# In [2]:
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

class Particle:
    """One PSO candidate: a complete set of cluster centroids.

    Attributes:
        position: array of shape (num_clusters, num_features); the current
            centroid coordinates, drawn uniformly from [0, 1).
        velocity: array of the same shape; the current per-coordinate step,
            also drawn uniformly from [0, 1).
        best_position: independent copy of the best ``position`` seen so far.
        best_fitness: score attached to ``best_position`` (lower is better).
    """

    def __init__(self, num_clusters, num_features):
        shape = (num_clusters, num_features)

        # Position is drawn before velocity; keep this order so that runs
        # seeded through ``np.random.seed`` stay reproducible.
        self.position = np.random.rand(*shape)
        self.velocity = np.random.rand(*shape)

        # The personal best starts as a snapshot of the initial position
        # with an infinite score, so any finite score improves on it.
        self.best_position = self.position.copy()
        self.best_fitness = np.inf

def fitness_function(data, particles, num_clusters):
    """Score every particle and refresh its personal best in place.

    A particle's fitness is the sum, over all rows of ``data``, of the
    Euclidean distance from that row to the nearest of the particle's first
    ``num_clusters`` centroids. Lower is better.

    Args:
        data: samples, one per row (the distance is taken along axis 1).
        particles: sequence of objects exposing ``position``,
            ``best_position`` and ``best_fitness``.
        num_clusters: number of centroid rows read from each ``position``.

    Returns:
        The same ``particles`` object that was passed in; the particles are
        updated in place.
    """
    num_samples = len(data)
    # Scratch buffer shared by all particles: every column is overwritten
    # for the current particle before it is read.
    dist_to_centroid = np.zeros((num_samples, num_clusters))

    for swarm_member in particles:
        centroids = swarm_member.position
        for k in range(num_clusters):
            dist_to_centroid[:, k] = np.linalg.norm(data - centroids[k], axis=1)

        # Each sample contributes its distance to the closest centroid.
        score = dist_to_centroid.min(axis=1).sum()

        if score < swarm_member.best_fitness:
            swarm_member.best_fitness = score
            swarm_member.best_position = centroids.copy()

    return particles

def update_velocity_position(particles, inertia, c1, c2, global_best_position):
    """Advance every particle by one PSO step, mutating it in place.

    The new velocity is the sum of three pulls: the previous velocity scaled
    by ``inertia``, the offset to the particle's own best position scaled by
    ``c1``, and the offset to the swarm's best position scaled by ``c2``.
    The two offsets are each multiplied by a fresh uniform random scalar
    (one scalar per term per particle, not one per coordinate).

    Args:
        particles: iterable of objects exposing ``position``, ``velocity``
            and ``best_position`` arrays.
        inertia: weight applied to the previous velocity.
        c1: cognitive (personal-best) coefficient.
        c2: social (global-best) coefficient.
        global_best_position: array broadcastable against ``position``.

    Returns:
        None. Each particle's ``velocity`` and ``position`` are rebound to
        newly allocated arrays.
    """
    for p in particles:
        # Evaluated left to right, so the cognitive random factor is drawn
        # before the social one.
        new_velocity = (
            inertia * p.velocity
            + c1 * np.random.rand() * (p.best_position - p.position)
            + c2 * np.random.rand() * (global_best_position - p.position)
        )

        p.velocity = new_velocity
        p.position = p.position + new_velocity

def initialize_particles(num_particles, num_clusters, num_features):
    """Build a fresh swarm of independently randomised particles.

    Args:
        num_particles: how many particles to create.
        num_clusters: number of centroids each particle carries.
        num_features: dimensionality of each centroid.

    Returns:
        A list of ``num_particles`` new ``Particle`` instances.
    """
    swarm = []
    for _ in range(num_particles):
        swarm.append(Particle(num_clusters, num_features))
    return swarm

# ---------------------------------------------------------------------------
# Experiment script: sweep PSO-clustering hyper-parameters, plot the
# convergence curve of every run, and report the best configuration found
# for each iteration budget.
# ---------------------------------------------------------------------------

# Load your data and set parameters.
# Rows are treated as samples and columns as features (see num_features).
data = np.loadtxt("./data/wikipedia_td.txt")
num_particles = 30
num_clusters = 10
num_features = data.shape[1]
# Baseline coefficients. NOTE: the sweep loops below rebind `inertia`, `c1`
# and `c2`, so these three values are never used by the experiment itself.
inertia = 0.5
c1 = 2.0
c2 = 2.0

# Experiment with different maximum iteration values (100, 500, 1000)
max_iterations_values = [100, 500, 1000]

# Experiment with different parameter values
inertia_values = [0.3, 0.5, 0.7]
c1_values = [1.5, 2.0, 2.5]
c2_values = [1.5, 2.0, 2.5]

# Collect one summary record per parameter combination
results = []

for max_iterations in max_iterations_values:
    for inertia in inertia_values:
        for c1 in c1_values:
            for c2 in c2_values:
                # Every combination starts from a fresh random swarm
                particles = initialize_particles(num_particles, num_clusters, num_features)
                global_best_position = None
                global_best_fitness = float('inf')
                fitness_history = []

                # PSO Main Loop
                for iteration in range(max_iterations):
                    # Evaluate the swarm; this refreshes each particle's personal best
                    particles = fitness_function(data, particles, num_clusters)

                    # Update global best from the personal bests
                    for particle in particles:
                        if particle.best_fitness < global_best_fitness:
                            global_best_fitness = particle.best_fitness
                            global_best_position = particle.best_position.copy()

                    # Without this guard the velocity update below would fail
                    # with an opaque TypeError on `None - array`.
                    if global_best_position is None:
                        raise ValueError(
                            "No particle produced a fitness below infinity; "
                            "check the input data for NaN/inf values."
                        )

                    fitness_history.append(global_best_fitness)

                    # Update particles' velocity and position
                    update_velocity_position(particles, inertia, c1, c2, global_best_position)

                # Store the results for this combination of parameters
                results.append({
                    'max_iterations': max_iterations,
                    'inertia': inertia,
                    'c1': c1,
                    'c2': c2,
                    'best_fitness': global_best_fitness
                })
                # Plot fitness vs number of iterations (one curve per combination)
                plt.plot(range(1, max_iterations + 1), fitness_history, label=f"max_iter={max_iterations}, inertia={inertia}, c1={c1}, c2={c2}")

plt.xlabel('Number of Iterations')
plt.ylabel('Fitness Value')
plt.legend()
plt.show()

# Find the best (lowest-fitness) parameter combination for each
# max_iterations value.
best_results_df = pd.DataFrame(results)
# Pick the winning row label per group instead of using groupby().apply():
# newer pandas versions stop passing the grouping column into apply(), which
# would make the 'max_iterations' column selection below fail. Selecting the
# rows with .loc also keeps the original column dtypes.
best_row_labels = best_results_df.groupby('max_iterations')['best_fitness'].idxmin()
# Keep the result indexed by max_iterations (as before) while retaining the
# column so it can still be selected by name.
best_results = best_results_df.loc[best_row_labels].set_index('max_iterations', drop=False)

print("\nBest Results:")
print(best_results[['max_iterations', 'inertia', 'c1', 'c2', 'best_fitness']])
