In [2]:
import random
import numpy as np
import pandas as pd

data = pd.read_csv("k_means_output.csv")
features = data[["latitude", "longitude","frequency"]].values
features

array([[0.07130072, 0.23420549, 0.        ],
       [0.41587112, 0.26547543, 0.43243243],
       [0.31533413, 0.36758137, 0.02702703],
       [0.45375895, 0.4039566 , 0.24324324],
       [0.93705251, 0.37460115, 0.27027027],
       [0.18466587, 0.21633695, 0.        ],
       [0.43943914, 0.65411615, 0.72972973],
       [0.3973747 , 0.54818124, 0.        ],
       [0.        , 0.17358009, 0.05405405],
       [0.08323389, 0.23037652, 0.02702703],
       [0.31115752, 0.279515  , 0.67567568],
       [1.        , 0.38864071, 0.24324324],
       [0.40692124, 0.67708998, 0.        ],
       [0.3699284 , 0.36247607, 0.08108108],
       [0.76760143, 0.40650925, 0.21621622],
       [0.51163484, 0.69814933, 0.10810811],
       [0.74463007, 0.32354818, 0.51351351],
       [0.65602625, 0.32035737, 0.2972973 ],
       [0.10352029, 0.20931717, 0.05405405],
       [0.87529833, 0.27823867, 0.18918919],
       [0.64558473, 0.82578175, 0.        ],
       [0.48389021, 0.51499681, 0.2972973 ],
       [0.

In [3]:


class ClusterGA:
    def __init__(self, n_clusters, population_size, max_iterations):
        self.n_clusters = n_clusters
        self.population_size = population_size
        self.max_iterations = max_iterations
        
    def initialize_population(self, data):
        self.data = data
        self.population = []
        for i in range(self.population_size):
            clusters = [random.randint(0, self.n_clusters-1) for j in range(len(data))]
            self.population.append(clusters)
            
    def evaluate_fitness(self):
        fitness = []
        for individual in self.population:
            centroids = []
            cluster_frequencies = []
            for i in range(self.n_clusters):
                cluster_data = [self.data[j][:2] for j in range(len(self.data)) if individual[j] == i]
                cluster_freq = [self.data[j][2] for j in range(len(self.data)) if individual[j] == i]
                if len(cluster_data) > 0:
                    centroid = np.mean(cluster_data, axis=0)
                    variance = np.var(cluster_freq)
                else:
                    centroid = np.zeros(2)
                    variance = 0
                centroids.append(centroid)
                cluster_frequencies.append(variance)

            total_distance = 0
            for i in range(len(self.data)):
                cluster_index = individual[i]
                total_distance += np.linalg.norm(self.data[i][:2] - centroids[cluster_index])

            fitness.append(np.sum(cluster_frequencies) / total_distance)

        return fitness

        
    def select_parents(self, fitness):
        max_fitness = max(fitness)
        fitness = [max_fitness - f for f in fitness]
        total_fitness = sum(fitness)
        selection_prob = [f/total_fitness for f in fitness]
        
        parents = []
        for i in range(2):
            r = random.random()
            cum_prob = 0
            for j in range(len(self.population)):
                cum_prob += selection_prob[j]
                if cum_prob >= r:
                    parents.append(self.population[j])
                    break
                    
        return parents
        
    def crossover(self, parent1, parent2):
        crossover_point = random.randint(1, len(self.data)-2)
        child1 = parent1[:crossover_point] + parent2[crossover_point:]
        child2 = parent2[:crossover_point] + parent1[crossover_point:]
        return child1, child2
        
    def mutate(self, individual, mutation_rate):
        for i in range(len(individual)):
            if random.random() < mutation_rate:
                individual[i] = random.randint(0, self.n_clusters-1)
        return individual
        
    def run(self, data):
        self.initialize_population(data)
        best_fitness = float('inf')
        
        for i in range(self.max_iterations):
            fitness = self.evaluate_fitness()
            parents = self.select_parents(fitness)
            child1, child2 = self.crossover(parents[0], parents[1])
            child1 = self.mutate(child1, 0.01)
            child2 = self.mutate(child2, 0.01)
            self.population = [parents[0], parents[1], child1, child2] + [self.mutate(parent, 0.01) for parent in self.population[2:]]
            fitness = self.evaluate_fitness()
            best_index = np.argmin(fitness)
            best_clusters = self.population[best_index]
            if fitness[best_index] < best_fitness:
                best_fitness = fitness[best_index]
                best_clusters = self.population[best_index]
            
            print("Generation {}: Best fitness = {}, Best clusters = {}".format(i+1, best_fitness, best_clusters))
            
        
        return best_clusters

# data = np.array([
#     [19.1197 ,72.8464,10], #Andheri
#  [19.0544 ,72.8402,100],#bandra
#  [19.1645, 72.8493,3],#goregaon
#  [19.0669, 72.8774,7],#bkc
#  [19.076 , 72.8846,3],#kurla
#  [19.127 , 72.8291,11],#dnnagar
# ])

data = np.array(features)
ga = ClusterGA(n_clusters=5, population_size=5, max_iterations=100)

best_clusters = ga.run(data)

print("Best clusters found:", best_clusters)

Generation 1: Best fitness = 0.013490195088290122, Best clusters = [2, 3, 2, 3, 4, 0, 2, 2, 2, 3, 3, 2, 4, 2, 0, 4, 3, 4, 4, 0, 2, 3, 4, 3, 1, 3, 0, 2, 2, 2, 4, 4, 3, 2, 0, 1, 4, 3, 1, 3, 3, 4, 1, 3, 3, 0]
Generation 2: Best fitness = 0.013487284126266118, Best clusters = [2, 3, 2, 3, 4, 0, 2, 2, 2, 3, 3, 2, 4, 2, 3, 4, 3, 4, 4, 0, 2, 3, 4, 3, 1, 3, 0, 2, 2, 2, 4, 4, 3, 2, 0, 1, 4, 3, 1, 3, 3, 4, 1, 3, 3, 0]
Generation 3: Best fitness = 0.013487284126266118, Best clusters = [2, 3, 2, 3, 4, 0, 2, 2, 2, 3, 3, 2, 4, 2, 3, 4, 3, 4, 4, 0, 2, 3, 4, 3, 1, 3, 0, 2, 2, 2, 4, 4, 3, 2, 0, 1, 4, 3, 1, 3, 3, 4, 1, 3, 3, 0]
Generation 4: Best fitness = 0.01296777477665979, Best clusters = [2, 3, 2, 3, 4, 0, 2, 2, 2, 3, 3, 2, 4, 2, 4, 4, 3, 4, 4, 0, 2, 3, 4, 3, 1, 3, 0, 2, 2, 2, 4, 4, 3, 2, 0, 1, 4, 4, 1, 3, 3, 4, 1, 3, 3, 0]
Generation 5: Best fitness = 0.01296777477665979, Best clusters = [2, 3, 2, 3, 4, 0, 2, 2, 2, 3, 3, 2, 4, 2, 4, 4, 3, 4, 4, 0, 2, 3, 4, 3, 1, 3, 0, 2, 2, 2, 4, 4, 3, 2, 0, 1, 4,

In [4]:
print(best_clusters)

[4, 3, 4, 1, 1, 2, 3, 2, 2, 3, 3, 2, 4, 3, 0, 0, 3, 4, 4, 0, 2, 3, 4, 3, 2, 0, 0, 1, 2, 3, 4, 3, 3, 1, 2, 1, 4, 2, 1, 4, 3, 4, 0, 4, 3, 0]


In [20]:
df = pd.read_csv('k_means_output.csv')

In [21]:
df['GeneticCluster'] = best_clusters

In [22]:
df.head()

Unnamed: 0,location,frequency,latitude,longitude,Cluster,GeneticCluster
0,azad maidan,0.0,0.071301,0.234205,2,4
1,bandra,0.432432,0.415871,0.265475,1,3
2,bhoiwada,0.027027,0.315334,0.367581,2,4
3,bkc,0.243243,0.453759,0.403957,1,1
4,borivali,0.27027,0.937053,0.374601,4,1


In [23]:
df = df.rename(columns={'Cluster': 'Kmeans'})
df = df.rename(columns={'GeneticCluster': 'Genetic'})

In [24]:
df.head()

Unnamed: 0,location,frequency,latitude,longitude,Kmeans,Genetic
0,azad maidan,0.0,0.071301,0.234205,2,4
1,bandra,0.432432,0.415871,0.265475,1,3
2,bhoiwada,0.027027,0.315334,0.367581,2,4
3,bkc,0.243243,0.453759,0.403957,1,1
4,borivali,0.27027,0.937053,0.374601,4,1


In [25]:
df.to_csv('k_means_genetic_0.0.csv', index=False)