In [14]:
import random
import numpy as np
import pandas as pd

# Read the GA input table once and derive both NumPy views from that frame.
input_frame = pd.read_csv("ga_input_0.1.csv")

# Numeric columns handed to the GA: latitude, longitude, frequency (in that order).
features = input_frame[["Latitude", "Longitude", "Frequency"]].to_numpy()

# Full labelled records; ClusterGA reads the last column ("Cluster") of each
# row when it builds its initial population.
originalData = input_frame[
    ["Location", "Frequency", "Latitude", "Longitude", "Cluster"]
].to_numpy()

# Last expression of the cell: display both arrays.
features, originalData

(array([[19.115491, 72.854202, 62.      ],
        [19.058336, 72.830267, 82.      ],
        [19.055292, 72.903055, 48.      ],
        [19.019282, 72.842875, 35.      ],
        [18.933266, 72.836019, 16.      ],
        [19.086521, 72.908956, 53.      ],
        [19.119284, 72.908609, 76.      ],
        [19.096421, 72.862288, 93.      ],
        [19.137622, 72.83192 , 42.      ],
        [19.20838 , 72.842058, 27.      ],
        [18.995706, 72.830745, 65.      ],
        [19.186719, 72.836496, 17.      ],
        [18.925648, 72.82383 , 10.      ],
        [19.123011, 72.908091, 50.      ],
        [18.935955, 72.821655, 27.      ],
        [19.128251, 72.928739, 29.      ],
        [19.102918, 72.887755, 72.      ],
        [19.20892 , 72.978725, 38.      ],
        [19.01394 , 72.826691, 47.      ],
        [19.075768, 73.000236, 89.      ],
        [19.011696, 72.81807 , 20.      ],
        [19.108253, 72.86501 , 56.      ],
        [19.128151, 72.820592, 31.      ],
        [19

In [37]:


class ClusterGA:
    """Genetic-algorithm search over cluster assignments of data rows.

    An individual is a list holding one integer gene per data row.
    ``evaluate_fitness`` reads each gene as the cluster index of that row, and
    ``run`` treats *lower* fitness as better (it tracks the ``argmin``).

    Parameters
    ----------
    n_clusters : int
        Number of cluster labels; genes are interpreted as indices in
        ``range(n_clusters)``.
    population_size : int
        Number of individuals created by ``initialize_population``.
    max_iterations : int
        Number of generations executed by ``run``.
    originalData : sequence of rows
        Labelled records whose last column is an integer-like cluster id;
        used only to seed the initial population.
    mutation_rate : float, optional
        Per-gene mutation probability applied by ``run`` (default ``0.01``).
    verbose : bool, optional
        When true (the default) the initial population and the progress of
        every generation are printed.
    """

    def __init__(self, n_clusters, population_size, max_iterations, originalData,
                 mutation_rate=0.01, verbose=True):
        self.n_clusters = n_clusters
        self.population_size = population_size
        self.max_iterations = max_iterations
        self.originalData = originalData
        self.mutation_rate = mutation_rate
        self.verbose = verbose

    def initialize_population(self, data):
        """Store ``data`` and seed the population from the labelled records.

        Individual ``i`` is a 0/1 mask with a 1 for every record in
        ``self.originalData`` whose last column equals ``i``.  Because
        ``evaluate_fitness`` interprets genes as cluster indices, every seed
        individual therefore only uses the labels 0 and 1.
        """
        self.data = data
        # Use the records handed to the constructor rather than a global of
        # the same name, so the class works outside the notebook namespace.
        seed_labels = [int(row[-1]) for row in self.originalData]
        self.population = [
            [1 if label == i else 0 for label in seed_labels]
            for i in range(self.population_size)
        ]
        if self.verbose:
            print(self.population)

    def evaluate_fitness(self):
        """Return one fitness value (a float) per individual; lower is better.

        For each individual the score is the sum over clusters of the variance
        of column 2 of ``self.data``, divided by the summed Euclidean distance
        of every row's first two columns to its cluster centroid.  Empty
        clusters contribute a zero centroid and zero variance.  When the total
        distance is not positive the ratio is undefined and ``inf`` (worst
        under minimisation) is returned instead of ``nan``.
        """
        points = np.asarray(self.data, dtype=float)
        coords = points[:, :2]
        freqs = points[:, 2]
        n_points = len(points)

        fitness = []
        for individual in self.population:
            labels = np.asarray(individual[:n_points], dtype=int)
            centroids = np.zeros((self.n_clusters, 2))
            variance_sum = 0.0
            for k in range(self.n_clusters):
                members = labels == k
                if members.any():
                    centroids[k] = coords[members].mean(axis=0)
                    variance_sum += freqs[members].var()

            # Distance of every row to the centroid of its assigned cluster.
            total_distance = np.linalg.norm(coords - centroids[labels], axis=1).sum()

            if total_distance > 0:
                fitness.append(float(variance_sum / total_distance))
            else:
                fitness.append(float('inf'))

        return fitness

    def select_parents(self, fitness):
        """Pick two parents by roulette wheel, favouring low fitness.

        Each individual's weight is ``worst - fitness`` (``worst`` being the
        largest finite fitness), so the worst individual has weight zero.
        Non-finite fitness values also get weight zero.  If no weight is
        positive (for example, all fitness values are equal) parents are
        drawn uniformly, which avoids a division by zero.  Always returns a
        list of exactly two individuals (references into ``self.population``).
        """
        finite = [f for f in fitness if np.isfinite(f)]
        if finite:
            worst = max(finite)
            weights = [worst - f if np.isfinite(f) else 0.0 for f in fitness]
        else:
            weights = []

        if sum(weights) > 0:
            return random.choices(self.population, weights=weights, k=2)
        # Degenerate wheel: every candidate is equally (un)fit.
        return [random.choice(self.population) for _ in range(2)]

    def crossover(self, parent1, parent2):
        """Single-point crossover; returns two new child lists.

        The cut point is drawn from ``[1, len(self.data) - 2]``.  With fewer
        than three genes no such point exists, so plain copies of the parents
        are returned.
        """
        n_genes = len(self.data)
        if n_genes < 3:
            return list(parent1), list(parent2)
        crossover_point = random.randint(1, n_genes - 2)
        child1 = parent1[:crossover_point] + parent2[crossover_point:]
        child2 = parent2[:crossover_point] + parent1[crossover_point:]
        return child1, child2

    def mutate(self, individual, mutation_rate):
        """Mutate ``individual`` in place and return it.

        Each gene is replaced, with probability ``mutation_rate``, by a random
        label in ``[0, n_clusters - 1]``.  Callers that must keep the original
        should pass a copy.
        """
        for i in range(len(individual)):
            if random.random() < mutation_rate:
                individual[i] = random.randint(0, self.n_clusters - 1)
        return individual

    def run(self, data):
        """Evolve the population and return the best individual seen.

        The best individual is tracked across the evaluations made after each
        generation's update and returned as a copy.  Returns ``None`` when
        ``max_iterations`` is 0, because no generation is evaluated.
        """
        self.initialize_population(data)
        best_fitness = float('inf')
        best_clusters = None

        for i in range(self.max_iterations):
            fitness = self.evaluate_fitness()
            parents = self.select_parents(fitness)
            child1, child2 = self.crossover(parents[0], parents[1])
            child1 = self.mutate(child1, self.mutation_rate)
            child2 = self.mutate(child2, self.mutation_rate)

            # Copy before mutating: the selected parents are references into
            # self.population, so mutating the carried-over individuals in
            # place would also alter the "unchanged" elite parents.
            carried_over = [
                self.mutate(list(individual), self.mutation_rate)
                for individual in self.population[2:]
            ]
            # NOTE(review): this adds four individuals but removes only two,
            # so the population grows by two each generation. Kept as is;
            # confirm the intended replacement policy.
            self.population = [list(parents[0]), list(parents[1]), child1, child2] + carried_over

            fitness = self.evaluate_fitness()
            best_index = int(np.argmin(fitness))
            # Only replace the champion on a strict improvement (or on the
            # first generation), and store a copy so later in-place mutation
            # cannot change the returned result.
            if best_clusters is None or fitness[best_index] < best_fitness:
                best_fitness = fitness[best_index]
                best_clusters = list(self.population[best_index])

            if self.verbose:
                print("Generation {}: Best fitness = {}, Best clusters = {}".format(i+1, best_fitness, best_clusters))

        return best_clusters

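# Small hand-written sample kept for quick experiments
# (columns presumably latitude, longitude, frequency - confirm before use):
# data = np.array([
#     [19.1197, 72.8464, 10],   # Andheri
#     [19.0544, 72.8402, 100],  # Bandra
#     [19.1645, 72.8493, 3],    # Goregaon
#     [19.0669, 72.8774, 7],    # BKC
#     [19.076, 72.8846, 3],     # Kurla
#     [19.127, 72.8291, 11],    # DN Nagar
# ])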

# Seed Python's `random` module, which ClusterGA uses for selection, crossover
# and mutation, so that re-running this cell reproduces the same result.
GA_SEED = 42
random.seed(GA_SEED)

# Independent NumPy copies of the arrays prepared in the loading cell.
allData = np.array(originalData)
data = np.array(features)

ga = ClusterGA(n_clusters=30, population_size=30, max_iterations=100, originalData=allData)

# Evolve the population; the return value holds one cluster label per row of `data`.
best_clusters = ga.run(data)

print("Best clusters found:", best_clusters)

[[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1

In [38]:
# Echo the cluster assignment returned by ga.run(data) in the previous cell.
print(best_clusters)

[0, 0, 0, 9, 4, 13, 21, 0, 29, 25, 0, 29, 27, 0, 0, 0, 29, 29, 29, 0, 20, 0, 4, 22, 0, 1, 0, 12, 0, 8, 26, 17, 0, 0, 10, 8, 14, 21, 0, 14, 0, 11, 0, 0, 0, 24, 16, 0, 23, 0, 0, 23, 0, 2, 0, 0, 0, 0, 5, 0, 0, 0, 18, 25, 29, 0, 0, 17, 15, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 17, 29, 10, 23, 8, 14, 13, 0, 12, 0, 22, 17, 18, 0, 0, 7, 27, 25, 26, 13, 11, 0, 9, 6, 16, 0, 14, 10, 13, 0, 11, 0, 29, 1, 0, 21, 0, 0, 24, 29, 29, 3, 21, 23, 11, 0, 0, 0, 2, 0, 0, 15, 13, 1, 0, 0, 0, 25, 0, 0, 0, 2, 9, 0, 18, 1, 8, 0, 5, 22]


In [30]:
# Load the previously saved results table.
# NOTE(review): this is the same path the final cell writes to, so every run
# reads back its own earlier output (columns accumulate across runs).
df = pd.read_csv('ga_output_0.1.csv')

In [31]:
# Attach the GA assignment as a new column; pandas requires one label per row of df.
# NOTE(review): this cell's execution count (In [31]) is lower than the GA
# cell's (In [37]), and the values displayed by df.head() differ from the
# printed best_clusters, so the saved column likely came from an earlier
# run. Restart the kernel and run all cells in order to confirm.
df['GeneticCluster'] = best_clusters

In [32]:
# Preview the first rows to check that the new column was attached.
df.head()

Unnamed: 0,Location,Latitude,Longitude,Kmeans,Frequency,Genetic,Genetic.1,GeneticCluster
0,Andheri East,19.115491,72.854202,3,62,3,4,21
1,Bandra West,19.058336,72.830267,3,82,4,3,3
2,Chembur East,19.055292,72.903055,0,48,3,1,0
3,Dadar West,19.019282,72.842875,1,35,0,4,0
4,Fort,18.933266,72.836019,1,16,1,2,13


In [33]:
# Apply both column renames in one pass. Labels that are absent from df are
# skipped silently (rename's default is errors="ignore").
# NOTE(review): the df.head() output lists no 'Cluster' column, so that
# mapping looks like a no-op here. Confirm whether 'Kmeans' was intended.
df = df.rename(
    columns={
        'Cluster': 'Kmeans Cluster',
        'GeneticCluster': 'GeneticFinal',
    }
)

In [34]:
# Preview again to confirm the column names after renaming.
df.head()

Unnamed: 0,Location,Latitude,Longitude,Kmeans,Frequency,Genetic,Genetic.1,GeneticFinal
0,Andheri East,19.115491,72.854202,3,62,3,4,21
1,Bandra West,19.058336,72.830267,3,82,4,3,3
2,Chembur East,19.055292,72.903055,0,48,3,1,0
3,Dadar West,19.019282,72.842875,1,35,0,4,0
4,Fort,18.933266,72.836019,1,16,1,2,13


In [35]:
# Persist the table without writing the index as a column.
# NOTE(review): this overwrites the same file that was loaded into df above,
# so the pipeline is not idempotent. Consider writing to a new path.
df.to_csv('ga_output_0.1.csv', index=False)