## Data Wrangling

In [1]:
import pandas as pd
import numpy as np
from operator import attrgetter
import random, sys, time, copy
import matplotlib.pyplot as plt

In [2]:
# Load the distance table from the CSV file hosted on GitHub.
DISTANCES_CSV_URL = 'https://raw.githubusercontent.com/Haidarr-h/datasetOptimasi1/main/distance.csv'
distances_df = pd.read_csv(DISTANCES_CSV_URL)

In [3]:
# Preview the first rows of the loaded distance table.
distances_df.head()

Unnamed: 0,UserId,4,38,71,90,94,101,142,163,164,...,8206,8232,8236,8255,8271,8300,8358,8360,8507,8545
0,4,0,1280,1762,1406,1589,1312,941,1098,1020,...,1377,321,1469,1398,1753,1762,846,777,1025,1017
1,38,1280,0,681,202,385,956,1639,1003,1520,...,1817,1178,411,2029,1370,681,1603,1408,1515,1433
2,71,1762,681,0,503,338,1512,2110,1524,2002,...,2323,1660,342,2511,1926,0,2084,1890,1996,1915
3,90,1406,202,503,0,208,1082,1765,1129,1646,...,1943,1304,355,2155,1496,503,1728,1534,1640,1559
4,94,1589,385,338,208,0,1264,1948,1311,1829,...,2126,1487,226,2338,1679,338,1911,1717,1823,1742


In [12]:
# Check that location id 216 is present among the column labels (looked up as the string '216').
'216' in distances_df.columns

True

## Ant Lion Optimizer (ALO) Algorithm

#### Graph of Visit Locations

In [5]:
# Connection costs keyed by (source, destination) tuples; filled by add_connection.
locations_connections = {}
# Every location that appears as the source or destination of a stored connection.
location_set = set()

def exist_connection(source, destination):
    """Return True when a cost is stored for the (source, destination) pair, else False."""
    connection_key = (source, destination)
    return connection_key in locations_connections

def add_connection(source, destination, cost=0):
    """Register the cost of travelling from ``source`` to ``destination``.

    The first registration of a pair wins: if the pair is already stored the
    call does nothing. Both endpoints are recorded in ``location_set``.
    """
    if exist_connection(source, destination):
        return

    locations_connections[(source, destination)] = cost
    location_set.update((source, destination))

def get_cost_path(path):
    """Sum the stored costs of every consecutive pair of locations in ``path``.

    A pair without a stored connection contributes nothing to the total; a
    message is printed for each such pair. The leg from the last location back
    to the first is not included.
    """
    total_cost = 0
    for source, destination in zip(path, path[1:]):
        leg = (source, destination)
        if leg not in locations_connections:
            print("Cost between locations are not found")
            continue
        total_cost += locations_connections[leg]
    return total_cost

def get_random_path(max_population):
    """Build up to ``max_population`` distinct random paths over ``location_set``.

    One starting location is drawn at random and shared by every path; the
    remaining locations are shuffled independently for each path.

    Parameters
    ----------
    max_population : int
        Number of shuffles attempted. A shuffle that repeats an earlier path
        is discarded, so fewer paths may be returned.

    Returns
    -------
    list of list
        The distinct paths, each starting with the same location.

    Raises
    ------
    SystemExit
        When ``location_set`` is empty, so no starting location can be chosen.
    """
    location_list = list(location_set)

    # Check before random.choice, which raises IndexError on an empty list.
    if not location_list:
        print("Lokasi Awal Pembentukan Path Tidak Ditemukan")
        sys.exit(1)

    first_location = random.choice(location_list)
    remaining_locations = location_list.copy()
    remaining_locations.remove(first_location)

    random_path = []
    seen_paths = set()  # tuple form of each accepted path, for O(1) duplicate checks

    for _ in range(max_population):
        candidate = remaining_locations.copy()
        random.shuffle(candidate)
        candidate.insert(0, first_location)

        candidate_key = tuple(candidate)
        if candidate_key not in seen_paths:
            seen_paths.add(candidate_key)
            random_path.append(candidate)

    return random_path

def show_connections():
    """Print every stored connection as one line with source, destination and cost."""
    print("Showing Locations Connections...")

    for (source, destination), cost in locations_connections.items():
        print("Source :", source, "| Destination :", destination, "| Cost", cost)

#### Roulette Selection, Random Walk

In [10]:
def roulette_wheel_selection(ant_lion_cost):
    """Draw an index with probability proportional to its entry in ``ant_lion_cost``.

    Parameters
    ----------
    ant_lion_cost : sequence of numbers
        One weight per candidate.

    Returns
    -------
    int
        Index of the selected candidate, always within ``range(len(ant_lion_cost))``.

    Raises
    ------
    ValueError
        When ``ant_lion_cost`` is empty.

    Notes
    -----
    NOTE(review): the chance of being picked grows with the cost, while
    run_ant_lion treats the lowest cost as the best — confirm whether an
    inverse weighting was intended.
    """
    population = len(ant_lion_cost)
    if population == 0:
        raise ValueError("ant_lion_cost must contain at least one cost")

    total_cost = sum(ant_lion_cost)
    if total_cost == 0:
        # Every share would be 0/0, so fall back to a uniform draw.
        return int(np.random.randint(population))

    cumulative_prob = np.cumsum([cost / total_cost for cost in ant_lion_cost])
    random_number = np.random.rand()

    for index, cum_prob in enumerate(cumulative_prob):
        if random_number < cum_prob:
            return index

    # Floating-point rounding can leave the final cumulative value just below 1;
    # the draw then belongs to the last candidate.
    return population - 1

def find_nearest_location(pos, locations):
    """Return the entry of ``locations`` whose value is numerically closest to ``pos``.

    When several entries are equally close, the one that comes first in
    ``locations`` is returned.
    """
    def gap(candidate):
        return abs(candidate - pos)

    return min(locations, key=gap)

def ant_random_walk(current_iter, total_iter, dim, lb, ub, pos):
    """Random-walk around ``pos`` and snap each coordinate to a distinct known location.

    Parameters
    ----------
    current_iter : int
        Zero-based iteration index. It is also the step of the generated walk
        that gets sampled, so it must be smaller than ``total_iter``.
    total_iter : int
        Total number of iterations; also the length of every generated walk.
    dim : int
        Number of coordinates to produce. Each produced location is removed
        from the candidate pool, so ``dim`` must not exceed ``len(location_set)``.
    lb, ub : sequence of numbers
        Per-dimension lower and upper bounds; the first ``dim`` entries are read.
    pos : sequence of numbers
        Position the bounds are shifted around; the first ``dim`` entries are read.

    Returns
    -------
    list
        ``dim`` distinct members of ``location_set``.
    """
    # Pick the exponent w from how far the run has progressed.
    w = 6
    for fraction, exponent in ((0.1, 1), (0.5, 2), (0.75, 3), (0.9, 4), (0.95, 5)):
        if current_iter < total_iter * fraction:
            w = exponent
            break

    # I is the ratio by which the bounds are shrunk (1 = no shrinking).
    if w == 1:
        I = 1
    else:
        I = 1 + (10 ** w * (current_iter + 1)) // total_iter

    temp_lb = [lb[i] / I for i in range(dim)]
    temp_ub = [ub[i] / I for i in range(dim)]

    # Shift each bound around pos, flipping its sign half of the time.
    if random.random() < 0.5:
        temp_lb = [temp_lb[i] + pos[i] for i in range(dim)]
    else:
        temp_lb = [-temp_lb[i] + pos[i] for i in range(dim)]

    if random.random() >= 0.5:
        temp_ub = [temp_ub[i] + pos[i] for i in range(dim)]
    else:
        temp_ub = [-temp_ub[i] + pos[i] for i in range(dim)]

    new_pos = []
    temp_locations = list(location_set.copy())

    for i in range(dim):
        # Cumulative sum of +/-1 steps, forced to start at 0.
        x = np.cumsum(2 * (np.random.rand(total_iter, 1) > 0.5) - 1)
        x[0] = 0
        a = np.min(x)
        b = np.max(x)
        c = temp_lb[i]
        d = temp_ub[i]

        if b == a:
            # A flat walk has no range to rescale; dividing by (b - a) would
            # yield NaN, so the sampled point is placed on the lower bound.
            scaled = c
        else:
            # Min-max rescale of the sampled step from [a, b] to [c, d].
            scaled = (x[current_iter] - a) * (d - c) / (b - a) + c
        value = int(round(scaled))

        # Snapping procedure:
        # 1. work on a copy of the list of visit locations
        # 2. find the location closest to the walked value in that copy
        # 3. remove the chosen location from the copy
        # 4. move on to the next coordinate
        # 5. repeat from step 2
        value = find_nearest_location(value, temp_locations)
        temp_locations.remove(value)

        new_pos.append(value)
    return new_pos

In [11]:
# uji coba random walk
pos = [4, 132, 3333, 234, 2235]
lb = [0]*len(pos)
ub = [4]*len(pos)
z = ant_random_walk(70, 1000, 5, lb, ub, pos)
z

[4, 142, 3348, 216, 2265]

#### ALO Run

In [14]:
def run_ant_lion(population_size, iterations, location_counts, lower_bound, upper_bound):
    """Run the ant lion optimiser over the stored connections and report the best path.

    Parameters
    ----------
    population_size : int
        Number of ants rebuilt per iteration and number of antlions kept.
    iterations : int
        Number of main-loop iterations.
    location_counts : int
        Number of locations per path; passed to the random walk as its dimension.
    lower_bound, upper_bound : sequence of numbers
        Per-dimension bounds handed to ``ant_random_walk``.

    Returns
    -------
    tuple
        ``(elite_cost, elite_post)``: the lowest cost found and its path.
        Both are also printed.

    Raises
    ------
    SystemExit
        When either initial population comes back empty.
    """
    dim = location_counts

    # STEP 1 : FIRST INITIALIZATION
    # get_random_path drops duplicate shuffles and may return fewer than
    # population_size paths, so costs are derived from the paths actually
    # returned. Preallocated zero costs would otherwise win the elite selection.
    ant_lion_position = get_random_path(population_size)
    ant_lion_cost = [get_cost_path(path) for path in ant_lion_position]

    ant_position = get_random_path(population_size)
    ant_cost = [get_cost_path(path) for path in ant_position]

    ## Check values
    if not ant_lion_position or not ant_position:
        print("Ant or Antlion variable contains nothing")
        sys.exit(1)

    # Every ant slot is rebuilt in each iteration, so pad short populations to
    # exactly population_size slots.
    missing_ants = population_size - len(ant_position)
    ant_position.extend([] for _ in range(missing_ants))
    ant_cost.extend([0] * missing_ants)

    elite_cost = min(ant_lion_cost)
    elite_post = ant_lion_position[ant_lion_cost.index(elite_cost)]

    # STEP 2 : THE MAIN LOOP
    for iteration in range(iterations):
        ## for every ant
        for j in range(population_size):
            ## selecting antlion using roulette wheel
            index_roullete_wheel = roulette_wheel_selection(ant_lion_cost)

            walk_ant = ant_random_walk(iteration, iterations, dim, lower_bound, upper_bound, ant_lion_position[index_roullete_wheel])
            walk_elite = ant_random_walk(iteration, iterations, dim, lower_bound, upper_bound, elite_post)

            # Combine both walks: a random-length prefix of the elite walk,
            # followed by the locations of the antlion walk not yet present.
            point = random.randint(0, location_counts-1)
            new_ant = walk_elite[0:point]
            for location in walk_ant:
                if location not in new_ant:
                    new_ant.append(location)

            temp_locations = list(location_set.copy())

            for index, location in enumerate(new_ant):
                new_ant[index] = find_nearest_location(location, temp_locations)
                # make sure not to find the same locations
                temp_locations.remove(new_ant[index])

            ant_position[j] = new_ant
            ant_cost[j] = get_cost_path(new_ant)

        ## catching prey and re build the pit
        # Pool antlions and ants, then keep the population_size cheapest paths.
        temp_position = ant_lion_position + ant_position
        temp_cost = ant_lion_cost + ant_cost
        sorted_index_score = np.argsort(temp_cost)

        ant_lion_position = []
        ant_lion_cost = []
        for i in range(population_size):
            ant_lion_position.append(temp_position[sorted_index_score[i]])
            ant_lion_cost.append(temp_cost[sorted_index_score[i]])

        if min(ant_lion_cost) < elite_cost:
            elite_cost = min(ant_lion_cost)
            elite_post = ant_lion_position[ant_lion_cost.index(elite_cost)]
        else:
            # Keep the elite in the population when nothing better was found.
            ant_lion_position[0] = elite_post
            ant_lion_cost[0] = elite_cost

        if iteration % 10 == 0:
            print("Iteration :", iteration, "| Elite Cost :", elite_cost)

    # STEP 3 : SHOW THE RESULT
    print('The Shortest Cost is', elite_cost)
    print('The shortest Route :', elite_post)

    return elite_cost, elite_post


#### Main

In [None]:
def main(population_size=300, iterations=1000):
    """Register every connection from ``distances_df`` and run the ant lion optimiser.

    The first column of ``distances_df`` is read as the source id and the label
    of each remaining column as the destination id; the cell value is the cost.

    Parameters
    ----------
    population_size : int, optional
        Population size passed to ``run_ant_lion`` (default 300).
    iterations : int, optional
        Number of iterations passed to ``run_ant_lion`` (default 1000).
    """
    destination_ids = [int(label) for label in distances_df.columns[1:]]

    # Walk rows by position so the lookups stay correct even when the frame's
    # index labels are not 0..n-1.
    for row_pos in range(len(distances_df)):
        source = distances_df.iat[row_pos, 0]
        for col_pos, destination in enumerate(destination_ids, start=1):
            # add source, destination, and duration cost from the dataframe
            add_connection(source, destination, distances_df.iat[row_pos, col_pos])

    location_counts = len(location_set)
    lower_bound = [0]*location_counts
    upper_bound = [location_counts-1]*location_counts

    #show_connections()

    run_ant_lion(population_size, iterations, location_counts, lower_bound, upper_bound)

# Load the connections and run the optimiser; run_ant_lion prints the elite cost every 10 iterations.
main()

Iteration : 0 | Elite Cost : 279328
Iteration : 10 | Elite Cost : 270868
Iteration : 20 | Elite Cost : 270868
Iteration : 30 | Elite Cost : 270868
Iteration : 40 | Elite Cost : 270868
Iteration : 50 | Elite Cost : 269691
Iteration : 60 | Elite Cost : 269691


In [None]:
# NOTE(review): hard-coded list of location ids — presumably a route printed by an
# earlier optimiser run; confirm its origin. The cell displays how many ids it holds.
x = [2619, 3052, 3268, 2736, 3719, 2139, 4300, 2451, 3241, 5474, 2384, 5950, 4356, 7649, 6540, 6322, 2265, 2442, 1388, 4603, 5308, 3801, 3091, 1973, 6078, 760, 2713, 4027, 4484, 2680, 5143, 4398, 1848, 5655, 3348, 2354, 4758, 4936, 4535, 4794, 2717, 3120, 3242, 4357, 4322, 5227, 4859, 5958, 6432, 5706, 5261, 4150, 4669, 3040, 6048, 4661, 3214, 4655, 1407, 2698, 7304, 1540, 3606, 4120, 3897, 3118, 4839, 1631, 3218, 4966, 3234, 2192, 2670, 4531, 5500, 3359, 3647, 5895, 3408, 3648, 4100, 3436, 5757, 2002, 1468, 1824, 4525, 607, 4703, 5665, 6007, 7158, 4570, 5023, 5068, 5415, 2176, 6454, 5520, 1626, 5408, 5223, 3862, 2056, 1560, 5259, 2091, 1767, 5939, 7352, 5501, 3839, 5492, 1788, 2048, 3828, 1989, 1231, 1538, 3431, 6331, 3451, 5669, 3518, 1430, 5639, 1926, 1363, 5545, 5264, 3764, 8143, 7071, 3643, 5630, 6405, 6451, 6658, 1039, 6796, 1808, 6819, 1918, 6898, 1907, 6972, 6856, 3509, 3507, 1758, 1508, 6857, 6846, 1467, 6891, 1310, 1412, 6966, 6967, 1337, 1327, 1304, 6993, 7020, 1175, 1136, 7028, 7396, 7068, 1116, 216, 7108, 1138, 7120, 1076, 7270, 6983, 683, 7320, 7328, 1071, 985, 1073, 864, 7346, 568, 7372, 7289, 7512, 7549, 7514, 664, 940, 7629, 918, 7578, 671, 7599, 554, 7573, 7591, 653, 520, 7567, 8179, 8195, 8271, 464, 8206, 466, 8201, 213, 8236, 304, 336, 8232, 164, 280, 201, 101, 8360, 196, 163, 8177, 8255, 8300, 94, 142, 8358, 90, 71, 8507, 38, 8545, 4, 8172]
len(x)