Importing the modules needed for the project

In [None]:
import pandas as pd
import numpy as np
import datetime as dt
from tqdm.notebook import tqdm_notebook
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.spatial.distance import squareform, pdist
import haversine as hs
from scipy.spatial.distance import cdist
import warnings
warnings.filterwarnings('ignore')
from create_dist_matrix import create_dist_matrix, create_dist_mat_2
from get_avail_act import get_avail_act
from get_best_action import get_best_action
from get_or_tools_sol import or_solution
from get_random_traj import get_random_traj
from train import train_model
import numpy as np


Loading the data (change the file name to use a different dataset)

In [None]:

# Read the input CSV; point the path at another file to run on different data.
data_csv = pd.read_csv(filepath_or_buffer="clean_data.csv")
# Working dataframe used by every later cell.
df = pd.DataFrame(data=data_csv)


Cleaning the data

In [None]:
# Keep only the first row of each (CourierId, Latitude, Longitude) combination.
duplicate_keys = ['CourierId', 'Latitude', 'Longitude']
df = df.drop_duplicates(subset=duplicate_keys, keep='first')

df

In [None]:
# Discard every row that has a missing value in any column.
df = df.dropna(axis='index')
# Renumber the remaining rows 0..n-1 so there are no gaps in the index.
index_list = list(range(len(df)))
df = df.set_index([index_list])

df

Reinforcement learning Algorithm

In [None]:
# Hyper-parameters passed to tsp_solver by the courier loops:
# alpha is the learning rate, gamma is the discount factor.
alpha, gamma = 0.012, 0.4
def tsp_solver(dist_mat,alpha=0.2,gamma=0.8,n_train=2000):
    """Train the RL model on ``dist_mat`` and follow it to build a closed tour.

    The tour starts at destination 0, repeatedly moves to the action chosen by
    ``get_best_action`` among the destinations ``get_avail_act`` still offers,
    and finally returns to destination 0.

    Parameters
    ----------
    dist_mat : square matrix of pairwise distances; must expose ``.shape`` and
        support ``dist_mat[i, j]`` indexing.
    alpha : learning rate forwarded to ``train_model``.
    gamma : discount factor forwarded to ``train_model``.
    n_train : value forwarded to ``train_model`` as ``n_train``
        (default 2000, the value that used to be hard-coded).

    Returns
    -------
    tuple ``(slow_pctg, traj, distance_travel, google_route, best_dist)``
        slow_pctg       -- percentage by which the RL tour is longer than the
                           tour returned by ``or_solution``.
        traj            -- visited destinations joined with ' -> '.
        distance_travel -- total length of the RL tour, in the units of dist_mat.
        google_route, best_dist -- route and distance returned by ``or_solution``.
    """
    n_dest = dist_mat.shape[0]

    # Train RL model; q is the trained transition matrix.
    q = train_model(dist_mat, n_train = n_train, gamma = gamma, alpha = alpha)

    # Use the model to build the trajectory, starting from destination 0.
    state = [0]
    distance_travel = 0.
    posible_actions = get_avail_act(state, n_dest)
    while posible_actions: # until all destinations are visited
        action = get_best_action(state[-1], posible_actions, q)
        distance_travel += dist_mat[state[-1], action]
        state.append(action)
        posible_actions = get_avail_act(state, n_dest)

    # Back to warehouse (destination 0) to close the tour.
    distance_travel += dist_mat[state[-1], 0]
    state.append(0)

    # Reference solution the RL tour is compared against.
    best_dist, google_route = or_solution(dist_mat)

    # Random-tour baseline; its result is not part of the return value.
    # NOTE(review): the call is kept only in case get_random_traj has side
    # effects that cannot be seen from here -- drop it if it has none.
    get_random_traj(dist_mat)

    traj = ' -> '.join(str(stop) for stop in state)
    slow_pctg = 100*(-1+distance_travel/best_dist)
    return slow_pctg, traj, distance_travel, google_route, best_dist

# Add a 'Distance' column holding 0 for every row of df.
dist_list = [0 for _ in range(len(df))]
df['Distance'] = dist_list



Looping the algorithm for every courier and outputting the trajectory and distance (without adding the distances to the dataframe)

In [None]:
# Solve one tour per courier and print it. Couriers are processed in order of
# first appearance in df, so the printed output has a reproducible order
# (iterating a set gives no such guarantee).
for courier_id in df.CourierId.unique().tolist():
    print("Current ID :",courier_id)

    # Rows of this courier, renumbered 0..n-1; tsp_solver starts and ends the
    # tour at row 0.
    new_df = df.loc[df.CourierId == courier_id].reset_index(drop=True)

    # Read the coordinates once instead of indexing the Series inside the
    # nested loop.
    coords = list(zip(new_df.Latitude, new_df.Longitude))

    # Pairwise haversine distance between every pair of rows.
    dist_matrix = np.array(
        [[hs.haversine(origin, dest) for dest in coords] for origin in coords]
    )

    slow_pctg, rl_route, rl_dist, google_route, google_dist = tsp_solver(dist_matrix, alpha=alpha, gamma=gamma)

    print("For Courier with ID:"+str(courier_id)+",\nThe trajectory is :",rl_route)
    print("The distance is :",rl_dist,"km")
    print("=======================================")


Looping the algorithm for every courier (Adding the distances to the dataframe)

In [None]:
# Start every row at 0.0: a float column can hold the tour lengths without
# changing the column dtype when they are written below.
distance_index_list = [0.0] * len(df)
df['Distance'] = distance_index_list

# Solve one tour per courier and store its length on all of that courier's rows.
# Couriers are processed in order of first appearance in df.
for courier_id in df.CourierId.unique().tolist():
    print("Current ID :",courier_id)

    # Boolean mask of this courier's rows; reused for the write-back below.
    courier_rows = df.CourierId == courier_id

    # Rows of this courier, renumbered 0..n-1; tsp_solver starts and ends the
    # tour at row 0.
    new_df = df.loc[courier_rows].reset_index(drop=True)

    # Read the coordinates once instead of indexing the Series inside the
    # nested loop.
    coords = list(zip(new_df.Latitude, new_df.Longitude))

    # Pairwise haversine distance between every pair of rows.
    dist_matrix = np.array(
        [[hs.haversine(origin, dest) for dest in coords] for origin in coords]
    )

    slow_pctg, rl_route, rl_dist, google_route, google_dist = tsp_solver(dist_matrix, alpha=alpha, gamma=gamma)

    # Single .loc write: chained `df.Distance[idx] = ...` can assign to a
    # temporary object and leave df unchanged.
    df.loc[courier_rows, 'Distance'] = rl_dist

In [None]:
# Display df so the 'Distance' column can be inspected.
df