In [8]:
from create_dist_matrix import create_dist_matrix, create_dist_mat_2
from get_avail_act import get_avail_act
from get_best_action import get_best_action
from get_or_tools_sol import or_solution
from get_random_traj import get_random_traj
from train import train_model
import numpy as np
#Definitions
n_dest = 20 # Set number of destinations
dist_mat = create_dist_matrix(n_dim = n_dest, opt = 1) # Create distance matrix, opt = 0 is random, opt = 2 is fixed example
#dist_mat = create_dist_mat_2() # Use googles example

def tsp_solver(dist_mat,alpha=0.2,gamma=0.8):
    #alpha is the learning rate
    #gamma is the discount factor
    n_dest = dist_mat.shape[0]
    # Train RL model
    q = train_model(dist_mat, n_train = 2000, gamma = gamma, alpha = alpha)# Get trained transition matrix

    #print(q)

    # Use model to find optimum trajectory
    state = [0]
    distance_travel = 0.
    posible_actions = get_avail_act(state, n_dest)
    while posible_actions: # until all destinations are visited
        action = get_best_action(state[-1], posible_actions, q)
        distance_travel += dist_mat[state[-1], action]
        state.append(action)
        posible_actions = get_avail_act(state, n_dest)

    #Back to warehouse
    action = 0
    distance_travel += dist_mat[state[-1], action]
    state.append(action)

    # Get Best optimization possible
    #print("\nGoogle Results: ")
    best_dist, google_route = or_solution(dist_mat)

    # Get random tour
    random_dist, random_route = get_random_traj(dist_mat)

    #Out RL results
    traj =' -> '.join([str(b) for b in state])
    #print(f"Best trajectory found with RL: \n {traj}" )
    #print(f"Total distance travelled with this traj: {distance_travel}\n")
    slow_pctg = 100*(-1+distance_travel/best_dist)
    random_pctg = 100*(-1+distance_travel/random_dist)
    return slow_pctg, traj, distance_travel, google_route, best_dist
    #print(f"RL solution is {100*(-1+distance_travel/best_dist)}% slower than google's solution")

best_pctg = 100
alpha = 0.012
gamma = 0.4
#for alpha in np.linspace(0.012,0.012,1):
#    for gamma in np.linspace(0.4 ,0.4,100):
"""for _ in range(20):
        slow_pctg, rl_route, rl_dist, google_route, google_dist = tsp_solver(dist_mat, alpha=alpha, gamma=gamma)
        if slow_pctg < best_pctg:
            best_pctg = slow_pctg
            if slow_pctg < 0:
                print(f"\nBest solution so far with parameters alpha:{alpha}, gamma:{gamma}, is {-np.around(slow_pctg,decimals=1)}% FASTER than google's solution")
                print(f"RL route:     {rl_route}; distance: {rl_dist}")
                print(f"Google route {google_route}; distance: {google_dist}\n")
            else:
                print(f"\nBest solution so far with parameters alpha:{alpha}, gamma:{gamma}, is {np.around(slow_pctg,decimals=1)}% slower than google's solution")
"""

'for _ in range(20):\n        slow_pctg, rl_route, rl_dist, google_route, google_dist = tsp_solver(dist_mat, alpha=alpha, gamma=gamma)\n        if slow_pctg < best_pctg:\n            best_pctg = slow_pctg\n            if slow_pctg < 0:\n                print(f"\nBest solution so far with parameters alpha:{alpha}, gamma:{gamma}, is {-np.around(slow_pctg,decimals=1)}% FASTER than google\'s solution")\n                print(f"RL route:     {rl_route}; distance: {rl_dist}")\n                print(f"Google route {google_route}; distance: {google_dist}\n")\n            else:\n                print(f"\nBest solution so far with parameters alpha:{alpha}, gamma:{gamma}, is {np.around(slow_pctg,decimals=1)}% slower than google\'s solution")\n'

In [1]:
import pandas as pd
import numpy as np
import datetime as dt
from tqdm.notebook import tqdm_notebook
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.spatial.distance import squareform, pdist
import haversine as hs
from scipy.spatial.distance import cdist
from create_dist_matrix import create_dist_matrix, create_dist_mat_2
from get_avail_act import get_avail_act
from get_best_action import get_best_action
from get_or_tools_sol import or_solution
from get_random_traj import get_random_traj
from train import train_model
import numpy as np


In [4]:
import pandas as pd

data_csv = pd.read_csv("clean_data.csv")
df = pd.DataFrame(data_csv)


In [5]:
df.drop_duplicates(subset=['CourierId','Latitude','Longitude'],keep='first',inplace=True)

df

Unnamed: 0,CourierId,Latitude,Longitude
0,324293,55.605452,12.583987
3,324293,55.631359,12.656990
8,324293,55.638429,12.634550
12,324293,,
15,324293,55.656790,12.604109
...,...,...,...
254923,0,55.569114,12.618886
254924,0,55.568978,12.619223
254925,0,55.568047,12.618471
254926,0,55.570509,12.620062


In [6]:
df = df.dropna(axis=0)
index_list = [i for i in range(len(df))]
df = df.set_index([index_list])

df

Unnamed: 0,CourierId,Latitude,Longitude
0,324293,55.605452,12.583987
1,324293,55.631359,12.656990
2,324293,55.638429,12.634550
3,324293,55.656790,12.604109
4,324293,55.657004,12.604765
...,...,...,...
67658,0,55.569114,12.618886
67659,0,55.568978,12.619223
67660,0,55.568047,12.618471
67661,0,55.570509,12.620062


In [21]:
dist_list = [0] * len(df)
df['Distance'] = dist_list
new_df = df.loc[df.CourierId == 324293]
new_df = new_df.set_index([[i for i in range(len(new_df))]])
dist_matrix = [[0]*len(new_df) for _ in range(len(new_df))]

for j in new_df.index:
    for k in new_df.index:
        loc1=(new_df.Latitude[j], new_df.Longitude[j])
        loc2=(new_df.Latitude[k], new_df.Longitude[k])
        dist_matrix[j][k] = hs.haversine(loc1,loc2)


In [22]:
dist_list = [0] * len(df)
df['Distance'] = dist_list
new_df = df.loc[df.CourierId == 324293]
new_df = new_df.set_index([[i for i in range(len(new_df))]])
dist_matrix = [[0]*len(new_df) for _ in range(len(new_df))]

for j in new_df.index:
    for k in new_df.index:
        loc1=(new_df.Latitude[j], new_df.Longitude[j])
        loc2=(new_df.Latitude[k], new_df.Longitude[k])
        dist_matrix[j][k] = hs.haversine(loc1,loc2)

dist_matrix = np.array(dist_matrix)

slow_pctg, rl_route, rl_dist, google_route, google_dist = tsp_solver(dist_matrix, alpha=alpha, gamma=gamma)
print(rl_route)
print(rl_dist)


"""slow_pctg, rl_route, rl_dist, google_route, google_dist = tsp_solver(dist_matrix, alpha=alpha, gamma=gamma)
print(rl_dist)
print("reach")
for idx in df.index:
    if df.CourierId[idx] == 324293:
        df.Distance[idx] = rl_dist"""
#Definitions
n_dest = 20 # Set number of destinations
#dist_mat = create_dist_matrix(n_dim = n_dest, opt = 2) # Create distance matrix, opt = 0 is random, opt = 2 is fixed example
#dist_mat = create_dist_mat_2() # Use googles example

def tsp_solver(dist_mat,alpha=0.2,gamma=0.8):
    #alpha is the learning rate
    #gamma is the discount factor
    n_dest = dist_mat.shape[0]
    # Train RL model
    q = train_model(dist_mat, n_train = 2000, gamma = gamma, alpha = alpha)# Get trained transition matrix

    #print(q)

    # Use model to find optimum trajectory
    state = [0]
    distance_travel = 0.
    posible_actions = get_avail_act(state, n_dest)
    while posible_actions: # until all destinations are visited
        action = get_best_action(state[-1], posible_actions, q)
        distance_travel += dist_mat[state[-1], action]
        state.append(action)
        posible_actions = get_avail_act(state, n_dest)

    #Back to warehouse
    action = 0
    distance_travel += dist_mat[state[-1], action]
    state.append(action)

    # Get Best optimization possible
    #print("\nGoogle Results: ")
    best_dist, google_route = or_solution(dist_mat)

    # Get random tour
    random_dist, random_route = get_random_traj(dist_mat)

    #Out RL results
    traj =' -> '.join([str(b) for b in state])
    #print(f"Best trajectory found with RL: \n {traj}" )
    #print(f"Total distance travelled with this traj: {distance_travel}\n")
    slow_pctg = 100*(-1+distance_travel/best_dist)
    random_pctg = 100*(-1+distance_travel/random_dist)
    return slow_pctg, traj, distance_travel, google_route, best_dist
    #print(f"RL solution is {100*(-1+distance_travel/best_dist)}% slower than google's solution")



0 -> 12 -> 11 -> 10 -> 3 -> 4 -> 2 -> 1 -> 8 -> 5 -> 9 -> 14 -> 6 -> 17 -> 13 -> 16 -> 15 -> 7 -> 0
50.93023369256136


  slow_pctg = 100*(-1+distance_travel/best_dist)


In [None]:
best_pctg = 100
alpha = 0.012
gamma = 0.4
#for alpha in np.linspace(0.012,0.012,1):
#    for gamma in np.linspace(0.4 ,0.4,100):
for _ in range(15):
        print('lalala')
        print(google_dist)
        if slow_pctg < best_pctg:
            print("test")
            best_pctg = slow_pctg
            if slow_pctg < 0:
                print(f"\nBest solution so far with parameters alpha:{alpha}, gamma:{gamma}, is {-np.around(slow_pctg,decimals=1)}% FASTER than google's solution")
                print(f"RL route:     {rl_route}; distance: {rl_dist}")
                print(f"Google route {google_route}; distance: {google_dist}\n")
            else:
                print(f"\nBest solution so far with parameters alpha:{alpha}, gamma:{gamma}, is {np.around(slow_pctg,decimals=1)}% slower than google's solution")
                print(f"RL route:     {rl_route}; distance: {rl_dist}")
                print(f"Google route {google_route}; distance: {google_dist}\n")

In [11]:


df

  slow_pctg = 100*(-1+distance_travel/best_dist)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.Distance[idx] = rl_dist


0.533305590519197
reach


Unnamed: 0,CourierId,Latitude,Longitude,Distance
0,324293,55.605452,12.583987,0
1,324293,55.631359,12.656990,0
2,324293,55.638429,12.634550,0
3,324293,55.656790,12.604109,0
4,324293,55.657004,12.604765,0
...,...,...,...,...
67658,0,55.569114,12.618886,0
67659,0,55.568978,12.619223,0
67660,0,55.568047,12.618471,0
67661,0,55.570509,12.620062,0


In [157]:
from scipy.spatial.distance import cdist

ENV_SIZE = 10
N_STOPS = 100

# Creating the stops using numpy random points generator
xy = np.random.rand(N_STOPS,2)*ENV_SIZE

# Computing the distances between each points
# Here use euclidean distances, but any metric would do
# This distance matrix can actually represent a time, a distance or something else
distance_matrix = cdist(xy,xy)
xy

array([[3.86720192, 5.24967896],
       [6.41316922, 0.81735344],
       [9.08371871, 3.5797291 ],
       [1.8633534 , 1.74582271],
       [0.63276242, 9.02389088],
       [7.53304709, 6.93023763],
       [9.43074531, 1.55456606],
       [7.94045598, 3.10760419],
       [2.59621871, 1.24961026],
       [6.58695436, 0.48700354],
       [6.31024207, 2.82693173],
       [7.31852499, 2.91009116],
       [1.81809317, 1.0374901 ],
       [9.94963102, 9.96867959],
       [2.30431891, 7.2392988 ],
       [5.40669428, 6.69005678],
       [3.53800473, 0.97321346],
       [1.22051403, 9.22319772],
       [2.04857168, 2.16583188],
       [4.89684612, 2.43436635],
       [4.56445032, 6.72176783],
       [1.11751294, 0.66456881],
       [3.24951457, 7.29648156],
       [3.9654942 , 4.50790419],
       [1.44200184, 3.56737749],
       [1.80252861, 8.76572537],
       [8.91304562, 7.57237894],
       [2.37314362, 0.04546939],
       [1.8383833 , 9.05550752],
       [0.6443261 , 5.54327919],
       [0.

In [163]:
class DeliveryEnvironment:

    def reset(self):
        """Restart the environment for experience replay
        Returns the first state
        """
        pass

    def step(self,a):
        """Takes an action in a given state
        Returns:
            s_next: the next state
            reward: the reward for such action
            done: if the simulation is done
        """
        pass

    def render(self):
        """Visualize the environment state
        """
        pass

In [164]:
from scipy.spatial.distance import cdist
Q = cdist(xy,xy)

In [166]:
class QAgent():
    def __init__(self,states_size,actions_size,epsilon = 1.0,
    epsilon_min = 0.01,epsilon_decay = 0.999,gamma = 0.95,lr = 0.8):
        self.states_size = states_size
        self.actions_size = actions_size
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.gamma = gamma
        self.lr = lr
        self.Q = self.build_model(states_size,actions_size)


    def build_model(self,states_size,actions_size):
        Q = np.zeros([states_size,actions_size])
        return Q


    def train(self,s,a,r,s_next):
        self.Q[s,a] = self.Q[s,a] + self.lr * (r + self.gamma*np.max(self.Q[s_next,a]) - self.Q[s,a])

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


    def act(self,s):

        q = self.Q[s,:]

        if np.random.rand() > self.epsilon:
            a = np.argmax(q)
        else:
            a = np.random.randint(self.actions_size)

        return a

In [167]:
def run_episode(env,agent,verbose = 1):

    s = env.reset()
    agent.reset_memory()
    max_step = env.n_stops
    episode_reward = 0
    
    i = 0
    while i < max_step:

        # Remember the states
        agent.remember_state(s)

        # Choose an action
        a = agent.act(s)
        
        # Take the action, and get the reward from environment
        s_next,r,done = env.step(a)

        # Tweak the reward
        r = -1 * r
        
        if verbose: print(s_next,r,done)
        
        # Update our knowledge in the Q-table
        agent.train(s,a,r,s_next)
        
        # Update the caches
        episode_reward += r
        s = s_next
        
        # If the episode is terminated
        i += 1
        if done:
            break
            
    return env,agent,episode_reward

Best solution so far with parameters alpha:0.012, gamma:0.4, is 4.0% slower than google's solution