In [1]:
import numpy as np
import pandas as pd
import random
from scipy.spatial.distance import cdist

In [2]:
# Load data
restaurants = pd.read_csv("restaurants.csv")
customers = pd.read_csv("customers.csv")
riders = pd.read_csv("riders.csv")
riders_info = pd.read_csv("riders_info.csv")

In [3]:
# Extract coordinates
restaurant_coords = restaurants[['latitude', 'longitude']].values
customer_coords = customers[['latitude', 'longitude']].values
rider_coords = riders[['latitude', 'longitude']].values

In [4]:
# Compute distance matrices
distance_restaurant_customer = cdist(restaurant_coords, customer_coords, metric='euclidean')
distance_rider_restaurant = cdist(rider_coords, restaurant_coords, metric='euclidean')

In [5]:
def initial_assignment():
    assignments = {}
    rider_load = {rider_id: 0 for rider_id in riders_info.rider_id}
    rider_capacity = {rider_id: cap for rider_id, cap in zip(riders_info.rider_id, riders_info.capacity)}
    
    for i, customer in enumerate(customers.itertuples()):
        nearest_restaurant_idx = np.argmin(distance_restaurant_customer[:, i])
        possible_riders = [(idx, riders.iloc[idx]['rider_id']) for idx in np.argsort(distance_rider_restaurant[:, nearest_restaurant_idx])]
        
        for rider_idx, rider_id in possible_riders:
            if rider_load[rider_id] < rider_capacity[rider_id]:
                assignments[customer.customer_id] = (restaurants.iloc[nearest_restaurant_idx]['restaurant_id'], rider_id)
                rider_load[rider_id] += 1
                break
    return assignments

assignments = initial_assignment()

In [6]:
# RL-based dynamic reassignment
class RiderAssignmentRL:
    def __init__(self, num_riders, num_restaurants, num_customers):
        self.num_riders = num_riders
        self.num_restaurants = num_restaurants
        self.num_customers = num_customers
        self.q_table = np.zeros((num_riders, num_customers))
        self.learning_rate = 0.1
        self.discount_factor = 0.9
        self.epsilon = 0.2

    def choose_action(self, state):
        if random.uniform(0, 1) < self.epsilon:
            return random.randint(0, self.num_riders - 1)  # Explore
        return np.argmax(self.q_table[:, state])  # Exploit

    def update_q_table(self, rider, customer, reward):
        best_next_action = np.max(self.q_table[:, customer])
        self.q_table[rider, customer] += self.learning_rate * (reward + self.discount_factor * best_next_action - self.q_table[rider, customer])

    def train(self, epochs=1000):
        for _ in range(epochs):
            for customer in range(self.num_customers):
                assigned_rider = self.choose_action(customer)
                rest_idx = np.argmin(distance_restaurant_customer[:, customer])
                reward = - (distance_restaurant_customer[rest_idx, customer] + distance_rider_restaurant[assigned_rider, rest_idx])
                self.update_q_table(assigned_rider, customer, reward)

In [7]:
rl_model = RiderAssignmentRL(len(riders), len(restaurants), len(customers))
rl_model.train()

In [8]:
def evaluate_assignments(assignments):
    total_distance = 0
    for customer, (restaurant, rider) in assignments.items():
        rest_idx = restaurants[restaurants.restaurant_id == restaurant].index[0]
        cust_idx = customers[customers.customer_id == customer].index[0]
        rider_idx = riders[riders.rider_id == rider].index[0]
        total_distance += distance_restaurant_customer[rest_idx, cust_idx] + distance_rider_restaurant[rider_idx, rest_idx]
    return total_distance / len(assignments)

In [9]:
evaluation_score = evaluate_assignments(assignments)
print("Average Distance per Assignment:", evaluation_score)

Average Distance per Assignment: 0.008979330884239807
