## Test per progetto AI

È necessario caricare i file `roads_small.json` e `nodes_small.json`.

In [1]:
import json
import numpy as np
import matplotlib.pyplot as plt
import math
import random

In [2]:
#%matplotlib inline

In [3]:
# Load the node and road records from the two JSON files.
# Context managers close each file handle as soon as it has been parsed,
# instead of leaving that to the garbage collector.
with open('nodes_small.json', 'r') as nodes_file:
  nodes = json.load(nodes_file)
with open('roads_small.json', 'r') as roads_file:
  roads = json.load(roads_file)
print("Num nodes", len(nodes))
print("Num roads", len(roads))

Num nodes 2341
Num roads 10325


In [4]:
roads[1] # inspect one road record; roads are indexed by position: 0, 1, 2, ...

{'directed': False,
 'discriminator': '957918184',
 'distance': 461.10051922849374,
 'p1': '3006329516',
 'p2': '427043064',
 'sidewalks': [False, False]}

In [5]:
# 'p1' holds a node ID (the same kind of value stored under 'id' in the node records);
# presumably it is one endpoint of the road and 'p2' the other -- TODO confirm
roads[1]['p1']

'3006329516'

In [6]:
# Inspect the first node record
nodes[0]

{'coordinates': [12.53, 45.46], 'id': '3006329516'}

Create two maps as lookup tables for the nodes

In [7]:
#create a lookup table for point coordinates
node_coords = {point['id']:tuple(point['coordinates']) for point in nodes} #dictonary initialization with comprehension
coords_node = {tuple(point['coordinates']):point['id'] for point in nodes} #tuples are hashable-> can be key to a dictionary

In [8]:
print(node_coords[roads[11]['p1']]) # look up a node's coordinates from the node ID stored on a road

(12.27, 45.46)


A questo punto bisogna generare le query degli utenti:

semplificazione: l'orario non influisce sui tempi di percorrenza -> dipendono solo dalla distanza

Consider each trip as starting or ending in one of the nodes: the function below serves to approximate the closest node on the map

Regular trips (not related to commuting) are modeled as a Gaussian random variable with respect to location ($\mu = 15\,\text{km}$, $\sigma = 10\,\text{km}$)  

In [14]:
def generate_ratings(n_driver, mean=3.5, std=0.5):
  """Draw one random rating per driver, rounded up to a multiple of 0.5.

  Raw values are sampled from a normal distribution via ``np.random.normal``
  (so ``np.random.seed`` controls reproducibility) and then rounded *up* to
  the next half point, e.g. 3.2 -> 3.5 and 3.6 -> 4.0. A value that already
  lies on a half point is returned unchanged. Results are not clamped to any
  rating scale.

  Args:
    n_driver: number of ratings to generate.
    mean: mean of the normal distribution (default 3.5).
    std: standard deviation of the normal distribution (default 0.5).

  Returns:
    A float ndarray of shape ``(n_driver, 1)``.
  """
  raw = np.random.normal(mean, std, size=(n_driver, 1))
  # ceil(2x) / 2 rounds up to the next multiple of 0.5 for the whole array at
  # once; this also avoids handing size-1 arrays to math.ceil, an implicit
  # array-to-scalar conversion that recent NumPy versions deprecate.
  return np.ceil(raw * 2) / 2


In [29]:
# Pool of end points shared by all trips.
# The number of destinations is random: randrange(8, 12, 1) yields 8..11
# (the stop value 12 is excluded).
num_dest = random.randrange(8, 12, 1)
# Materialise the (id, coordinates) pairs once, instead of rebuilding the key
# and value lists on every iteration.
node_items = list(node_coords.items())
pool_dest = []
for i in range(num_dest):
  # nodes are drawn with replacement, so the pool may contain duplicates
  id_dest, coord_dest = node_items[random.randrange(len(node_items))]
  pool_dest.append({'id':id_dest, 'coordinates':coord_dest})

print(pool_dest)

[{'id': '8039450669', 'coordinates': (11.78, 45.34)}, {'id': '8789265183', 'coordinates': (12.39, 45.59)}, {'id': '2999709445', 'coordinates': (12.2, 45.45)}, {'id': '48150163', 'coordinates': (12.52, 45.62)}, {'id': '2539798832', 'coordinates': (11.81, 45.44)}, {'id': '653943312', 'coordinates': (11.99, 45.25)}, {'id': '4587689364', 'coordinates': (12.54, 45.64)}, {'id': '330870502', 'coordinates': (11.92, 45.39)}]


In [54]:
def generate_trips(n, node_lookup=None, destinations=None):
  """Generate n random trips as parallel lists of start and end points.

  For each trip a start node is drawn uniformly from ``node_lookup`` and an
  end point is picked uniformly from ``destinations``. Both draws use the
  ``random`` module and are made with replacement.

  Args:
    n: number of trips to generate.
    node_lookup: mapping of node ID -> coordinates to draw start points from;
      defaults to the notebook-level ``node_coords``.
    destinations: sequence of ``{'id': ..., 'coordinates': ...}`` dicts to pick
      end points from; defaults to the notebook-level ``pool_dest``.

  Returns:
    A tuple ``(trips_start, trips_dest)`` of two lists of length n. Start
    entries are new ``{'id', 'coordinates'}`` dicts; destination entries are
    the very objects stored in ``destinations``.
  """
  if node_lookup is None:
    node_lookup = node_coords
  if destinations is None:
    destinations = pool_dest
  # Build the (id, coordinates) list once rather than on every iteration.
  node_items = list(node_lookup.items())
  trips_start = []
  trips_dest = []
  for _ in range(n):
    # start point
    id_start, coord_start = node_items[random.randrange(len(node_items))]
    trips_start.append({'id':id_start, 'coordinates':coord_start})
    # end point: the bound comes from the pool itself, not a separate counter
    trips_dest.append(destinations[random.randrange(len(destinations))])

  return trips_start, trips_dest


In [53]:
def generate_schedules(n):
  """Sample n departure times, returned as strings such as "6:00" or "18:00".

  Each time is drawn independently with ``random.choices`` from a fixed set of
  hours, weighted by the share of requests assigned to each one.
  """
  # Fraction of requests per 2-hour slot, keyed by hour
  slot_share = {
    6: 0.2,
    8: 0.1,
    10: 0.07,
    12: 0.1,
    14: 0.07,
    16: 0.15,
    18: 0.15,
    20: 0.05,
    22: 0.05,
    0: 0.01,
    2: 0.01,
    4: 0.04,
  }
  slots = list(slot_share)
  weights = list(slot_share.values())
  # a single call with k=n performs the n weighted draws
  drawn = random.choices(slots, weights, k=n)
  return [str(slot) + ":00" for slot in drawn]

In [77]:
class Driver:
  """A driver offering a trip.

  Attributes:
    id: identifier of the driver.
    rating: the driver's rating.
    hour: departure time (the generator in this notebook produces strings
      such as "18:00").
    start_point: where the trip starts.
    end_point: where the trip ends.
  """

  def __init__(self, id, rating, hour, start_point, end_point):
    self.id = id
    self.rating = rating
    self.hour = hour
    self.start_point = start_point
    self.end_point = end_point

  def __str__(self):
    # f-string formatting converts every field, so a non-string hour no longer
    # raises TypeError as plain string concatenation did.
    return (
      f"Driver ( id = {self.id}, rating = {self.rating}, hour = {self.hour}, "
      f"start = {self.start_point}, dest = {self.end_point})"
    )

class Rider:
  """A rider requesting a trip.

  Attributes:
    id: identifier of the rider.
    hour: departure time (the generator in this notebook produces strings
      such as "18:00").
    start_point: where the trip starts.
    end_point: where the trip ends.
  """

  def __init__(self, id, hour, start_point, end_point):
    self.id = id
    self.hour = hour
    self.start_point = start_point
    self.end_point = end_point

  def __str__(self):
    # f-string formatting converts every field, so a non-string hour no longer
    # raises TypeError as plain string concatenation did.
    return (
      f"Rider ( id = {self.id}, hour = {self.hour}, "
      f"start = {self.start_point}, dest = {self.end_point})"
    )

In [80]:
def generate_data():
  """Build a random population of drivers and riders.

  Prints the two population sizes, then generates trips, schedules and
  (for drivers) ratings, and wraps everything into Driver / Rider objects
  whose ids are their positions in the returned lists.

  Returns:
    A tuple ``(drivers, riders)`` of two lists.
  """
  # Population sizes; randrange(start, stop, step) excludes the stop value
  n_drivers = random.randrange(40, 50, 1)
  n_riders = random.randrange(90, 150, 15)
  print("N_drivers: ", n_drivers)
  print("N_riders: ", n_riders)

  # Start and end points, drivers first and riders second
  driver_starts, driver_dests = generate_trips(n_drivers)
  rider_starts, rider_dests = generate_trips(n_riders)

  # Departure times
  driver_hours = generate_schedules(n_drivers)
  rider_hours = generate_schedules(n_riders)

  # Ratings exist for drivers only; each row holds a single value
  driver_ratings = generate_ratings(n_drivers)

  # Wrap the parallel lists into objects
  riders = [
    Rider(idx, hour, origin, target)
    for idx, (hour, origin, target) in enumerate(zip(rider_hours, rider_starts, rider_dests))
  ]
  drivers = [
    Driver(idx, rating_row[0], hour, origin, target)
    for idx, (rating_row, hour, origin, target) in enumerate(
      zip(driver_ratings, driver_hours, driver_starts, driver_dests)
    )
  ]
  return drivers, riders



In [81]:
drivers, riders = generate_data()

# Show the first five drivers ...
for driver in drivers[:5]:
  print(driver)

print("\n\n***********************************************************************\n\n")

# ... and the first five riders
for rider in riders[:5]:
  print(rider)


N_drivers:  45
N_riders:  120
Driver ( id = 0, rating = 3.5, hour = 18:00, start = {'id': '313013208', 'coordinates': (12.25, 45.41)}, dest = {'id': '8789265183', 'coordinates': (12.39, 45.59)})
Driver ( id = 1, rating = 4.5, hour = 18:00, start = {'id': '2444516688', 'coordinates': (11.86, 45.37)}, dest = {'id': '8039450669', 'coordinates': (11.78, 45.34)})
Driver ( id = 2, rating = 3.5, hour = 6:00, start = {'id': '8261912126', 'coordinates': (12.19, 45.53)}, dest = {'id': '2999709445', 'coordinates': (12.2, 45.45)})
Driver ( id = 3, rating = 3.0, hour = 14:00, start = {'id': '495125033', 'coordinates': (12.55, 45.65)}, dest = {'id': '4587689364', 'coordinates': (12.54, 45.64)})
Driver ( id = 4, rating = 3.5, hour = 6:00, start = {'id': '1462181640', 'coordinates': (11.95, 45.57)}, dest = {'id': '330870502', 'coordinates': (11.92, 45.39)})


***********************************************************************


Rider ( id = 0, hour = 18:00, start = {'id': '6436253332', 'coordinate