# Data Generation for Trip System 

In [11]:
# Number of bytes in one gigabyte (1024^3).
B_to_GB = 1024 ** 3
# Size in bytes that the generated file should reach.
# For a 50 GB data set use: target_size = 50 * B_to_GB
target_size = 1 * B_to_GB

In [None]:
# Intended destination of the generated data set; the final cell of the
# notebook opens `output_file` for writing.
output_file = '/content/data/output.json'

## Standard Route Generation

In [12]:
import random
import json
import os.path
import networkx as nx
import numpy as np

In [13]:
# How many standard trips Generate_All_Standard_Trips is asked to build.
number_of_standard_trips = 10

# False -> every trip is generated with num_cities_per_standard_trip;
# True  -> the size is drawn per trip with randint(min, max), both ends inclusive.
variable_trip_size = False
# False -> every leg carries num_items_per_trip distinct items;
# True  -> the item count is drawn per leg with randint(min, max), both ends inclusive.
variable_item_size = True

# NOTE(review): not referenced by any cell visible in this notebook.
ave_degree = 10

# Only the names belonging to the selected mode are defined.
if not variable_item_size:
    num_items_per_trip = 10
else:
    min_num_items_per_trip, max_num_items_per_trip = 2, 10

if not variable_trip_size:
    num_cities_per_standard_trip = 10
else:
    min_num_cities_per_standard_trip, max_num_cities_per_standard_trip = 2, 10

In [14]:
# City names and item names; each text file holds one name per line.
cities, items = [], []

with open("/content/data/cities.txt") as file:
    cities = file.read().splitlines()

with open("/content/data/foods.txt") as file:
    items = file.read().splitlines()


In [15]:
# Read the adjacency matrix, keeping only the first len(cities) columns of every row.
with open("/content/data/matrix.txt", "r") as file:
    A = np.loadtxt(file, usecols=range(len(cities)))

# Build the graph from the matrix, then rename node i to cities[i].
mapping = dict(enumerate(cities))
G = nx.relabel_nodes(nx.from_numpy_array(np.array(A)), mapping)

In [16]:
def Generate_Random_Standard_Trip(length ):
    """Build a random path through ``G`` that never visits a city twice.

    Starting from a random city, the path is extended with a random unvisited
    neighbour of its last city. When the last city has no unvisited neighbour
    it is dropped and the walk continues from the previous one.

    Args:
        length: the loop runs while ``len(trip) <= length``, so the returned
            trip holds ``length + 1`` cities (``length`` legs).
            NOTE(review): callers pass a value named "num cities per trip",
            so the extra city may be unintended - confirm before changing.

    Returns:
        List of city names in visiting order.
    """
    trip = [random.choice(cities)]
    while len(trip) <= length:
        unvisited = [city for city in nx.neighbors(G, trip[-1]) if city not in trip]
        if unvisited:
            trip.append(random.choice(unvisited))
            continue

        # Dead end: drop the last city and retry from the one before it.
        trip.pop()
        if not trip:
            # The start city itself was a dead end. Restart from a fresh random
            # city instead of indexing into an empty list on the next pass.
            trip.append(random.choice(cities))

    return trip


In [17]:
def Generate_All_Standard_Trips(length):
    """Generate ``length`` standard trips.

    Each trip is built by Generate_Random_Standard_Trip. Its size argument is
    either drawn from [min_num_cities_per_standard_trip,
    max_num_cities_per_standard_trip] (when ``variable_trip_size`` is set) or
    fixed to ``num_cities_per_standard_trip``.

    Returns:
        List of trips, each a list of city names.
    """
    def trip_size():
        # Size argument for the next trip, according to the configured mode.
        if variable_trip_size:
            return random.randint(min_num_cities_per_standard_trip, max_num_cities_per_standard_trip)
        return num_cities_per_standard_trip

    return [Generate_Random_Standard_Trip(trip_size()) for _ in range(length)]

In [18]:
def Convert_Routes_TO_JSON(all_standard_routes):
    """Serialise standard routes to a JSON string, attaching random merchandise.

    Every pair of consecutive cities in a route becomes one leg
    ``{'from', 'to', 'merchandise'}``. ``merchandise`` maps randomly chosen
    distinct item names to a quantity in 1..20. The route's position in the
    input list is used as its ``id``.

    Returns:
        The JSON text (indent of 3, keys in insertion order).
    """
    json_list = []
    for route_id, route in enumerate(all_standard_routes):
        city_to_city = []
        for origin, destination in zip(route, route[1:]):
            # Number of distinct items on this leg: variable or fixed.
            if variable_item_size:
                low, high = min_num_items_per_trip, max_num_items_per_trip
            else:
                low = high = num_items_per_trip
            how_many = random.randint(low, high)
            chosen = random.sample(range(0, len(items)), how_many)
            food_list = {items[k]: random.randint(1, 20) for k in chosen}
            city_to_city.append({'from': origin, 'to': destination, 'merchandise': food_list})

        json_list.append({'id': route_id, 'route': city_to_city})

    return json.dumps(json_list, indent=3, sort_keys=False)


In [19]:
# Generate the standard trips and serialise them to JSON text.
json_data = Convert_Routes_TO_JSON(Generate_All_Standard_Trips(number_of_standard_trips))

# Use a dedicated name for this path. Rebinding `output_file` here would make
# the final generation cell (which opens `output_file` for writing) overwrite
# the standard-routes file instead of the path configured at the top.
standard_routes_file = '/content/data/data.json'
with open(standard_routes_file, 'w') as file:
    file.write(json_data)

print(f"JSON data saved to {standard_routes_file}")

JSON data saved to /content/data/data.json


## Actual Route Generation

In [20]:
# Largest absolute change applied to an item's quantity in one edit:
# edit_random_items draws the delta with randint(-merch_edit_amount, merch_edit_amount).
merch_edit_amount = 10

In [21]:
# Accumulators filled by Split_JSON_Obj, one entry per standard route:
# its id, its list of cities, and its list of per-leg merchandise dicts.
IDs, Routes, Merchs = [], [], []

# `items` and `cities` loaded in the earlier cells are reused here.

with open('/content/data/data.json', 'r') as openfile:
    json_object = json.load(openfile)

In [22]:
def Split_JSON_Obj(json_object):
    """Unpack parsed route records into the global IDs / Routes / Merchs lists.

    For every record the id is appended to ``IDs``. The city sequence (the
    first leg's ``from`` followed by each leg's ``to``) goes to ``Routes``,
    and the per-leg ``merchandise`` values go to ``Merchs``.
    """
    for entry in json_object:
        IDs.append(entry['id'])
        legs = entry['route']
        full_route = [legs[0]['from']]
        full_merch = []
        for leg in legs:
            full_route += [leg['to']]
            full_merch += [leg['merchandise']]
        Routes.append(full_route)
        Merchs.append(full_merch)

In [23]:
def create_neighbour_list(neighbour_list, A):
    """Append one adjacency list per row of ``A`` to ``neighbour_list``.

    The list for row ``i`` holds, in ascending order, every column index ``j``
    below ``len(A)`` for which ``A[i][j] == 1``.
    """
    size = len(A)
    for row in range(size):
        neighbour_list.append([col for col in range(size) if A[row][col] == 1])

In [24]:
def conv_city_names(Routes):
    """Replace, in place, every city name in ``Routes`` by its index in ``cities``."""
    for route in Routes:
        for position, name in enumerate(route):
            route[position] = cities.index(name)

In [25]:
# Fill IDs / Routes / Merchs from the records loaded from data.json.
Split_JSON_Obj(json_object)
# A = np.loadtxt(os.path.dirname(__file__) + '/../data/matrix.txt', usecols=range(len(cities)))

# neighbour_list[i] lists the column indices j with A[i][j] == 1.
neighbour_list = []
create_neighbour_list(neighbour_list, A)
# Convert the city names in Routes to their indices in `cities`. This mutates
# Routes in place, so re-running the cell would look up integers in `cities`.
conv_city_names(Routes)

In [26]:
def binary_search(arr, x):
    """Return an index of ``x`` in the ascending-sorted ``arr``, or -1 if absent."""
    lo, hi = 0, len(arr) - 1

    while lo <= hi:
        middle = (hi + lo) // 2
        probe = arr[middle]

        if probe < x:
            # Target can only be in the upper half.
            lo = middle + 1
            continue
        if probe > x:
            # Target can only be in the lower half.
            hi = middle - 1
            continue
        return middle

    # Search interval is empty: the element is not present.
    return -1

In [27]:
def find_common_neigh(a,b,neighbour_list):
    """Return the entries shared by ``neighbour_list[a]`` and ``neighbour_list[b]``.

    The shorter of the two lists is scanned and each entry is looked up in the
    longer one with ``binary_search``, so the longer list must be sorted
    ascending. The result keeps the order of the shorter list.
    """
    first, second = neighbour_list[a], neighbour_list[b]
    if len(first) > len(second):
        smaller_list, bigger_list = second, first
    else:
        smaller_list, bigger_list = first, second

    return [node for node in smaller_list if binary_search(bigger_list, node) != -1]

In [28]:
def add_random_route_to_end(route , merch):
    """Extend ``route`` with a random unvisited neighbour of its last city.

    ``route`` is a list of city indices and ``merch`` the list of per-leg
    merchandise dicts (``merch[i]`` belongs to the leg ``route[i]`` ->
    ``route[i + 1]``). When a city is appended, merchandise for the new last
    leg is appended too: randomly chosen distinct items, each with a quantity
    in 1..20.

    If every neighbour of the last city is already on the route, nothing is
    changed, so the two lists stay aligned.
    """
    # Neighbours of the LAST city. Indexing the first city's neighbour list
    # here could raise IndexError or pick a city that is not adjacent.
    candidates = [city for city in neighbour_list[route[-1]] if city not in route]
    if not candidates:
        return

    route.append(random.choice(candidates))

    # Number of distinct items on the new leg: variable or fixed.
    if variable_item_size:
        low, high = min_num_items_per_trip, max_num_items_per_trip
    else:
        low = high = num_items_per_trip
    chosen = random.sample(range(0, len(items)), random.randint(low, high))
    merch.append({items[k]: random.randint(1, 20) for k in chosen})
    

In [29]:
def add_random_route_to_front(route , merch):
    """Prepend a random unvisited neighbour of the first city to ``route``.

    ``route`` is a list of city indices and ``merch`` the list of per-leg
    merchandise dicts (``merch[i]`` belongs to the leg ``route[i]`` ->
    ``route[i + 1]``). When a city is prepended, merchandise for the new first
    leg is inserted at index 0: randomly chosen distinct items, each with a
    quantity in 1..20.

    If every neighbour of the first city is already on the route, nothing is
    changed. Inserting merchandise without a new city would shift every leg's
    load by one position.
    """
    candidates = [city for city in neighbour_list[route[0]] if city not in route]
    if not candidates:
        return

    route.insert(0, random.choice(candidates))

    # Number of distinct items on the new leg: variable or fixed.
    if variable_item_size:
        low, high = min_num_items_per_trip, max_num_items_per_trip
    else:
        low = high = num_items_per_trip
    chosen = random.sample(range(0, len(items)), random.randint(low, high))
    merch.insert(0, {items[k]: random.randint(1, 20) for k in chosen})

In [30]:
def edit_random_items(merch,edit_num):
    """Randomly change the quantity of up to ``edit_num`` items of ``merch`` in place.

    ``edit_num`` is capped at the number of items. Each selected quantity is
    shifted by a delta from [-merch_edit_amount, merch_edit_amount]; the delta
    is redrawn until the new quantity is strictly positive.
    """
    names = list(merch.keys())
    values = list(merch.values())

    edit_num = min(edit_num, len(merch))

    for idx in random.sample(range(0, len(merch)), edit_num):
        delta = random.randint(-merch_edit_amount, merch_edit_amount)
        # Reject deltas that would leave a zero or negative quantity.
        while delta + values[idx] <= 0:
            delta = random.randint(-merch_edit_amount, merch_edit_amount)
        merch[names[idx]] = values[idx] + delta

In [31]:
def del_random_items(merch, del_num):
    """Delete ``del_num`` randomly chosen items from ``merch`` in place.

    Nothing is deleted when ``del_num`` is not positive or when it would empty
    the dict (``del_num >= len(merch)``), so at least one item always remains.
    """
    if del_num <= 0 or del_num >= len(merch):
        return
    # Sample from all keys. Sampling indices from range(0, len(merch) - 1)
    # would exclude the last item, which could then never be deleted.
    for name in random.sample(list(merch), del_num):
        del merch[name]

In [32]:
def add_random_items(merch, add_num):    # does not add item already in the list
    """Add up to ``add_num`` random new items to ``merch`` in place.

    Item names are tried in a random order of ``items``. Names that were
    already keys of ``merch`` on entry are skipped, and each added item gets a
    quantity in 1..20. Fewer than ``add_num`` items are added when the item
    list is exhausted first.
    """
    present = list(merch.keys())
    order = random.sample(range(0, len(items)), len(items))
    added = 0
    cursor = 0

    while added < add_num:
        candidate = items[order[cursor]]
        cursor += 1
        if candidate not in present:
            merch[candidate] = random.randint(1, 20)
            added += 1

        # Every item has been tried: stop even if fewer than add_num were added.
        if cursor == len(items):
            return

In [33]:
def add_random_trip(routes , merchs):                                       # Add random trip in the middle of the route
    """Insert one random city into ``routes`` and keep ``merchs`` aligned with it.

    ``routes`` is a list of city indices and ``merchs`` the list of per-leg
    merchandise dicts (``merchs[i]`` belongs to ``routes[i]`` ->
    ``routes[i + 1]``). Candidate positions are tried in random order:

    * position 0 delegates to ``add_random_route_to_front``;
    * any other position ``k`` inserts, between ``routes[k - 1]`` and
      ``routes[k]``, a city adjacent to both that is not yet on the route.
      The merchandise of the split leg is replaced by fresh merchandise for
      the two new legs.

    Prints "No common neighbours" and leaves both lists untouched when no
    position admits an insertion.
    """
    def _random_merch():
        # Fresh merchandise for one leg: distinct random items, quantity 1..20.
        if variable_item_size:
            low, high = min_num_items_per_trip, max_num_items_per_trip
        else:
            low = high = num_items_per_trip
        chosen = random.sample(range(0, len(items)), random.randint(low, high))
        return {items[l]: random.randint(1, 20) for l in chosen}

    # Positions 0 .. len(routes) - 2 in random order; empty for routes with
    # fewer than two cities, so no negative sample size is ever requested.
    slots = list(range(len(routes) - 1))
    random.shuffle(slots)

    for k in slots:
        if k == 0:
            add_random_route_to_front(routes, merchs)
            return
        elif k == len(routes)-1:
            # NOTE(review): never taken, because `slots` stops at len(routes) - 2.
            add_random_route_to_end(routes, merchs)
            return

        # Like the front/end helpers, never add a city already on the route.
        common_neighbors = [
            city
            for city in find_common_neigh(routes[k-1], routes[k], neighbour_list)
            if city not in routes
        ]
        if common_neighbors:
            picked = random.choice(common_neighbors)
            break
    else:
        print("No common neighbours")
        return

    routes.insert(k , picked)
    # The old leg routes[k-1] -> routes[k+1] is now two legs: replace its single
    # merchandise entry with two new ones so every later leg keeps its own load.
    merchs[k-1:k] = [_random_merch(), _random_merch()]

In [34]:
def remove_front(routes , merchs):
    """Drop the first city of ``routes`` and the first entry of ``merchs``, in place."""
    routes.pop(0)
    merchs.pop(0)

In [35]:
def remove_end(routes , merchs):
    """Drop the last city of ``routes`` and the last entry of ``merchs``, in place."""
    routes.pop()
    merchs.pop()

In [36]:
def remove_trip_at(ind , routes , merchs):
    """Remove the interior city ``routes[ind]`` if its two neighbours are adjacent.

    ``routes`` is a list of city indices and ``merchs`` the list of per-leg
    merchandise dicts (``merchs[i]`` belongs to ``routes[i]`` ->
    ``routes[i + 1]``). The city can only be dropped when ``routes[ind - 1]``
    and ``routes[ind + 1]`` are directly connected according to
    ``neighbour_list``. The two legs touching the removed city are then
    replaced by a single leg with fresh random merchandise.

    Returns:
        True when the city was removed. False when ``ind`` is not an interior
        position or the surrounding cities are not adjacent; both lists are
        left untouched in that case.
    """
    # Only interior cities have both a predecessor and a successor.
    if ind <= 0 or ind >= len(routes) - 1:
        return False

    prev_city = routes[ind-1]
    next_city = routes[ind+1]
    if next_city not in neighbour_list[prev_city]:      # Not directly connected
        return False

    # Actually remove the city; inserting a common neighbour would lengthen the route.
    del routes[ind]

    # Merchandise for the merged leg: distinct random items, quantity 1..20.
    if variable_item_size:
        low, high = min_num_items_per_trip, max_num_items_per_trip
    else:
        low = high = num_items_per_trip
    chosen = random.sample(range(0, len(items)), random.randint(low, high))
    food_list = {items[l]: random.randint(1, 20) for l in chosen}

    # Legs ind-1 and ind collapse into one.
    merchs[ind-1:ind+1] = [food_list]

    return True

In [37]:
def remove_random_trip(routes , merchs):
    """Remove one randomly chosen city from ``routes``, keeping ``merchs`` aligned.

    Routes with fewer than three cities are left untouched. Positions are
    tried in random order: the first city is dropped with ``remove_front``,
    the last with ``remove_end``, and an interior one with ``remove_trip_at``.
    When ``remove_trip_at`` reports failure the next position is tried. At
    most one removal is performed per call.
    """
    if len(routes) < 3:
        return

    # Every position, including the last one. Sampling only up to
    # len(routes) - 2 would make the remove_end branch unreachable.
    last = len(routes) - 1
    positions = random.sample(range(0, len(routes)), len(routes))     # Random order in which to try removing a city

    for k in positions:
        if k == 0:
            remove_front(routes , merchs)
        elif k == last:
            remove_end(routes , merchs)
        elif not remove_trip_at(k, routes , merchs):
            continue
        return

In [38]:
def convert_to_JSON(JSON_list, id, routes, merchs):
    """Append one route record ``{"id", "route"}`` to ``JSON_list``.

    Every pair of consecutive entries of ``routes`` becomes a leg with keys
    ``from``, ``to`` and ``merchandise``. ``merchandise`` is the matching dict
    of ``merchs`` expanded into a list of ``{"item", "quantity"}`` entries.
    """
    legs = [
        {
            "from": routes[j],
            "to": routes[j + 1],
            "merchandise": [
                {"item": item, "quantity": quantity}
                for item, quantity in merchs[j].items()
            ],
        }
        for j in range(len(routes) - 1)
    ]

    JSON_list.append({"id": id, "route": legs})

In [39]:
# Generate "actual" routes by randomly perturbing standard routes and stream
# them to `output_file` as one JSON array until about `target_size` bytes
# have been written.

mean = 3
sd = 3

# Pool of non-negative integers, |round(x)| for x drawn from N(mean, sd).
# Every "how many trips / items to add, remove or edit" count is drawn from it.
rand_list = np.random.normal(loc=mean, scale=sd, size=100)
rand_dist_list = [int(abs(round(value))) for value in rand_list]

with open(output_file, mode="w+") as file:
    file.write("[")

    # Count the characters written ourselves: os.path.getsize() on a file that
    # is still open and buffered lags behind what has been written.
    # json.dumps escapes non-ASCII by default, so one character is one byte.
    bytes_written = 1
    new_id = 0

    while bytes_written < target_size:
        picked = random.randint(0, len(Routes) - 1)  # Pick random route
        routes = Routes[picked].copy()
        # Copy every merchandise dict as well: the helpers below mutate them in
        # place, and a shallow list copy would alter the standard route itself.
        merchs = [dict(leg_merch) for leg_merch in Merchs[picked]]

        num_remove_trip = random.choice(rand_dist_list)
        num_add_trip = random.choice(rand_dist_list)

        for i in range(num_add_trip):
            add_random_trip(routes, merchs)

        for i in range(num_remove_trip):
            remove_random_trip(routes, merchs)

        for j in range(len(merchs)):
            num_remove_merch = random.choice(rand_dist_list)
            num_add_merch = random.choice(rand_dist_list)
            num_edit_merch = random.choice(rand_dist_list)

            del_random_items(merchs[j], num_remove_merch)
            add_random_items(merchs[j], num_add_merch)
            edit_random_items(merchs[j], num_edit_merch)

        # Convert city indices back to names and expand each merchandise dict.
        city_to_city = []
        for j in range(len(routes) - 1):
            merchandise = [
                {"food_type": item, "qty": quantity}
                for item, quantity in merchs[j].items()
            ]
            city_to_city.append({"from": cities[routes[j]], "to": cities[routes[j + 1]], "merchandise": merchandise})

        json_output = json.dumps({"id": new_id, "route": city_to_city}, indent=3, sort_keys=False)
        # Put the separator BEFORE every record except the first, so the array
        # can never end with a dangling comma.
        if new_id > 0:
            json_output = ",\n" + json_output
        file.write(json_output)
        bytes_written += len(json_output)

        new_id += 1

    file.write("]")

print("Output file reached the target size!")

Output file reached the target size!
