# WSmart+ Route Datasets

In [1]:
from notebook_setup import setup_home_directory, setup_google_colab


# Name passed to the setup helpers to identify this notebook.
NOTEBOOK_NAME = 'datasets'
# Resolve the repository root (the setup output reports it being added to the system path).
home_dir = setup_home_directory(NOTEBOOK_NAME)
# IN_COLAB gates the Colab-only cells below; gdrive is used later to mount Google Drive.
IN_COLAB, gdrive, gfiles = setup_google_colab(NOTEBOOK_NAME)

Setup completed - added home_dir to system path: /home/pkhunter/Repositories/WSmart-Route


In [2]:
# Install the third-party dependencies only when running on Google Colab.
# NOTE(review): versions are unpinned — consider pinning them for reproducibility.
if IN_COLAB:
    %pip install osmnx
    %pip install plotly
    %pip install pandas
    %pip install scipy
    %pip install torch
    %pip install torch_geometric
    %pip install networkx
    %pip install numpy

In [None]:
import os
import math
import json
import torch
import osmnx as ox
import numpy as np
import pandas as pd
import networkx as nx
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go

from itertools import islice
from collections.abc import Iterable
from torch.utils.data import DataLoader
from scipy.stats import gamma
from torch_geometric.utils import segregate_self_loops

from logic.src.pipeline.simulator.wsmart_bin_analysis import OldGridBase, GridBase, Simulation
from logic.src.utils.plot_utils import draw_graph
from logic.src.utils.functions import load_problem
from logic.src.utils.graph_utils import sort_by_pairs
from logic.src.pipeline.simulator.processor import process_indices, haversine_distance
from logic.src.pipeline.simulator.network import compute_distance_matrix
from logic.src.pipeline.simulator.loader import load_depot, load_simulator_data


# Seed every random generator this notebook draws from: torch, and NumPy's
# global generator, which later cells use through np.random.gamma and
# np.random.choice. Without the NumPy seed those cells change on every run.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Allow long tables to be displayed in full when SHOW_TABLES is enabled.
MAX_DISPLAY_ROWS = 500
pd.set_option('display.max_rows', MAX_DISPLAY_ROWS)

# Output toggles: each guards the optional maps / printed lists / displayed tables below.
SHOW_MAPS = False
SHOW_LISTS = False
SHOW_TABLES = False
if IN_COLAB:
    gdrive.mount('/content/drive')

# Required to use matplotlib in Windows without breaking the Kernel
if os.name == 'nt':
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

In [4]:
def show_type(obj, obj_name=None):
    """Print the type of ``obj`` and, for iterables, its dimensions and element type.

    Args:
        obj: Any object. DataFrames are inspected directly; every other iterable
            is converted to a NumPy array to obtain a shape and a first element.
        obj_name: Optional label included in the first printed line.

    Empty containers print their dimensions only, since they have no first
    element whose type could be reported.
    """
    if obj_name is not None:
        print(f"Type of {obj_name} instance: {type(obj)}")
    else:
        print(f"Type: {type(obj)}")

    if not isinstance(obj, Iterable):
        return

    if isinstance(obj, pd.DataFrame):
        print(f"Iterable dimensions: {obj.shape}")
        # An empty frame has no first row to inspect.
        if len(obj) > 0:
            print(f"Element type: {type(obj.iloc[0])}")
        return

    arr = obj if isinstance(obj, np.ndarray) else np.array(obj)
    print(f"Iterable dimensions: {arr.shape}")
    # item(0) raises on an empty array, so only report the element type when one exists.
    if arr.size > 0:
        print(f"Element type: {type(arr.item(0))}")

## Load and generate data

In [5]:
# Area whose depot (and, further down, bins) are loaded from the simulator data directory.
area = "Rio Maior"
data_dir = os.path.join(home_dir, "data", "wsr_simulator")
depot = load_depot(data_dir, area)
# Keep only the identifier and coordinates so the depot can be stacked on top of bin coordinates.
depot_c = depot[['ID', 'Lat', 'Lng']]
depot_c

Unnamed: 0,ID,Lat,Lng
0,0,39.183851,-9.148065


### WSmart+ Route Simulator (WSRS) original data

In [6]:
def show_maxmin_latlong(coords):
    """Print the number of bins and the min/max of their coordinates.

    Args:
        coords: Table-like object with 'Lat' and 'Lng' columns (one row per bin).
    """
    print('Coordinate limits for', len(coords), 'bins:')
    # Printed label -> column name; longitude is labelled "Lon" but stored as "Lng".
    for label, column in (('Lat', 'Lat'), ('Lon', 'Lng')):
        values = coords[column]
        print(f'- {label} min:', values.min())
        print(f'- {label} max:', values.max())

In [7]:
# Load the 225-bin "mixrmbac" dataset and build its distance matrix.
# The depot is concatenated first, so it occupies row/column 0 of the matrix input.
old_data225, old_coords225 = load_simulator_data(data_dir, number_of_bins=225, area="mixrmbac")
old_dist_matrix225 = compute_distance_matrix(pd.concat([depot_c, old_coords225]).reset_index(drop=True), method='ogd')
show_maxmin_latlong(old_coords225)

Coordinate limits for 225 bins:
- Lat min: 39.1923394226
- Lat max: 39.4742888888889
- Lon min: -9.19131189008
- Lon max: -8.79293435861


In [8]:
# Optional map of the 225 bins, coloured by their distance to the depot.
if SHOW_MAPS:
    coords225_to_print = old_coords225.copy()
    # Row 0 of the matrix is the depot; columns 1.. are its distances to each bin.
    coords225_to_print['Dist_Depot'] = old_dist_matrix225[:1, 1:][0]
    fig = px.scatter_mapbox(
        coords225_to_print, 
        lat="Lat", 
        lon="Lng", 
        hover_name="ID", 
        hover_data=["ID", "Dist_Depot"],
        color="Dist_Depot",
        color_continuous_scale=[(0, 'cyan'), (1,'blue')],
        #color_discrete_sequence=["blue"],
        size_max=10,
        zoom=10,
        height=650,
        width=750
    )

    # Overlay the depot as a single labelled black marker.
    fig.add_trace(go.Scattermapbox(
        lat=depot_c['Lat'],
        lon=depot_c['Lng'],
        mode="markers+text",
        marker=go.scattermapbox.Marker(size=10, color="black"),
        text=["Depot"],
        textposition="bottom center",
        name="Depot",
        showlegend=False
    ))

    fig.update_layout(mapbox_style="open-street-map")
    fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
    fig.show()

In [9]:
# Disabled experiment: download the OSM road network covering the bins.
# NOTE(review): `bins_coords` is not defined in any cell visible above, and the
# bounding box passes min(Lat) twice and max(Lng) twice — confirm the intended
# bounds (and the argument order expected by the installed osmnx) before re-enabling.
if False:
    # mode='all'|'all_public'|'drive_service'|'drive'|'bike'|'walk'
    mode = 'drive_service'
    ox.settings.use_cache = True
    ox.settings.log_console = True
    #bbox_limits = (np.min(bins_coords['Lat'])-0.5, np.max(bins_coords['Lng'])-0.2, np.max(bins_coords['Lng'])+0.2, np.min(bins_coords['Lat'])+0.5)
    bbox_limits = (np.min(bins_coords['Lat']), np.max(bins_coords['Lng']), np.max(bins_coords['Lng']), np.min(bins_coords['Lat']))
    graph = ox.graph_from_bbox(bbox_limits, network_type=mode)
    ox.plot_graph(graph)

In [10]:
# Disabled experiment: road-network shortest-path length between every pair of bins.
# NOTE(review): `i` and `j` are assigned but never used; each appended row is a single
# comma-joined string rather than three values matching the header row; and the
# positional columns 1 and 2 are presumably (Lat, Lng) — confirm that this matches the
# coordinate order ox.nearest_nodes expects before re-enabling.
if False:
    i = 0
    matrix = [['BIN_ID_X', 'BIN_ID_Y', 'DISTANCE']]
    for bin in islice(bins_coords.iterrows(), 0, len(list(bins_coords.ID))):
        j = 0
        for each_bin in bins_coords.iterrows():
            start_coords = (float(bin[1][1]), float(bin[1][2]))
            end_coords = (float(each_bin[1][1]), float(each_bin[1][2]))
            orig_node = ox.nearest_nodes(graph, start_coords[0], start_coords[1])
            dest_node = ox.nearest_nodes(graph, end_coords[0], end_coords[1])
            print('orig:', orig_node)
            print('dest:', dest_node)
            shortest_route_distance = nx.shortest_path_length(graph, orig_node, dest_node, weight="length", method="dijkstra")
            matrix.append([str(int(bin[1][0])) + ", " + str(int(each_bin[1][0])) + ", " + str(float(shortest_route_distance))])
            print(matrix[-1])

    print(matrix)

#### WSRS full dataset and subsets

In [11]:
# 20-bin "mixrmbac" subset; the depot is prepended so it is node 0 of the distance matrix input.
old_data20, old_coords20 = load_simulator_data(data_dir, number_of_bins=20, area="mixrmbac")
old_dist_matrix20 = compute_distance_matrix(pd.concat([depot_c, old_coords20]).reset_index(drop=True), method='ogd')
show_maxmin_latlong(old_coords20)

Coordinate limits for 20 bins:
- Lat min: 39.2394966666667
- Lat max: 39.313247722
- Lon min: -9.14663990915
- Lon max: -9.05968835622


In [12]:
# 50-bin "mixrmbac" subset; the depot is prepended so it is node 0 of the distance matrix input.
old_data50, old_coords50 = load_simulator_data(data_dir, number_of_bins=50, area="mixrmbac")
old_dist_matrix50 = compute_distance_matrix(pd.concat([depot_c, old_coords50]).reset_index(drop=True), method='ogd')
show_maxmin_latlong(old_coords50)

Coordinate limits for 50 bins:
- Lat min: 39.1933922824
- Lat max: 39.3278877543
- Lon min: -9.19131189008
- Lon max: -8.84237843324


In [13]:
# For each dataset: total distance from every bin to all other bins
# (row/column 0, the depot, is excluded), sorted ascending.
s20 = pd.Series(np.sum(old_dist_matrix20[1:, 1:], axis=1)).sort_values()
s50 = pd.Series(np.sum(old_dist_matrix50[1:, 1:], axis=1)).sort_values()
s225 = pd.Series(np.sum(old_dist_matrix225[1:, 1:], axis=1)).sort_values()

# Reindex series to the same index (use union of all indices)
index_union = s20.index.union(s50.index).union(s225.index)

# Reindex all series
# Positions missing from the smaller datasets become NaN, which sort_values places last,
# so all three series end up with the same length.
s20 = s20.reindex(index_union).sort_values()
s50 = s50.reindex(index_union).sort_values()
s225 = s225.reindex(index_union).sort_values()

# Side-by-side ranking: each (IDxx, Bxx) pair is a bin position and its distance sum.
df = pd.DataFrame({
    'ID20': s20.index, 'B20': s20.values,
    'ID50': s50.index, 'B50': s50.values,
    'ID225': s225.index, 'B225': s225.values
})
if SHOW_TABLES: display(df)

### WSmart+ Bin Analysis (WSBA) data

In [14]:
# 317-bin dataset for the selected `area`; the depot is prepended as node 0 of the matrix input.
data317, coords317 = load_simulator_data(data_dir, number_of_bins=317, area=area)
dist_matrix317 = compute_distance_matrix(pd.concat([depot_c, coords317]).reset_index(drop=True), method='ogd')
show_maxmin_latlong(coords317)

Coordinate limits for 317 bins:
- Lat min: 39.25353454
- Lat max: 39.4482722222222
- Lon min: -8.98823888888889
- Lon max: -8.79266007


In [15]:
# Latitude midway between the southernmost and northernmost bins, followed by the
# (symmetric) offsets of those two extremes from it.
center_parallel = (coords317['Lat'].min() + coords317['Lat'].max()) / 2
print(center_parallel)
print(coords317['Lat'].min() - center_parallel)
print(coords317['Lat'].max() - center_parallel)

39.350903381111095
-0.09736884111109845
0.09736884111110555


In [16]:
# Optional map of the 317 bins, coloured by their distance to the depot.
if SHOW_MAPS:
    coords317_to_print = coords317.copy()
    # Row 0 of the matrix is the depot; columns 1.. are its distances to each bin.
    coords317_to_print['Dist_Depot'] = dist_matrix317[:1, 1:][0]
    fig = px.scatter_mapbox(
        coords317_to_print, 
        lat="Lat", 
        lon="Lng", 
        hover_name="ID", 
        hover_data=["ID", "Dist_Depot"],
        color="Dist_Depot",
        color_continuous_scale=[(0, 'cyan'), (1,'blue')],
        #color_discrete_sequence=["blue"],
        size_max=10,
        zoom=10,
        height=450,
        width=550
    )

    # Overlay the depot as a single labelled black marker.
    fig.add_trace(go.Scattermapbox(
        lat=depot_c['Lat'],
        lon=depot_c['Lng'],
        mode="markers+text",
        marker=go.scattermapbox.Marker(size=10, color="black"),
        text=["Depot"],
        textposition="bottom center",
        name="Depot",
        showlegend=False
    ))

    fig.update_layout(mapbox_style="open-street-map")
    fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
    fig.show()

#### Separate types of waste

In [17]:
def process_old_wsba_coords(bins_coords):
    """Return the ID/Lat/Lng view of a WSBA bin table.

    The 'Latitude' and 'Longitude' columns are renamed to 'Lat' and 'Lng', all
    other columns are dropped, and the row index is reset to 0..n-1. The input
    table is left untouched.
    """
    column_map = {'Latitude': 'Lat', 'Longitude': 'Lng'}
    return (
        bins_coords
        .rename(columns=column_map)
        .loc[:, ['ID', 'Lat', 'Lng']]
        .reset_index(drop=True)
    )

In [18]:
# Load the WSBA data for `area`: `new_data` (the recorded output shows 1025 rows x 317 columns)
# and `info`, the per-bin metadata table (317 rows).
grid = OldGridBase(data_dir, area)
new_data, info = grid.load_data()
show_type(info, "info")
show_type(new_data, "new_data")
if SHOW_TABLES: display(info)

Type of info instance: <class 'pandas.core.frame.DataFrame'>
Iterable dimensions: (317, 11)
Element type: <class 'pandas.core.series.Series'>
Type of new_data instance: <class 'pandas.core.frame.DataFrame'>
Iterable dimensions: (1025, 317)
Element type: <class 'pandas.core.series.Series'>


In [19]:
# Waste-type labels as they appear in the 'Tipo de Residuos' column; only the first one
# is used in this cell.
waste_types = ['Mistura de embalagens', 'Embalagens de papel e cartão']
plastic_bins = info[info['Tipo de Residuos'] == waste_types[0]]
plastic_df = process_old_wsba_coords(plastic_bins)
plastic_dist_matrix = compute_distance_matrix(pd.concat([depot_c, plastic_df]).reset_index(drop=True), method='ogd')
# Total distance from each selected bin to all the others (depot row/column excluded).
plastic_df['Distance'] = np.sum(plastic_dist_matrix[1:, 1:], axis=1)
if SHOW_TABLES: display(plastic_df)

In [20]:
# Bins present in both datasets, matched on identical coordinates; the WSBA ID is kept.
merged_plastic = plastic_df.merge(old_coords225, on=['Lat', 'Lng'], how='inner')
merged_plastic = merged_plastic.drop('ID_y', axis=1).rename(columns={'ID_x': 'ID'})
# Row positions of the matched bins inside plastic_df.
# NOTE(review): the insert below assumes one merged row per matched ID, in plastic_df order —
# duplicate coordinates in old_coords225 would break that; confirm.
merged_idx = plastic_df[plastic_df['ID'].isin(merged_plastic['ID'])].index.tolist()
merged_plastic.insert(0, 'index', merged_idx)
merged_plastic = merged_plastic.sort_values('Distance')
if SHOW_TABLES: display(merged_plastic)

#### Intermediate graphs selection

In [21]:
# plastic_df positions of the 20 matched bins with the smallest distance sums
# (merged_plastic is sorted ascending by 'Distance'), listed in ascending position order.
first20_plastic = merged_plastic['index'].head(20).tolist()
first20_plastic.sort()
if SHOW_LISTS: print(first20_plastic)

[4, 10, 11, 13, 14, 15, 21, 24, 30, 32, 33, 46, 63, 64, 74, 76, 154, 155, 157, 169]


In [22]:
# plastic_df positions of the 50 matched bins with the largest distance sums,
# listed in ascending position order.
last50_plastic = merged_plastic['index'].tail(50).tolist()
last50_plastic.sort()
if SHOW_LISTS: print(last50_plastic)

[0, 1, 3, 4, 5, 7, 9, 11, 13, 14, 15, 17, 20, 21, 24, 27, 30, 32, 33, 37, 38, 45, 46, 57, 63, 64, 66, 67, 70, 73, 74, 75, 76, 128, 141, 146, 149, 150, 151, 152, 153, 154, 156, 157, 158, 160, 163, 166, 169, 172]


In [23]:
# Bins that exist only in the WSBA data: plastic_df rows whose (Lat, Lng) pair
# has no match among the merged bins.
ckeys = ['Lat', 'Lng']
mkeys = merged_plastic[ckeys].apply(tuple, axis=1)
pkeys = plastic_df[ckeys].apply(tuple, axis=1)
wsba_only_df = plastic_df[~pkeys.isin(mkeys)].copy()
# Record each row's position in plastic_df BEFORE sorting. Inserting a list kept
# in original row order into an already-sorted frame pairs positions with the
# wrong rows, because insert() assigns list values positionally.
wsba_only_idx = wsba_only_df.index.tolist()
wsba_only_df.insert(0, 'index', wsba_only_idx)
wsba_only_df = wsba_only_df.sort_values('Distance')
if SHOW_TABLES: display(wsba_only_df)

In [24]:
# All selected bins ordered by distance sum; reset_index() keeps the original row
# position in a column named 'index'.
sorted_plastic_df = plastic_df.reset_index().sort_values('Distance')
if SHOW_TABLES: display(sorted_plastic_df)

In [25]:
# Positions of the 170 bins with the largest distance sums.
# NOTE(review): despite the "only_wsba" name this selects from sorted_plastic_df
# (every selected bin), not from wsba_only_df — confirm which was intended.
last170_only_wsba = sorted_plastic_df['index'].tail(170).tolist()
last170_only_wsba.sort()
if SHOW_LISTS: print(last170_only_wsba)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172]


In [26]:
# Positions of the 100 bins with the smallest distance sums.
# NOTE(review): despite the "only_wsba" name this selects from sorted_plastic_df
# (every selected bin), not from wsba_only_df — confirm which was intended.
first100_only_wsba = sorted_plastic_df['index'].head(100).tolist()
first100_only_wsba.sort()
if SHOW_LISTS: print(first100_only_wsba)

[0, 3, 4, 5, 7, 8, 10, 11, 12, 13, 14, 15, 16, 18, 19, 21, 22, 24, 25, 29, 30, 32, 33, 36, 37, 39, 40, 41, 42, 43, 46, 47, 54, 56, 58, 60, 61, 62, 63, 64, 68, 69, 71, 72, 74, 75, 76, 77, 78, 82, 85, 87, 92, 93, 94, 95, 97, 98, 99, 102, 104, 105, 107, 110, 111, 112, 113, 114, 115, 117, 118, 120, 122, 124, 125, 130, 135, 136, 137, 138, 139, 140, 141, 142, 145, 146, 147, 151, 153, 154, 155, 156, 157, 158, 159, 163, 164, 167, 169, 172]


#### Full data pipeline

In [27]:
# Optionally show the full WSBA table loaded by grid.load_data().
if SHOW_TABLES: display(new_data)

In [28]:
# First and last index labels of the WSBA table (timestamps, per the recorded output).
new_index = new_data.index
print(new_index[0])
print(new_index[-1])

2021-01-12 00:00:00
2023-10-23 00:00:00


In [42]:
# One row per bin: transpose so the original columns (bins) become rows.
new_data_transpose = new_data.copy().transpose()
# Average over the original observations only. This must run BEFORE the all-zero
# 'Stock' column is added, otherwise that zero is counted as one more
# observation and pulls every mean down.
accum_rate = new_data_transpose.mean(axis=1)
new_data_transpose["Stock"] = np.zeros((len(new_data_transpose)))
new_data_transpose['Accum_Rate'] = accum_rate
new_data_transpose = new_data_transpose[['Stock', 'Accum_Rate']].reset_index(drop=False).rename(columns={'index': 'ID'})
# Bin identifiers come from column labels; make them numeric so they can be merged on 'ID'.
new_data_transpose['ID'] = pd.to_numeric(new_data_transpose['ID'])
if SHOW_TABLES: display(new_data_transpose)

In [43]:
# Attach coordinates from `info` to the per-bin rates, then compute each bin's distance to the depot.
full_new_data = pd.merge(new_data_transpose, info, on='ID')
full_new_data = full_new_data[['ID', 'Stock', 'Accum_Rate', 'Latitude', 'Longitude']].rename(columns={'Latitude': 'Lat', 'Longitude': 'Lng'})
wsba_dist_depot = []
for lat, long in zip(full_new_data['Lat'], full_new_data['Lng']):
    # NOTE(review): depot['Lat'] / depot['Lng'] are columns of the depot table, not scalars, so
    # tmp_dist may itself be a Series — confirm what haversine_distance returns here.
    tmp_dist = haversine_distance(depot['Lat'], depot['Lng'], lat, long)
    wsba_dist_depot.append(tmp_dist)

In [45]:
#full_new_data['Edge_Weight']= [sum(row) for row in compute_distance_matrix(full_new_data, wsba_size)]
# Store the depot distances computed in the previous cell (one entry per row, same order).
full_new_data['Dist_Depot'] = wsba_dist_depot
if SHOW_TABLES: display(full_new_data)

In [47]:
# Reload the data with processed=False for comparison; the second return value is discarded.
unprocessed_data, _ = grid.load_data(processed=False)
if SHOW_TABLES: display(unprocessed_data)

In [None]:
# Collect Ndays successive draws from grid.sample().
# NOTE(review): presumably each draw holds one value per bin — confirm against OldGridBase.
Ndays = 3
srate_ls = []
for _ in range(Ndays):
    srate_ls.append(grid.sample())

show_type(srate_ls)
if SHOW_LISTS: print(srate_ls)

In [None]:
# Collect the grid values for Ndays consecutive dates starting on 2022-03-01
# (the end date is inclusive, hence the Ndays-1 offset).
trate_ls = []
start_date = pd.to_datetime("2022-03-01")
end_date = pd.to_datetime(start_date) + pd.DateOffset(days=Ndays-1)
for date in pd.date_range(start=start_date, end=end_date):
    trate_ls.append(grid.get_values_by_date(date, sample = True))
    
show_type(trate_ls)
if SHOW_LISTS: print(trate_ls)

In [None]:
# Inspect the type, dimensions and values returned by grid.get_std_rate().
std_rate = grid.get_std_rate()
show_type(std_rate)
print(std_rate)

In [None]:
# Inspect the type, dimensions and values returned by grid.get_mean_rate().
mean_rate = grid.get_mean_rate()
show_type(mean_rate)
print(mean_rate)

In [None]:
# Inspect the type, dimensions and values returned by grid.get_var_rate().
var_rate = grid.get_var_rate()
show_type(var_rate)
print(var_rate)

In [None]:
# Print the grid's info for the first two bins.
# NOTE(review): the loop variable `id` shadows the built-in id() for the rest of the session.
bins_idx = [0, 1]
print(f"Number of bins: {len(info)}")
for id in bins_idx:
    print(grid.get_info(id))

##### Sampling waste fill values

In [None]:
# Define parameters for gamma distribution
alpha_comb = [[5, 10], [2, 6], [1, 3]]  # Shape parameters
theta_comb = [[5, 2], [6, 4], [8, 6]]  # Scale parameters

x = np.linspace(0, 1000, 1000)  # Range of x values for plotting

# Plot PDFs for each combination of parameters
plt.figure(figsize=(10, 6))
for id, (alpha_values, theta_values) in enumerate(zip(alpha_comb, theta_comb)):
    for alpha in alpha_values:
        for theta in theta_values:
            # Calculate PDF for gamma distribution
            pdf = gamma.pdf(x, a=alpha, scale=theta)
            
            # Plot the PDF
            plt.plot(x, pdf, label=f'α={alpha}, θ={theta}')

    plt.title(f'Probability Density Function of Gamma Distribution {id+1}')
    plt.xlabel('Daily Waste Fill')
    plt.ylabel('PDF')
    plt.legend()
    plt.grid(True)
    plt.xlim(0, 120)

    #plt.savefig('gamma_distributions_dist2.png', dpi=300)

    plt.show()

### Setup bins

In [None]:
import math
import numpy as np
import scipy.stats as stats

from pipeline.simulator.wsmart_bin_analysis import OldGridBase


class Bins:
    """State of ``n`` simulated waste bins that are filled daily and emptied on collection.

    Fill levels live in ``self.c`` and are capped at 100. Daily fillings are either
    drawn from per-bin gamma distributions (``sample_method="gamma"``) or taken from
    a grid object exposing ``sample()`` / ``get_values_by_date()``
    (``sample_method="grid"``; the legacy spelling ``"emp"`` is accepted as a synonym).
    """

    def __init__(self, n, data_dir, sample_method="gamma", grid=None):
        """Create ``n`` empty bins.

        Args:
            n: Number of bins.
            data_dir: Directory handed to ``OldGridBase`` when no ``grid`` is given.
            sample_method: ``"gamma"``, ``"grid"`` or ``"emp"``.
            grid: Optional pre-built grid object; built from ``data_dir`` when None.
        """
        assert sample_method in ["grid", "gamma", "emp"]
        self.n = n
        self.c = np.zeros((n))                 # current fill level per bin
        self.means = np.ones((n))*10           # first argument of the overflow prediction
        self.std = np.ones((n))*1              # second argument of the overflow prediction
        self.lost = np.zeros((n))              # accumulated amount that exceeded the cap
        self.distribution = sample_method
        self.dist_param1 = np.ones((n))*10     # gamma shape per bin
        self.dist_param2 = np.ones((n))*10     # gamma scale per bin
        self.inoverflow = np.zeros((n))        # number of days each bin sat at the cap
        self.collected = np.zeros((n))         # accumulated amount collected per bin
        self.ncollections = np.zeros((n))      # number of collections per bin
        self.travel = 0
        self.ndays = 0
        self.collectdays = np.ones((n))*5      # overwritten by setCollectionLvlandFreq
        self.collectlevl = np.ones((n))*80     # overwritten by setCollectionLvlandFreq
        self.indices = list(range(n))          # positions taken from each grid sample
        self.data_dir = data_dir
        if grid is None:
            self.grid = OldGridBase(data_dir)
        else:
            self.grid = grid

    def _predictdaystooverflow(self, ui, vi, f, cl):
        """Return, per bin, the first day (1..30) on which overflow probability exceeds ``cl``.

        The total filling after ``ii`` days is modelled as a gamma distribution with
        shape ``ii*ui**2/vi`` and scale ``vi/ui`` (mean ``ii*ui``, variance ``ii*vi``).
        Bins that never exceed ``cl`` within 30 days keep the sentinel value 31.

        Args:
            ui: Array of daily filling means.
            vi: Array used as the daily filling variance.
            f: Array of current fill levels.
            cl: Probability threshold.
        """
        n = np.zeros(ui.shape[0])+31
        th = vi/ui
        for ii in np.arange(1,31,1):
            k = ii*ui**2/vi
            aux = np.zeros(ui.shape[0])+31
            p = 1-stats.gamma.cdf(100-f, k, scale=th)
            aux[np.nonzero(p>cl)[0]]=ii
            n = np.minimum(n,aux)
            if (p>cl).all():
                break
        # Always return the array: before, running out of days without every bin
        # crossing the threshold fell off the end of the function and returned None.
        return n

    def predictdaystooverflow(self, cl):
        """Predict the days until overflow for the current fill levels (see ``_predictdaystooverflow``)."""
        # NOTE(review): self.std is passed where the formula uses a variance — confirm
        # which quantity self.std is meant to hold.
        return self._predictdaystooverflow(self.means, self.std, self.c, cl)

    def set_indices(self, indices=None):
        """Replace the grid positions used by ``stochasticFilling``; None leaves them unchanged."""
        if indices is not None:
            self.indices = indices

    def collect(self, idsfull):
        """Empty the visited bins and return the total amount collected.

        Args:
            idsfull: Visited node ids, where 0 is the depot and bin ``i`` is node ``i+1``.
                Repeated ids are counted once.

        Returns:
            Sum of the fill levels removed; 0 when no bin is visited.
        """
        ids = set(idsfull) if idsfull is not None else set()
        # discard (not remove): a tour that does not list the depot must not raise.
        ids.discard(0)
        if not ids:
            return 0

        ids = np.array(sorted(ids), dtype=int)-1

        self.collected[ids] += self.c[ids]
        self.ncollections[ids] += 1
        collected = np.sum(self.c[ids])
        self.c[ids] = 0
        return collected

    def _apply_filling(self, todaysfilling):
        """Add one day's filling to every bin and return the summed overflow counter."""
        # Lost overflows
        self.lost += np.maximum(self.c + todaysfilling - 100, 0)

        # New depositions - do not change order otherwise
        # xq + vals + vals for the overflow calculation
        self.c = np.minimum(self.c + todaysfilling, 100)
        self.c = np.maximum(self.c, 0)
        self.inoverflow += (self.c==100)
        return np.sum(self.inoverflow)

    def stochasticFilling(self):
        """Sample one day's filling according to ``self.distribution`` and apply it.

        Returns:
            Sum over bins of the accumulated days-at-cap counter.

        Raises:
            ValueError: If ``self.distribution`` is not a known sampling method.
        """
        if self.distribution == 'gamma':
            todaysfilling = np.random.gamma(self.dist_param1, self.dist_param2, size=(self.n, ))
        elif self.distribution in ('grid', 'emp'):
            # The constructor accepts "grid"; 'emp' is kept for backward compatibility.
            todaysfilling = np.take(self.grid.sample(), self.indices)
        else:
            raise ValueError(f"Unknown sampling method: {self.distribution!r}")
        return self._apply_filling(todaysfilling)

    def deterministicFilling(self, date):
        """Apply the grid's filling for ``date``; returns the same value as ``stochasticFilling``."""
        todaysfilling = self.grid.get_values_by_date(date, sample = True)
        return self._apply_filling(todaysfilling)

    def __setDistribution(self, param1, param2):
        """Store per-bin distribution parameters (a length-1 ``param1`` broadcasts both) and refresh collection settings."""
        if len(param1)==1:
            self.dist_param1 = np.ones((self.n))*param1
            self.dist_param2 = np.ones((self.n))*param2
        else:
            self.dist_param1 = param1
            self.dist_param2 = param2
        self.setCollectionLvlandFreq()

    def setGammaDistribution(self, option=0):
        """Switch to gamma sampling using one of four predefined shape/scale patterns.

        Args:
            option: 0, 1, 2 or 3; selects the repeating pattern of shape (k) and
                scale (th) values that is tiled across the ``n`` bins.
        """
        def __set_param(param):
            # Tile the pattern until it covers all bins, then cut it to exactly n entries.
            param_len = len(param)
            if self.n == param_len:
                return param

            repeats = math.ceil(self.n / param_len)
            return (param * repeats)[:self.n]

        self.distribution = 'gamma'
        if option == 0:
            k = __set_param([5, 5, 5, 5, 5, 10, 10, 10, 10, 10])
            th = __set_param([5, 2])
        elif option == 1:
            k = __set_param([2, 2, 2, 2, 2, 6, 6, 6, 6, 6])
            th = __set_param([6, 4])
        elif option == 2:
            k = __set_param([1, 1, 1, 1, 1, 3, 3, 3, 3, 3])
            th = __set_param([8, 6])
        else:
            assert option == 3
            k = __set_param([5, 2])
            th = __set_param([10])
        self.__setDistribution(k, th)

    def freqvisit2(self, ui, vi, cf):
        """Return ``(days, level)`` for a bin with daily mean ``ui`` and variance ``vi``.

        ``days`` is the first n in 1..49 for which the (1-cf) quantile of the n-day
        gamma filling exceeds 100; ``level`` is 100 minus the one-day (1-cf) quantile.
        If no n qualifies, the exclusive upper bound 50 is returned as ``days`` so
        that callers can still unpack the result.
        """
        # a = gamma.cdf(30, k, scale=th)
        # c = gamma.ppf(a, k, scale=th)
        # print(a,c)
        horizon = 50
        th = vi/ui
        ov = 100-stats.gamma.ppf(1-cf, ui**2/vi, scale=th)
        for n in range(1,horizon):
            k = n*ui**2/vi
            v = stats.gamma.ppf(1-cf, k, scale=th)
            if v>100:
                return n, ov
        return horizon, ov

    def setCollectionLvlandFreq(self, cf = 0.9):
        """Recompute ``collectdays`` and ``collectlevl`` for every bin from its gamma parameters."""
        for ii in range(0,self.n):
            # Gamma(shape k, scale th) has mean k*th and variance k*th**2.
            f2,lv2 = self.freqvisit2(self.dist_param1[ii]*self.dist_param2[ii],self.dist_param1[ii]*self.dist_param2[ii]**2,cf)
            self.collectdays[ii] = f2
            self.collectlevl[ii] = lv2
        return

In [None]:
# Stack depot and bins into one node table with the depot first.
# NOTE(review): `tmp_depot` and `bins_coords` are not defined in any cell visible above — this
# cell depends on state from elsewhere; confirm where they come from.
all_nodes = pd.concat([tmp_depot, bins_coords]).sort_index().reset_index(drop=True).drop('index', axis=1)
all_nodes

In [None]:
# Same node table built from the WSBA bins (ID/Lat/Lng only).
# NOTE(review): `tmp_depot` is not defined in any cell visible above — confirm where it comes from.
all_nodes_new = pd.concat([tmp_depot[['ID', 'Lat', 'Lng']], full_new_data[['ID', 'Lat', 'Lng']]]).sort_index().reset_index(drop=True).drop('index', axis=1)
all_nodes_new

In [None]:
# Summary statistics of the distance matrix for each of the two node tables.
# NOTE(review): `wsrs_size` / `wsba_size` are not defined in any cell visible above, and
# compute_distance_matrix is called here with a size as second positional argument, unlike the
# `method='ogd'` calls earlier — confirm the signature.
dist_dict = {'Sum': [], 'Avg': [], 'Min': [], 'Max': []}
for nodes, g_size in zip([all_nodes, all_nodes_new], [wsrs_size, wsba_size]):
    distance_matrix = compute_distance_matrix(nodes, g_size+1)
    tmp_sum = sum(sum(row) for row in distance_matrix)
    dist_dict['Sum'].append(tmp_sum)
    # 'Avg' divides the total by the number of nodes (g_size+1), not by the number of pairs.
    dist_dict['Avg'].append(tmp_sum/(g_size+1))
    dist_dict['Min'].append(min([min(x) for x in distance_matrix]))
    dist_dict['Max'].append(max([max(x) for x in distance_matrix]))

In [None]:
# Tabulate the statistics: one row per graph size, one column per statistic.
dist_df = pd.DataFrame(index=[wsrs_size, wsba_size])
dist_df.index.names = ["#bin"]
for key in dist_dict.keys():
    dist_df[key] = dist_dict[key]

dist_df
#styled_df = dist_df.style.set_properties(subset=pd.IndexSlice[:, :], **{'font-weight': 'bold'})
#styled_df

In [None]:
# Count how many matrix entries fall at or below the 50th/75th/90th distance percentile,
# i.e. how many edges a thresholded adjacency matrix would keep.
distance_matrix = compute_distance_matrix(all_nodes, wsrs_size+1)
thresholds = [np.percentile(distance_matrix, 50), np.percentile(distance_matrix, 75), np.percentile(distance_matrix, 90)]
for thresh in thresholds:
    adj_matrix = (distance_matrix <= thresh).astype(int)
    print(f"threshold {thresh}: {np.sum(adj_matrix)}")

In [None]:
import torch

In [None]:
def generate_edge_idx(size, undirected=False):
    """Build a random edge index for a graph with one depot (node 0) and ``size`` bins.

    A fixed number of bin-to-bin edges is sampled without replacement, then every
    bin is connected to the depot in both directions.

    Args:
        size: Number of bins (nodes 1..size).
        undirected: When True, each sampled bin-to-bin edge is added in both directions.

    Returns:
        A ``torch.long`` tensor of shape (2, E) holding the (row, column) positions
        of the non-zero adjacency entries.
    """
    # Create a list of all possible bin-to-bin edges
    if undirected:
        # For undirected graphs, keep one representative (i < j) per pair
        possible_edges = [(i, j) for i in range(1, size+1) for j in range(i+1, size+1)]
    else:
        possible_edges = [(i, j) for i in range(1, size+1) for j in range(1, size+1) if i != j]

    # The target count is negative for small graphs; clamp it to what can actually
    # be sampled so np.random.choice never receives an invalid sample size.
    target_edges = math.ceil(size * (size - 1) / 3 - 2 * (size - 1))
    num_edges = min(max(target_edges, 0), len(possible_edges))

    # Initialize an empty adjacency matrix
    adj_matrix = np.zeros((size+1, size+1), dtype=int)

    # Randomly select <num_edges> edges from the possible edges and populate adj matrix
    if num_edges > 0:
        selected_edges = np.random.choice(len(possible_edges), num_edges, replace=False)
        for edge_index in selected_edges:
            i, j = possible_edges[edge_index]
            adj_matrix[i, j] = 1
            if undirected:
                adj_matrix[j, i] = 1

    # Add edges to and from the depot
    adj_matrix[0] = np.ones(size+1, dtype=int)
    adj_matrix[:, 0] = np.ones(size+1, dtype=int)
    adj_matrix[0, 0] = 0
    return torch.tensor(np.array(np.nonzero(adj_matrix)), dtype=torch.long)

In [None]:
# Quick check: build a random edge index for a 20-bin graph and show its shape.
edge_tmp = generate_edge_idx(20)
print(edge_tmp.shape)

In [None]:
# NOTE(review): this re-uses the name of the random generate_edge_idx defined earlier, so once
# this cell runs the earlier version is no longer reachable — consider a distinct name.
def generate_edge_idx(size, thresh_val):
    """Build an edge index by keeping pairs whose distance is at or below a percentile.

    Reads the module-level ``distance_matrix`` (it is not a parameter), whose shape
    must be (size+1, size+1) for the depot assignments below to fit.

    Args:
        size: Number of bins; node 0 is the depot.
        thresh_val: Percentile passed to ``np.percentile`` to derive the distance threshold.

    Returns:
        A NumPy array of shape (2, E) with the (row, column) positions of the kept edges.
    """
    threshold = np.percentile(distance_matrix, thresh_val)
    adj_matrix = (distance_matrix <= threshold).astype(int)
    # Connect the depot to every node in both directions regardless of distance.
    adj_matrix[0] = np.ones(size+1, dtype=int)
    adj_matrix[:, 0] = np.ones(size+1, dtype=int)
    # Remove self-loops (including the depot's).
    np.fill_diagonal(adj_matrix, 0)
    return np.array(np.nonzero(adj_matrix))

In [None]:
# Edge index obtained by thresholding at the median (50th percentile) distance.
print(generate_edge_idx(wsrs_size, 50))

In [None]:
#midx = pd.MultiIndex.from_tuples((bid, key) for bid in [wsrs_size, wsba_size] for key in dist_dict.keys())
#midx
#dist_df = pd.DataFrame(data=[x for x in [y[0] for y in dist_dict.values()] + [y[1] for y in dist_dict.values()]], index=midx)
#dist_df.index.names = ['#bin']
#dist_df.transpose()
#dist_df.loc[-1] = [x for x in [y[0] for y in dist_dict] + [y[1] for y in dist_dict]]

In [None]:
# Draw the distance graph: directly for the predefined sizes (20, 50, 225), otherwise
# for a subset of bins whose indices are read from a JSON file of a previous experiment.
# NOTE(review): sizes above 225 that are not predefined match neither branch, so nothing is drawn.
Ndays = 31
Nsamples = 1
if wsrs_size in [20, 50, 225]:
    distance_matrix = compute_distance_matrix(all_nodes, wsrs_size+1)
    draw_graph(distance_matrix)
elif wsrs_size < 225:
    input_graphs_path = os.path.join(home_dir, "assets", "output", "wsrs", f"{Ndays}_days", f"op_{wsrs_size}", f"graphs_{Nsamples}N.json") 
    if os.path.isfile(input_graphs_path):
        with open(input_graphs_path) as fp:
            indices = json.load(fp)
    else:
        raise ValueError("Must provide a file with the indices of the bins used in the experiments.")
    
    # Use the first stored sample of bin indices.
    sample_id = 0
    bins_coordinates_tmp = process_indices(all_nodes, indices[sample_id])
    distance_matrix = compute_distance_matrix(bins_coordinates_tmp, wsrs_size+1)
    draw_graph(distance_matrix)

### Generate bin fillings

In [None]:
# Simulate Ndays-1 days of gamma-distributed fillings and accumulate the overflow counts.
# NOTE(review): Bins.stochasticFilling as defined in this notebook returns a single value,
# so the two-value unpacking below does not match it — reconcile the class and this cell.
bins = Bins(wsrs_size, data_dir, "gamma", grid=None)
Ndays = 5
overflows = [0]
bin_fillings = []
bins.setGammaDistribution()
for day in range(1, Ndays):
    overflow, filling = bins.stochasticFilling() 
    overflows.append(overflows[day - 1] + overflow)
    bin_fillings.append(filling)
    print(f'Day {day} #overflows: {overflows[day]}')

In [None]:
# Freeze the gamma-run results under their own names before `overflows` / `bin_fillings`
# are reused by the next simulation, and report the extreme daily fillings.
wsrs_overflows = np.array(overflows)
wsrs_fillings = np.array(bin_fillings)
wsrs_maxmin = (np.min(wsrs_fillings), np.max(wsrs_fillings))
print(f"Min daily filling: {wsrs_maxmin[0]}")
print(f"Max daily filling: {wsrs_maxmin[1]}")

In [None]:
# Repeat the simulation with fillings sampled from the WSBA grid instead of gamma draws.
# NOTE(review): Bins.stochasticFilling as defined in this notebook returns a single value,
# so the two-value unpacking below does not match it — reconcile the class and this cell.
bins = Bins(wsba_size, data_dir, "grid", grid)
overflows = [0]
bin_fillings = []
for day in range(1, Ndays):
    overflow, filling = bins.stochasticFilling() 
    overflows.append(overflows[day - 1] + overflow)
    bin_fillings.append(filling)
    print(f'Day {day} #overflows: {overflows[day]}')

In [None]:
# Freeze the grid-run results under their own names and report the extreme daily fillings.
wsba_overflows = np.array(overflows)
wsba_fillings = np.array(bin_fillings)
wsba_maxmin = (np.min(wsba_fillings), np.max(wsba_fillings))
print(f"Min daily filling: {wsba_maxmin[0]}")
print(f"Max daily filling: {wsba_maxmin[1]}")

In [None]:
# Per-day mean and standard deviation across bins (axis=1: one row was appended per day).
# x_days matches the simulated days 1..Ndays-1.
x_days = np.arange(1, Ndays)
mean_wsrs_fillings = np.mean(wsrs_fillings, axis=1)
std_wsrs_fillings = np.std(wsrs_fillings, axis=1)
mean_wsba_fillings = np.mean(wsba_fillings, axis=1)
std_wsba_fillings = np.std(wsba_fillings, axis=1)

print(mean_wsrs_fillings)
print(std_wsrs_fillings)
print(mean_wsba_fillings)
print(std_wsba_fillings)

In [None]:
# Grouped bar chart: daily mean filling with standard-deviation error bars, gamma vs. WSBA.
# The size is passed to plt.subplots; assigning `fig.figsize` afterwards only
# creates an unused attribute and leaves the figure at its default size.
fig, ax = plt.subplots(figsize=(20, 10))
ax.set_xticks(x_days)
ax.set_title("Mu and Sigma of the daily bins' fillings")
ax.yaxis.grid(True)
# The two series are shifted by ±0.2 around each day so the bars sit side by side.
wsrs_bar = ax.bar(x_days - 0.2, mean_wsrs_fillings, yerr=std_wsrs_fillings, width=0.4, label='Gamma', align='center', alpha=0.5, ecolor='black', capsize=10)
wsba_bar = ax.bar(x_days + 0.2, mean_wsba_fillings, yerr=std_wsba_fillings, width=0.4, label='WSBA', align='center', alpha=0.5, ecolor='black', capsize=10)
ax.set_xlabel('Day of the simulation')
ax.set_ylabel('Bins fillings')
ax.bar_label(wsrs_bar)
ax.bar_label(wsba_bar)
fig.legend()

In [None]:
# Box plot of the per-bin fillings for each simulated day (gamma run).
my_dict = {f"Day {day}": day_fill for day, day_fill in zip(x_days, wsrs_fillings)}
# The size is passed to plt.subplots; assigning `fig.figsize` afterwards only
# creates an unused attribute and leaves the figure at its default size.
fig, ax = plt.subplots(figsize=(20, 10))
ax.set_xticks(x_days)
ax.set_title("Daily filling for Gamma Distribution")
ax.yaxis.grid(True)
wsba_bar = ax.boxplot(my_dict.values(), labels=my_dict.keys())
ax.set_xlabel('Day of the simulation')
ax.set_ylabel('Bins fillings')

In [None]:
# Box plot of the per-bin fillings for each simulated day (WSBA grid run).
my_dict = {f"Day {day}": day_fill for day, day_fill in zip(x_days, wsba_fillings)}
# The size is passed to plt.subplots; assigning `fig.figsize` afterwards only
# creates an unused attribute and leaves the figure at its default size.
fig, ax = plt.subplots(figsize=(20, 10))
ax.set_xticks(x_days)
ax.set_title("Daily filling for WSmart+ Bin Analysis Grid")
ax.yaxis.grid(True)
wsba_bar = ax.boxplot(my_dict.values(), labels=my_dict.keys())
ax.set_xlabel('Day of the simulation')
ax.set_ylabel('Bins fillings')

In [None]:
# Cumulative overflow curves for several graph sizes under gamma-distributed fillings.
# NOTE(review): `id` is unused and shadows the built-in; Bins.stochasticFilling as defined in this
# notebook returns a single value, so the two-value unpacking below does not match it.
overflows_ls = []
x_days = np.arange(0, Ndays)
graph_size_ls = [20, 50, 100, 150]
for id, n_nodes in enumerate(graph_size_ls):
    tmp_overflows = [0]
    bins = Bins(n_nodes, data_dir, "gamma")
    bins.setGammaDistribution()
    print(f"Graph size: {n_nodes}")
    for day in range(1, Ndays):
        overflow, filling = bins.stochasticFilling() 
        tmp_overflows.append(tmp_overflows[day - 1] + overflow)
        print(f' - day {day} #overflows: {tmp_overflows[day]}')
    
    overflows_ls.append(tmp_overflows)

In [None]:
# Plot every gamma curve plus the WSBA grid curve on one axis.
# NOTE(review): the two append() calls mutate lists created in the previous cell, so re-running
# this cell alone adds a duplicate entry each time; the hard-coded 225 presumably is the size used
# for wsrs_overflows — confirm.
x_days = np.arange(0, Ndays)
graph_size_ls.append(225)
overflows_ls.append(wsrs_overflows)
for n_nodes, ofs_elem in zip(graph_size_ls, overflows_ls):
    plt.plot(x_days, ofs_elem, label=f"Gamma Distribution OP{n_nodes}")

plt.plot(x_days, wsba_overflows, label="WSmart+ Bin Analysis Grid")
plt.xlabel('Day')
plt.ylabel('#overflows')
plt.title('Cumulative #overflows')
plt.legend()
plt.show()

In [None]:
# Grid-based simulation over all 317 bins, requesting trimmed fillings.
# NOTE(review): Bins.stochasticFilling as defined in this notebook accepts no `trim_fill`
# argument and returns a single value — this call and unpacking do not match it.
bins = Bins(317, data_dir, "grid", grid)
overflows = [0]
bin_fillings = []
for day in range(1, Ndays):
    overflow, filling = bins.stochasticFilling(trim_fill=True) 
    overflows.append(overflows[day - 1] + overflow)
    bin_fillings.append(filling)
    print(f'Day {day} #overflows: {overflows[day]}')

In [None]:
# Recompute the per-day statistics for the trimmed run; negative gamma-run fillings are
# clamped to 0 on a copy so wsrs_fillings itself stays untouched.
# NOTE(review): this overwrites the mean_*/std_* variables computed for the untrimmed runs.
x_days = np.arange(1, Ndays)
wsba_overflows2 = np.array(overflows)
wsba_fillings2 = np.array(bin_fillings)
wsrs_fillings2 = wsrs_fillings.copy()
wsrs_fillings2[wsrs_fillings2 < 0] = 0

mean_wsrs_fillings = np.mean(wsrs_fillings2, axis=1)
std_wsrs_fillings = np.std(wsrs_fillings2, axis=1)
mean_wsba_fillings = np.mean(wsba_fillings2, axis=1)
std_wsba_fillings = np.std(wsba_fillings2, axis=1)

print(mean_wsrs_fillings)
print(std_wsrs_fillings)
print(mean_wsba_fillings)
print(std_wsba_fillings)

In [None]:
# Minimum/maximum daily filling of the three runs; transposed so each run is one row.
# NOTE(review): this rebinds `df`, which an earlier cell used for the distance ranking table.
wsba2_maxmin = (np.min(wsba_fillings2), np.max(wsba_fillings2))
df = pd.DataFrame([[wsrs_maxmin[0], wsba_maxmin[0], wsba2_maxmin[0]], [wsrs_maxmin[1], wsba_maxmin[1], wsba2_maxmin[1]]], ['Minimum fill', 'Maximum fill'], ['Gamma', 'WSBA', 'WSBA (y < 0 = 0)'])
df.T

In [None]:
# Grouped bar chart of the trimmed runs: daily mean filling with standard-deviation error bars.
# The size is passed to plt.subplots; assigning `fig.figsize` afterwards only
# creates an unused attribute and leaves the figure at its default size.
fig, ax = plt.subplots(figsize=(20, 10))
ax.set_xticks(x_days)
ax.set_title("Mu and Sigma of the daily bins' fillings (y < 0 = 0)")
ax.yaxis.grid(True)
# The two series are shifted by ±0.2 around each day so the bars sit side by side.
wsrs_bar = ax.bar(x_days - 0.2, mean_wsrs_fillings, yerr=std_wsrs_fillings, width=0.4, label='Gamma', align='center', alpha=0.5, ecolor='black', capsize=10)
wsba_bar = ax.bar(x_days + 0.2, mean_wsba_fillings, yerr=std_wsba_fillings, width=0.4, label='WSBA', align='center', alpha=0.5, ecolor='black', capsize=10)
ax.set_xlabel('Day of the simulation')
ax.set_ylabel('Bins fillings')
ax.bar_label(wsrs_bar)
ax.bar_label(wsba_bar)
fig.legend()

In [None]:
# Compare the cumulative overflow curves of the gamma run and both WSBA runs.
# x_days starts at 0 here because every overflow list begins with an initial 0 entry.
x_days = np.arange(0, Ndays)
plt.plot(x_days, wsrs_overflows, label=f"Gamma Distribution OP225")
plt.plot(x_days, wsba_overflows, label="WSmart+ Bin Analysis Grid")
plt.plot(x_days, wsba_overflows2, label="WSmart+ Bin Analysis Grid (y < 0 = 0)")
plt.xlabel('Day')
plt.ylabel('#overflows')
plt.title('Cumulative #overflows')
plt.legend()
plt.show()

## Generate Graph
### Generate edges

In [None]:
# Number of bin-to-bin edges to sample for a graph of this size (same formula as the
# random generate_edge_idx defined earlier).
graph_size = wsrs_size
num_edges = math.ceil(graph_size * (graph_size - 1) / 3 - 2 * (graph_size - 1))
print(num_edges)

In [None]:
# NOTE(review): the most recent generate_edge_idx definition in this notebook takes
# (size, thresh_val) and returns a NumPy array, so `num_edges` is interpreted as a percentile
# and `.size()` (a tensor method) is not callable on the result — confirm which version is meant.
edge_idx = generate_edge_idx(graph_size, num_edges)
print(edge_idx.size())
print(edge_idx)

In [None]:
# NOTE(review): this cell repeats the previous one verbatim — presumably a leftover; confirm
# whether a second draw is intended.
edge_idx = generate_edge_idx(graph_size, num_edges)
print(edge_idx.size())
print(edge_idx)

In [None]:
# Reorder the edge index with the project's sort_by_pairs helper and show the result.
# NOTE(review): `edge_idx` is rebound in place, so re-running this cell re-sorts the sorted tensor.
print(edge_idx.size())
edge_idx = sort_by_pairs(graph_size, edge_idx)
print(edge_idx)

### Generate Nodes

In [None]:
# Depot-to-bin edges: row 0 holds the source (the depot, node 0) and row 1 the targets 1..graph_size.
# torch.arange with an explicit long dtype replaces the deprecated torch.range,
# whose result is a floating-point tensor by default and therefore unsuitable for an index tensor.
# arange excludes its end value, hence graph_size + 1.
depot_idx = torch.cat((torch.zeros(graph_size, dtype=torch.long), torch.arange(1, graph_size + 1, dtype=torch.long))).reshape(2, graph_size)
print(depot_idx.size())
print(depot_idx)

In [None]:
# Append the sampled edges after the depot edges along the edge dimension (dim=1).
full_idx = torch.cat((depot_idx, edge_idx), dim=1)
print(full_idx.size())
print(full_idx)

In [None]:
# Split the edge index into regular edges and self-loops.
# NOTE(review): element [-2] of the returned tuple is presumably the self-loop edge index —
# confirm against the torch_geometric documentation.
sorted_idx = segregate_self_loops(full_idx)
print(sorted_idx[-2].size())

## Orienteering Problem

In [None]:
# Build a small Orienteering Problem dataset and pull every sample out as a single batch.
n_samples = 10
data_dist = 'unif'
problem = load_problem('op')
dataset = problem.make_dataset(size=graph_size, num_samples=n_samples, distribution=data_dist)
# One batch as large as the dataset, so the first batch contains all samples.
dataloader = DataLoader(dataset, batch_size=len(dataset))
batch = next(iter(dataloader))
print(batch.keys())
for field in ('loc', 'edge_idx'):
    print(batch[field].size())

In [None]:
# Shapes of the node-location and depot tensors in the batch.
print(batch['loc'].size())
print(batch['depot'].size())