In [None]:
import json
import os
import re
import sys
import traceback

import folium
import pandas as pd

from .notebook_setup import setup_home_directory

# Name of this notebook, handed to the project helper that returns the
# repository home directory (the recorded cell output shows the helper
# reporting that it added home_dir to the system path).
NOTEBOOK_NAME = "output_show"
home_dir = setup_home_directory(NOTEBOOK_NAME)

# Folder layout used by the cells below.
inout_dir = os.path.join(home_dir, "assets", "output")
data_dir = os.path.join(home_dir, "data", "wsr_simulator")
daily_waste_dir = os.path.join(data_dir, "daily_waste")
coords_dir = os.path.join(data_dir, "coordinates")

# Create every folder up front. A failure is printed to stdout instead of
# being raised, and stops the remaining folders from being created.
try:
    for _folder in (inout_dir, data_dir, daily_waste_dir, coords_dir):
        os.makedirs(_folder, exist_ok=True)
except Exception:
    traceback.print_exc(file=sys.stdout)

Setup completed - added home_dir to system path: /home/pkhunter/Repositories/WSmart-Route


In [2]:
def plot_route_folium(
    route,
    df_coords,
    output_path,
    marker_color="gray",
    deposito_lat=None,
    deposito_lon=None,
):
    """Render a route as an interactive folium map and save it as HTML.

    Args:
        route: Iterable of stop IDs in visiting order. ID ``0`` is treated as
            the depot.
        df_coords: DataFrame with ``ID``, ``LAT`` and ``LNG`` columns. Column
            names are matched case-insensitively and ignoring surrounding
            whitespace. Coordinates may use a decimal comma. The frame is NOT
            modified.
        output_path: Destination of the generated HTML file.
        marker_color: Colour of the circle markers drawn for non-depot stops.
        deposito_lat: Depot latitude. Used for ID ``0`` only when
            ``deposito_lon`` is also given; otherwise ID ``0`` is looked up in
            ``df_coords`` like any other stop.
        deposito_lon: Depot longitude (see ``deposito_lat``).

    Returns:
        None. The map is written to ``output_path``.

    Raises:
        KeyError: If ``df_coords`` lacks an ``ID``, ``LAT`` or ``LNG`` column.
        ValueError: If the coordinates of a visited stop cannot be parsed.
    """

    def _to_float(value):
        # Spreadsheet exports may use a decimal comma ("39,18").
        return float(str(value).replace(",", "."))

    # Normalise the header on a renamed copy so the caller's frame is untouched.
    coords = df_coords.rename(columns=lambda col: str(col).strip().upper())

    # One ID -> (lat, lon) table per call instead of filtering the frame for
    # every stop. IDs are compared as strings; on duplicates the first row
    # wins, as it did with the previous row filter.
    raw_by_id = {}
    for bin_id, raw_lat, raw_lon in zip(coords["ID"].astype(str), coords["LAT"], coords["LNG"]):
        raw_by_id.setdefault(bin_id, (raw_lat, raw_lon))

    has_depot = deposito_lat is not None and deposito_lon is not None

    # Keep each stop ID together with its coordinates: a skipped (unknown) ID
    # must not shift the labels of the stops that follow it.
    stops = []
    for idx in route:
        if idx == 0 and has_depot:
            stops.append((idx, (deposito_lat, deposito_lon)))
            continue
        raw = raw_by_id.get(str(idx))
        if raw is None:
            print(f"WARNING: could not find ID {idx} in the DataFrame!")
            continue
        stops.append((idx, (_to_float(raw[0]), _to_float(raw[1]))))

    points = [location for _, location in stops]

    map_center = points[0] if points else [0, 0]
    m = folium.Map(location=map_center, zoom_start=13)
    for idx, location in stops:
        if idx == 0:
            folium.Marker(
                location=location,
                popup="Depósito (id 0)",
                icon=folium.Icon(color="green", icon="home"),
            ).add_to(m)
        else:
            folium.CircleMarker(
                location=location,
                radius=6,
                color=marker_color,
                fill=True,
                fill_opacity=0.7,
                popup=f"ID {idx}",
            ).add_to(m)
    if points:
        folium.PolyLine(points, color="blue", weight=2, opacity=0.8).add_to(m)

    m.save(output_path)
    return


# Function for log
def json_log_to_excels(json_path, output_path):
    """Convert a per-policy JSON log into a single Excel sheet.

    The JSON file must hold a mapping ``{policy_name: {metric: value, ...}}``.
    Each policy becomes one row; the policy name is stored in a ``policy``
    column and each metric gets its own column.

    Args:
        json_path: Path of the JSON log to read (decoded as UTF-8).
        output_path: Path of the ``.xlsx`` file to write.

    Returns:
        None. The sheet is written to ``output_path`` and a confirmation line
        is printed.
    """
    # Decode explicitly as UTF-8 (as json_daily_to_excels does) so non-ASCII
    # policy names do not depend on the platform's default locale encoding.
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    # One row per top-level key; the key itself is exposed as the "policy" column.
    df = pd.DataFrame.from_dict(data, orient="index").reset_index().rename(columns={"index": "policy"})
    df.to_excel(output_path, index=False)
    print(f"Saved DataFrame to '{output_path}'.")
    return


# Function for daily
def json_daily_to_excels(json_path, pasta_saida, coords_xlsx_path, deposito_lat=None, deposito_lon=None):
    """Export the simulator's daily JSON output as per-model Excel files and route maps.

    For every top-level key (model) of the JSON file a sub-folder of
    ``pasta_saida`` is created. It receives one ``.xlsx`` file with a row per
    simulated day and a ``mapas`` sub-folder with one HTML route map per day.

    Args:
        json_path: Path of the daily JSON file (read as UTF-8).
        pasta_saida: Root output folder; created when missing.
        coords_xlsx_path: Excel file with the coordinates passed on to
            ``plot_route_folium``.
        deposito_lat: Depot latitude forwarded to ``plot_route_folium``.
        deposito_lon: Depot longitude forwarded to ``plot_route_folium``.

    Returns:
        None.
    """
    # (Excel column, JSON key) pairs copied verbatim into each row.
    column_sources = (
        ("day", "day"),
        ("kg", "kg"),
        ("overflows", "overflows"),
        ("ncol", "ncol"),
        ("kg_lost", "kg_lost"),
        ("km", "km"),
        ("kg_per_km", "kg/km"),
        ("cost", "cost"),
    )

    os.makedirs(pasta_saida, exist_ok=True)
    df_coords = pd.read_excel(coords_xlsx_path)
    with open(json_path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    for model_name in payload:
        fields = payload[model_name]
        total_days = len(fields["day"])

        # Model names may contain characters that are unsafe in file names.
        safe_name = model_name.replace("#", "_").replace("/", "_").replace("\\", "_")
        model_dir = os.path.join(pasta_saida, safe_name)
        maps_dir = os.path.join(model_dir, "mapas")
        # Creating the maps folder also creates the model folder above it.
        os.makedirs(maps_dir, exist_ok=True)

        rows = []
        for pos in range(total_days):
            # One spreadsheet row for the current day.
            row = {column: fields[key][pos] for column, key in column_sources}
            row["tour"] = "[" + ",".join(str(stop) for stop in fields["tour"][pos]) + "]"
            rows.append(row)

            # One HTML map for the current day's tour.
            map_file = os.path.join(maps_dir, f"rota_dia{fields['day'][pos]}.html")
            plot_route_folium(
                fields["tour"][pos],
                df_coords,
                map_file,
                deposito_lat=deposito_lat,
                deposito_lon=deposito_lon,
            )

        # Write the workbook of this model next to its maps folder.
        pd.DataFrame(rows).to_excel(os.path.join(model_dir, f"{safe_name}.xlsx"), index=False)

    print(f"Saved maps and archives to '{pasta_saida}'.")
    return

Gerando um Excel para os dados de saída do simulador: lê os logs e gera um arquivo Excel para comparar os modelos.

In [3]:
# Experiment selectors: they determine which output folder and log file are read.
n_days = 31
n_bins = 104
n_samples = 1
# Area tag used in folder names: lower-case letters only ("Rio Maior" -> "riomaior").
area = re.sub(r"[^a-zA-Z]", "", "Rio Maior".lower())

# Both the JSON log and the Excel export live in the same run folder.
run_dir = os.path.join(inout_dir, f"{n_days}_days", f"{area}_{n_bins}")
input_path = os.path.join(run_dir, f"log_mean_{n_samples}N.json")
output_path = os.path.join(run_dir, f"log_mean_{n_samples}N.xlsx")
json_log_to_excels(input_path, output_path)

Saved DataFrame to '/home/pkhunter/Repositories/WSmart-Route/assets/output/31_days/riomaior_104/log_mean_1N.xlsx'.


O seguinte código lê o JSON de saída do simulador e gera uma pasta para cada modelo, contendo um Excel com as informações necessárias e uma subpasta com os mapas de cada rota gerada.

In [4]:
# Name of the waste-fill distribution whose daily results are exported.
data_dist = "emp"

# The daily JSON and the folder receiving the per-model exports share one run folder.
run_dir = os.path.join(inout_dir, f"{n_days}_days", f"{area}_{n_bins}")
input_path = os.path.join(run_dir, f"daily_{data_dist}_{n_samples}N.json")
output_path = os.path.join(run_dir, f"daily_{data_dist}_{n_samples}N")

# Depot coordinates are passed explicitly so stop 0 does not need a row in the sheet.
json_daily_to_excels(
    input_path,
    output_path,
    coords_xlsx_path=os.path.join(coords_dir, "Coordinates_unique.xlsx"),
    deposito_lat=39.1838505324,
    deposito_lon=-9.14806472054,
)

Saved maps and archives to '/home/pkhunter/Repositories/WSmart-Route/assets/output/31_days/riomaior_104/daily_emp_1N'.


## Verify daily waste fills

In [None]:
import glob
import pickle
import re
import traceback

import numpy as np
import openpyxl
from logic.src.pipeline.simulations.loader import load_simulator_data

from .notebook_setup import setup_home_directory

# Project helper returns the repository home directory for this notebook.
NOTEBOOK_NAME = "output_show"
home_dir = setup_home_directory(NOTEBOOK_NAME)
SEED = 42

# Experiment selectors for the fill verification below.
n_days = 31
n_bins = 170
n_samples = 10
data_dist = "gamma1"
# Area tag used in folder names: lower-case letters only ("Rio Maior" -> "riomaior").
area = re.sub(r"[^a-zA-Z]", "", "Rio Maior".lower())

# Input and output folders derived from the selectors above.
data_dir = os.path.join(home_dir, "data", "wsr_simulator")
daily_waste_dir = os.path.join(data_dir, "daily_waste")
output_dir = os.path.join(home_dir, "assets", "output", f"{n_days}_days", f"{area}_{n_bins}")
fill_history_dir = os.path.join(output_dir, "fill_history", data_dist)

# Create the folders; a failure is printed to stdout instead of being raised
# and stops the remaining folders from being created.
try:
    for _folder in (data_dir, output_dir, daily_waste_dir, fill_history_dir):
        os.makedirs(_folder, exist_ok=True)
except Exception:
    traceback.print_exc(file=sys.stdout)

Already added home_dir to system path: /home/pkhunter/Repositories/wsmart_route


In [None]:
def get_files_for_first_prefix(directory):
    """Return the sample workbooks that share the alphabetically first prefix.

    Only files matching ``*sample[0-9]*.xlsx`` inside ``directory`` are
    considered. The prefix of a file is everything before the last underscore
    of its name; files whose name contains no underscore are ignored.

    Args:
        directory: Folder to scan (not recursive).

    Returns:
        Sorted list of paths whose prefix is the alphabetically smallest one,
        or an empty list when no file qualifies.
    """
    pattern = os.path.join(directory, "*sample[0-9]*.xlsx")

    # Group the matches by prefix directly. Running a second glob built from
    # the prefix would need the same "sample<digit>" suffix and would break on
    # prefixes containing glob metacharacters such as "[".
    files_by_prefix = {}
    for path in glob.glob(pattern):
        filename = os.path.basename(path)
        if "_" not in filename:
            continue
        prefix = filename.rsplit("_", 1)[0]
        files_by_prefix.setdefault(prefix, []).append(path)

    if not files_by_prefix:
        return []

    return sorted(files_by_prefix[min(files_by_prefix)])


def delete_files_except_pattern(directory, pattern):
    """Remove every regular file in ``directory`` that does not match ``pattern``.

    Sub-directories are never removed. Each deletion is reported with a path
    relative to the parent of the current working directory, followed by a
    summary line.

    Args:
        directory: Folder whose direct entries are inspected.
        pattern: Glob pattern (relative to ``directory``) of the entries to keep.

    Returns:
        None.
    """
    survivors = glob.glob(os.path.join(directory, pattern))
    everything = glob.glob(os.path.join(directory, "*"))
    doomed = set(everything).difference(survivors)

    report_root = os.path.dirname(os.getcwd())
    removed = 0
    for candidate in doomed:
        # Directories are left alone even when they do not match the pattern.
        if not os.path.isfile(candidate):
            continue
        os.remove(candidate)
        print(f"Deleted: {os.path.relpath(candidate, report_root)}")
        removed += 1

    print(f"Deleted {removed} files. Kept {len(survivors)} files.")
    return

In [None]:
# Load the simulator data of the 317-bin instance through the project loader.
data317, coords317 = load_simulator_data(data_dir, number_of_bins=317, area=area)

# Bin selection stored for the current n_bins.
# NOTE(review): the file presumably holds a list of index lists - confirm.
bin_sel_path = os.path.join(data_dir, "bins_selection", f"graphs_{n_bins}V_1N_plastic.json")
with open(bin_sel_path) as json_file:
    bin_sel = json.load(json_file)

# Keep only the rows of the first selection, addressed by position; the IDs of
# those rows label the tables built in the following cells.
df = data317.iloc[bin_sel[0]].reset_index()
bins = df["ID"]

O seguinte script gera uma pasta com os arquivos .xlsx contendo as tabelas de enchimento diário dos contentores para cada amostra (sample).

In [None]:
# Pickled daily-waste data generated for the current selectors.
path = os.path.join(
    daily_waste_dir,
    f"{area}{n_bins}_{data_dist}_wsr{n_days}_N{n_samples}_seed{SEED}.pkl",
)
# SECURITY NOTE: pickle.load can execute arbitrary code; only open files
# produced by this project.
with open(path, "rb") as f:
    data = pickle.load(f)

# The tables go to a folder named after the pickle file (without extension).
folder_name = os.path.join(daily_waste_dir, f"{area}{n_bins}_{data_dist}_wsr{n_days}_N{n_samples}_seed{SEED}")
try:
    os.makedirs(folder_name, exist_ok=True)
except Exception:
    traceback.print_exc(file=sys.stdout)

# data is indexed as data[version][day][bin].
num_versions = len(data)
num_days = len(data[0])
num_bins = len(data[0][0])
day_labels = [f"Dia {j + 1}" for j in range(num_days)]
for i in range(num_versions):
    # Use a dedicated name: `df` holds the bin selection that other cells read,
    # so it must not be overwritten here.
    sample_table = pd.DataFrame(data[i]).transpose()  # bins as rows, days as columns
    sample_table.index = bins
    sample_table.index.name = "ID"
    sample_table.columns = day_labels
    sample_table = sample_table.reset_index()
    file_path = os.path.join(folder_name, f"tabela_versao_{i}.xlsx")
    sample_table.to_excel(file_path, index=False)

# Report the destination relative to the parent of the working directory.
parent_dir = os.path.dirname(os.getcwd())
print(f"All {num_versions} tables where saved to '{os.path.relpath(folder_name, parent_dir)}'.")

In [None]:
# Create column names for the days
# Column labels for the per-day fill values.
day_columns = [f"day {i + 1}" for i in range(n_days)]
# Base folder used only to shorten the paths shown in the log lines.
parent_dir = os.path.dirname(os.getcwd())
for file_path in get_files_for_first_prefix(fill_history_dir):
    # data_only=True returns cached cell values instead of formulas.
    wb = openpyxl.load_workbook(file_path, data_only=True)
    ws = wb.active

    # Every worksheet row becomes one matrix row; all cells must be numeric.
    # A dedicated name keeps the `data` loaded by another cell intact.
    fill_rows = list(ws.iter_rows(values_only=True))
    matrix = np.array(fill_rows, dtype=float)

    # Fill values are truncated to integers; the column count must equal n_days.
    result_df = pd.DataFrame(data=matrix.astype(int), columns=day_columns)

    # Take the IDs from `bins` (the selected bin IDs) rather than from `df`,
    # which other cells may rebind. Mean and StD use the untruncated values.
    result_df.insert(0, "ID", bins.values)
    result_df.insert(1, "Mean", np.round(np.mean(matrix, axis=1), 1))
    result_df.insert(2, "StD", np.round(np.std(matrix, axis=1), 1))

    # NOTE(review): the sample id is the single character after the first "#"
    # found anywhere in the path. With no "#" this silently picks the first
    # character of the path, and multi-digit ids are cut short - confirm the
    # file naming scheme.
    sid = file_path[file_path.find("#") + 1]
    output_path = os.path.join(
        output_dir,
        "fill_history",
        data_dist,
        f"enchimentos_seed{SEED}_sample{sid}.xlsx",
    )
    result_df.to_excel(output_path, index=False, engine="openpyxl")
    print(f"DataFrame saved to: {os.path.relpath(output_path, parent_dir)}")

In [None]:
# Destructive clean-up: permanently removes every file in fill_history_dir whose
# name does not start with "enchimentos". Run it only after the summary
# workbooks above have been written.
delete_files_except_pattern(fill_history_dir, "enchimentos*")

In [None]:
from logic.src.pipeline.simulations.loader import (
    load_depot,
    load_indices,
    load_simulator_data,
)
from logic.src.pipeline.simulations.network import apply_edges, compute_distance_matrix
from logic.src.pipeline.simulations.processor import process_data

from .notebook_setup import setup_home_directory

# Project helper returns the repository home directory for this notebook.
NOTEBOOK_NAME = "output_show"
home_dir = setup_home_directory(NOTEBOOK_NAME)

area = "Rio Maior"
# Area tag for the loaders: hyphens, underscores and spaces removed, lower-cased.
_separators = str.maketrans("", "", "-_ ")
src_area = area.translate(_separators).lower()

# Folders holding the simulator data and receiving the generated files.
data_dir = os.path.join(home_dir, "data", "wsr_simulator")
sub_dm_dir = os.path.join(data_dir, "distance_matrix", "submatrix")
sel_coords_dir = os.path.join(data_dir, "coordinates", "selected_coordinates")

# Create the folders; a failure is printed to stdout instead of being raised
# and stops the remaining folders from being created.
try:
    for _folder in (data_dir, sub_dm_dir, sel_coords_dir):
        os.makedirs(_folder, exist_ok=True)
except Exception:
    traceback.print_exc(file=sys.stdout)

In [None]:
# Depot record of the area; later cells work on a copy of it.
depot = load_depot(data_dir, src_area)
depot_tmp = depot.copy()

# Data and coordinates of the 317-bin instance.
data317, coords317 = load_simulator_data(data_dir, number_of_bins=317, area=src_area)

# Distance matrix over the depot followed by all bins, renumbered from 0.
depot_and_bins = pd.concat([depot_tmp, coords317]).reset_index(drop=True)
dist_matrix317 = compute_distance_matrix(depot_and_bins, method="og")

In [None]:
# Settings forwarded to the project helpers below.
# NOTE(review): the exact meaning of edge_thresh / edge_method is defined by
# apply_edges, which is not visible here - confirm against that function.
edge_thresh = 0.0
edge_method = "knn"
env_file = "vars.env"
waste_type = "plastic"
# Instance sizes for which coordinates and a distance submatrix are exported.
n_bins_ls = [20, 50, 100, 170]

dist_mat_method = "gmaps"
# Base folder used only to shorten the paths shown in the log lines.
parent_dir = os.path.dirname(os.getcwd())
# NOTE: the loop variable rebinds the notebook-level n_bins; after this cell it
# holds the last entry of n_bins_ls.
for n_bins in n_bins_ls:
    idx_filename = f"graphs_{n_bins}V_1N_{waste_type}.json"
    # NOTE(review): 10 and 317 are passed positionally; presumably the number
    # of samples and the size of the full instance - confirm with load_indices.
    indices_ls = load_indices(idx_filename, 10, n_bins, 317)
    # Only the first index set is used.
    new_data, coordinates = process_data(data317, coords317, depot_tmp, indices=indices_ls[0])
    # Export the selected coordinates without the "#bin" column.
    clean_coords = coordinates.drop("#bin", axis=1)
    coords_filepath = os.path.join(sel_coords_dir, f"coordinates{n_bins}_{waste_type}[{src_area}].xlsx")
    clean_coords.to_excel(coords_filepath, index=False)
    print(f"Coordinates for selected bins saved to: {os.path.relpath(coords_filepath, parent_dir)}")

    # This path does not depend on n_bins, so every iteration passes the same file.
    dm_filepath = os.path.join(
        data_dir,
        "distance_matrix",
        f"{dist_mat_method}_distmat_{waste_type}[{src_area}].csv",
    )
    distance_matrix = compute_distance_matrix(
        coordinates,
        dist_mat_method,
        focus_idx=indices_ls[0],
        dm_filepath=dm_filepath,
        env_filename=env_file,
    )
    # Only the first returned matrix is saved, as CSV with three decimals.
    dist_matrix_edges, shortest_paths, adj_matrix = apply_edges(distance_matrix, edge_thresh, edge_method)
    submatrix_filepath = os.path.join(sub_dm_dir, f"{dist_mat_method}_distmat{n_bins}_{waste_type}[{src_area}].csv")
    np.savetxt(submatrix_filepath, dist_matrix_edges, delimiter=",", fmt="%.3f")
    print(f"Distance submatrix saved to: {os.path.relpath(submatrix_filepath, parent_dir)}")