In [1]:
import numpy as np
from datetime import datetime, timedelta
import pandas as pd

# Code parameters
u_parcels = 2000 # Expected number of parcels
std_parcels = 800 # Standard dev. of parcels
num_datasets = 3 # Number of datasets
lower_limit_outfeeds = 20 # Lowest number of outfeeds
upper_limit_outfeeds = 60 # Highest number of outfeeds

# Function to generate synthetic parcel data
def generate_parcel_data(num_parcels=int(np.random.normal(1500, 500)), start_time="2025-04-09 09:00:00"):
    base_time = datetime.strptime(start_time, "%Y-%m-%d %H:%M:%S")
    data = []
    
    # Randomize the number of outfeeds between 20 and 60
    total_outfeeds = np.random.randint(lower_limit_outfeeds, upper_limit_outfeeds)
    
    for i in range(1, num_parcels + 1):
        # Arrival time with incremental randomness
        arrival_delta = timedelta(milliseconds=np.random.randint(100, 3000))
        base_time += arrival_delta
        arrival_time = base_time
        
        # Parcel dimensions
        length = round(np.random.normal(1.2, 0.3), 3)
        width = round(np.random.normal(0.7, 0.3), 3)
        height = round(np.random.normal(0.5, 0.3), 3)
        weight = round(length * width * height * 1000 + np.random.normal(1000, 500), 1)
        
        # Create outfeed assignments with randomized number of assignments
        # Assign to a random number between 10% and 40% of the outfeeds
        min_outfeeds = 0
        max_outfeeds = max(2, int(total_outfeeds * 0.4))
        num_assigned_outfeeds = np.random.randint(min_outfeeds, max_outfeeds + 1)
        outfeeds = [False] * total_outfeeds
        true_indices = np.random.choice(range(total_outfeeds), size=num_assigned_outfeeds, replace=False)
        for idx in true_indices:
            outfeeds[idx] = True
        
        row = [i, arrival_time, length, width, height, weight] + outfeeds
        data.append(row)
    
    columns = ['Parcel Number', 'Arrival Time', 'Length', 'Width', 'Height', 'Weight'] + [f'Outfeed {i}' for i in range(1, total_outfeeds + 1)]
    return pd.DataFrame(data, columns=columns)

# Function to generate random layout data
def generate_layout_data():
    layout = {
        "Layout property": [
            "Belt Speed",
            "Distance Infeeds to Scanner",
            "Distance Scanner to Outfeeds",
            "Distance between Outfeeds",
            "Distance Infeeds to Arrival"
        ],
        "Value": [
            round(np.random.uniform(1.0, 2.0), 2),
            round(np.random.uniform(10.0, 20.0), 1),
            round(np.random.uniform(4.0, 6.0), 1),
            round(np.random.uniform(1.5, 2.5), 1),
            round(np.random.uniform(10.0, 15.0), 1)
        ],
        "Unit": ["m/s", "m", "m", "m", "m"]
    }
    return pd.DataFrame(layout)

# Generate datasets
datasets = []
print("Generating datasets with randomized parameters...")
for i in range(num_datasets):
    # Force at least 1000 parcels
    parcels_df = generate_parcel_data(num_parcels=int(np.random.normal(u_parcels, std_parcels)))
    
    # Ensure we have at least 1000 parcels
    while len(parcels_df) < 1000:
        parcels_df = generate_parcel_data(num_parcels=int(np.random.normal(u_parcels, std_parcels)))
    
    print(f"Dataset {i+1} created with {len(parcels_df)} parcels and {len(parcels_df.columns) - 6} outfeeds")
    layout_df = generate_layout_data()
    datasets.append((parcels_df, layout_df))

# Save datasets to Excel files
output_paths = []
for i, (parcels, layout) in enumerate(datasets, start=1):
    output_path = f"PosiSorterData_{i}.xlsx"  # Fixed path formatting
    print(f"Saving dataset {i} to {output_path}...")
    with pd.ExcelWriter(output_path, datetime_format='YYYY-MM-DD HH:MM:SS.000') as writer:
        parcels.to_excel(writer, sheet_name="Parcels", index=False)
        layout.to_excel(writer, sheet_name="Layout", index=False)
    output_paths.append(output_path)

print(f"Successfully created {len(output_paths)} datasets with the following paths:")
for path in output_paths:
    print(f" - {path}")

Generating datasets with randomized parameters...
Dataset 1 created with 2799 parcels and 57 outfeeds
Dataset 2 created with 1101 parcels and 33 outfeeds
Dataset 3 created with 2374 parcels and 32 outfeeds
Saving dataset 1 to PosiSorterData_1.xlsx...
Saving dataset 2 to PosiSorterData_2.xlsx...
Saving dataset 3 to PosiSorterData_3.xlsx...
Successfully created 3 datasets with the following paths:
 - PosiSorterData_1.xlsx
 - PosiSorterData_2.xlsx
 - PosiSorterData_3.xlsx


In [1]:
pip install joblib sklearn.ensemble

Defaulting to user installation because normal site-packages is not writeable
Collecting joblib
  Downloading joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)
Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement sklearn.ensemble (from versions: none)

[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: C:\Users\20231620\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip
ERROR: No matching distribution found for sklearn.ensemble
