In [1]:
# Imports and config
import os
import json
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import math
from scipy import stats  # for Pearson correlation

# API from your generator module; ensure knapsack_data_generator.py is in same folder or on PYTHONPATH
from knapsack_data_generator import generate_batch

# Notebook-wide configuration; the generation cell below reads every one of these.
# Root folder for all generated artifacts (created here if it does not exist yet).
OUT_ROOT = Path("knapsack_multisize_data")
OUT_ROOT.mkdir(exist_ok=True)
# Instance sizes; passed to generate_batch as `ns`.
SIZES = [50, 200, 500]          # change or extend as you like
# Starting point for every batch seed; a per-distribution offset and a
# capacity-ratio term are added to it in the generation cell.
BASE_SEED = 20251018
# Each ratio is passed to generate_batch as `capacity_ratio` and also names
# the output sub-folder ("cap_<ratio>").
CAPACITY_RATIOS = [0.2, 0.5, 0.8]
# Passed to generate_batch as `weight_range` / `value_range`.
# NOTE(review): presumably inclusive (min, max) bounds -- confirm in knapsack_data_generator.
WEIGHT_RANGE = (1, 500)
VALUE_RANGE = (1, 1000)
# Passed to generate_batch as `force_overwrite`.
FORCE_OVERWRITE = True            # set False to keep previously generated files

In [2]:
# (Assuming imports, SIZES, CAPACITY_RATIOS, OUT_ROOT, etc., are defined)
from tqdm import tqdm # <-- Import tqdm

# Generate one batch of instances per (distribution, capacity ratio) pair and
# record what generate_batch returns in `meta[dist_name][cap_name]`.

# Each entry names the weight/value distribution handed to generate_batch and
# a seed offset that keeps the distribution families in separate seed ranges.
dists = {
    "uniform": {"weight_dist": "uniform", "value_dist": "uniform", "seed_offset": 0},
    "normal":  {"weight_dist": "normal",  "value_dist": "normal",  "seed_offset": 100},
    "zipf":    {"weight_dist": "zipf",    "value_dist": "zipf",    "seed_offset": 200},
}

print(f"Generating instances for sizes: {SIZES}")
print(f"Across capacity ratios: {CAPACITY_RATIOS}")
# Derived from `dists` so the message cannot drift from the real configuration.
# The previous literal nested double quotes inside a double-quoted f-string,
# which is a SyntaxError on Python versions before 3.12.
print(f"Across weight distribution : {tuple(dists)}")

# Nested result store: meta[dist_name][cap_ratio_name] -> records
meta = {dist_name: {} for dist_name in dists}

# Flat task list so a single tqdm bar covers every combination.
tasks = [
    (dist_name, cfg, cap_ratio)
    for dist_name, cfg in dists.items()
    for cap_ratio in CAPACITY_RATIOS
]

# Base seed per task. int(cap_ratio * 10) truncates, so ratios that share the
# same first decimal digit (e.g. 0.2 and 0.25) would map to the same seed.
# Verify uniqueness up front instead of silently generating duplicate batches.
batch_seeds = {
    (dist_name, cap_ratio): BASE_SEED + cfg["seed_offset"] + int(cap_ratio * 10)
    for dist_name, cfg, cap_ratio in tasks
}
if len(set(batch_seeds.values())) != len(tasks):
    raise ValueError(
        "Batch seeds are not unique across (distribution, capacity ratio) "
        f"combinations: {batch_seeds}"
    )
# NOTE(review): base seeds of neighbouring capacity ratios are only a few
# integers apart. If generate_batch derives per-size seeds by incrementing
# base_seed (not visible in this notebook), adding more SIZES could make
# batches share seeds -- confirm against knapsack_data_generator.

print(f"\nGenerating {len(tasks)} instance batches...")

for dist_name, cfg, cap_ratio in tqdm(tasks, desc="Generating Batches"):

    # Directory-friendly label, e.g. "cap_0.2"
    cap_name = f"cap_{cap_ratio}"

    # Nested output directory, e.g. knapsack_multisize_data/uniform/cap_0.2
    out_dir = OUT_ROOT / dist_name / cap_name
    out_dir.mkdir(parents=True, exist_ok=True)

    records = generate_batch(
        ns=SIZES,
        output_dir=str(out_dir),
        weight_range=WEIGHT_RANGE,
        value_range=VALUE_RANGE,
        capacity_ratio=cap_ratio,
        correlation=None,
        weight_dist=cfg["weight_dist"],
        value_dist=cfg["value_dist"],
        base_seed=batch_seeds[(dist_name, cap_ratio)],
        force_overwrite=FORCE_OVERWRITE
    )

    meta[dist_name][cap_name] = records

# Save a small metadata summary next to the generated data.
meta_path = OUT_ROOT / "generation_metadata.json"
with open(meta_path, "w", encoding="utf8") as f:
    json.dump(meta, f, indent=2)

print(f"\nGeneration complete. Metadata saved to {meta_path}")

Generating instances for sizes: [50, 200, 500]
Across capacity ratios: [0.2, 0.5, 0.8]
Across weight distribution : ('normal', 'uniform', 'zipf')

Generating 9 instance batches...


Generating Batches: 100%|██████████| 9/9 [00:00<00:00, 147.54it/s]


Generation complete. Metadata saved to knapsack_multisize_data\generation_metadata.json





In [3]:
# Helper to load an instance JSON given the path stored in a generate_batch record
def load_instance_json(path):
    """Read one instance file and return its parsed JSON content.

    Args:
        path: location of the JSON file (str or Path).

    Returns:
        The decoded JSON document, or None when the file does not exist
        (a warning is printed in that case).
    """
    instance_file = Path(path)
    if instance_file.exists():
        with instance_file.open("r", encoding="utf8") as handle:
            return json.load(handle)
    print(f"Warning: File not found, skipping: {instance_file}")
    return None

# Build a mapping: instances[size][dist][cap_ratio_name] -> instance dict
# (relies on SIZES, meta and load_instance_json defined in earlier cells)

# Pre-create every size/distribution slot so later cells can index
# instances[n][dist_name] even when nothing was loaded for that pair.
instances = {n: {dist_name: {} for dist_name in meta} for n in SIZES}

# meta[dist_name][cap_name] is the list of records for that configuration;
# each record supplies the instance size ("n") and the JSON path ("json").
for dist_name, cap_dict in meta.items():
    for cap_name, records in cap_dict.items():
        for rec in records:
            n = rec["n"]
            json_path = rec["json"]

            inst = load_instance_json(json_path)
            # load_instance_json returns None for a missing file; skip those.
            if inst:
                instances[n][dist_name][cap_name] = inst

# --- Verification print ---
print("--- Verification of Loaded Instances ---")
for n in SIZES:
    print(f"Size {n}:")
    # The per-distribution dicts always exist (pre-created above), so
    # instances[n] itself is truthy whenever meta is non-empty. Inspect their
    # contents to decide whether anything was actually loaded for this size.
    if not any(instances[n].values()):
        print("  No data loaded.")
        continue

    for dist_name, cap_data in instances[n].items():
        # Capacity-ratio labels loaded for this size and distribution
        loaded_ratios = list(cap_data.keys())
        print(f"  Dist '{dist_name}': loaded ratios -> {loaded_ratios}")

# Example of how to access a specific instance
try:
    first_size = SIZES[0]
    first_dist = list(meta.keys())[0]
    first_cap = list(meta[first_dist].keys())[0]

    example_instance = instances[first_size][first_dist][first_cap]

    print("\n--- Example Access ---")
    print(f"Accessing: instances[{first_size}]['{first_dist}']['{first_cap}']")
    print(f"  -> Instance capacity: {example_instance['capacity']}")
    print(f"  -> Instance items: {len(example_instance['items'])}")
    print(f"  -> Instance meta seed: {example_instance['meta']['seed']}")
except Exception as e:
    # Best-effort demo only: report the problem instead of stopping the notebook.
    print(f"\nCould not access example instance: {e}")
    print("Check if generation was successful and 'meta' object is populated.")

--- Verification of Loaded Instances ---
Size 50:
  Dist 'uniform': loaded ratios -> ['cap_0.2', 'cap_0.5', 'cap_0.8']
  Dist 'normal': loaded ratios -> ['cap_0.2', 'cap_0.5', 'cap_0.8']
  Dist 'zipf': loaded ratios -> ['cap_0.2', 'cap_0.5', 'cap_0.8']
Size 200:
  Dist 'uniform': loaded ratios -> ['cap_0.2', 'cap_0.5', 'cap_0.8']
  Dist 'normal': loaded ratios -> ['cap_0.2', 'cap_0.5', 'cap_0.8']
  Dist 'zipf': loaded ratios -> ['cap_0.2', 'cap_0.5', 'cap_0.8']
Size 500:
  Dist 'uniform': loaded ratios -> ['cap_0.2', 'cap_0.5', 'cap_0.8']
  Dist 'normal': loaded ratios -> ['cap_0.2', 'cap_0.5', 'cap_0.8']
  Dist 'zipf': loaded ratios -> ['cap_0.2', 'cap_0.5', 'cap_0.8']

--- Example Access ---
Accessing: instances[50]['uniform']['cap_0.2']
  -> Instance capacity: 2516
  -> Instance items: 50
  -> Instance meta seed: 20251020


In [4]:
# (Assuming imports like plt, Path, SIZES, OUT_ROOT, and the 'instances' dict are loaded)
from tqdm import tqdm  # <-- Import the tqdm library for progress bars

# Comparative weight/value histograms: one figure per (size, capacity ratio),
# one column per distribution, weights on the top row and values below.
SAVE_PLOTS = True
PLOTS_ROOT = OUT_ROOT / "plots"
PLOTS_DIR = PLOTS_ROOT / "distributions"
PLOTS_ROOT.mkdir(exist_ok=True)
PLOTS_DIR.mkdir(exist_ok=True)

# Column order of the figure grid
dist_order = ["uniform", "normal", "zipf"]

# Flat list of (size, cap_name) pairs so one tqdm bar covers every figure.
plot_tasks = []
for n in SIZES:
    # Union of the capacity-ratio labels loaded for this size
    all_cap_ratios_for_n = set()
    for dname in dist_order:
        all_cap_ratios_for_n.update(instances.get(n, {}).get(dname, {}))

    if not all_cap_ratios_for_n:
        print(f"No data found for size {n}, skipping plot.")
        continue

    plot_tasks.extend((n, cap_name) for cap_name in sorted(all_cap_ratios_for_n))

if plot_tasks:
    print(f"Generating {len(plot_tasks)} distribution histogram plots...")

    for n, cap_name in tqdm(plot_tasks, desc="Generating Histograms"):

        # squeeze=False keeps `axes` two-dimensional for any number of columns.
        fig, axes = plt.subplots(
            2, len(dist_order), figsize=(15, 8), tight_layout=True, squeeze=False
        )

        # Same bin count for weights and values; finer bins for larger instances.
        n_bins = 40 if n <= 500 else 80

        for j, dname in enumerate(dist_order):

            inst = instances[n].get(dname, {}).get(cap_name)

            if inst is None:
                # Centre the note in axes coordinates so it stays visible
                # regardless of the (empty) panel's data limits.
                for ax in (axes[0, j], axes[1, j]):
                    ax.text(0.5, 0.5, f"missing\n{dname}/{cap_name}",
                            ha='center', va='center', transform=ax.transAxes)
                continue

            weights = [it["weight"] for it in inst["items"]]
            values  = [it["value"]  for it in inst["items"]]

            # Every distribution's instance carries its own capacity, so the
            # capacity is reported per column rather than once for the figure.
            panels = (
                (axes[0, j], weights,
                 f"Weights — {dname.capitalize()} (capacity={inst['capacity']})", "weight"),
                (axes[1, j], values,
                 f"Values  — {dname.capitalize()}", "value"),
            )
            for ax, panel_data, panel_title, xlabel in panels:
                ax.hist(panel_data, bins=n_bins)
                ax.set_title(panel_title)
                ax.set_xlabel(xlabel)
                ax.set_ylabel("count")
                ax.grid(axis='y', linestyle='--', alpha=0.7)

        fig.suptitle(f"Knapsack Item Distributions — n = {n}, {cap_name}", fontsize=16)

        if SAVE_PLOTS:
            out_path = PLOTS_DIR / f"distributions_n{n}_{cap_name}.png"
            fig.savefig(out_path, bbox_inches="tight", dpi=150)

        # Figures are only written to disk; close each one to free memory.
        plt.close(fig)

Generating 9 distribution histogram plots...


Generating Histograms: 100%|██████████| 9/9 [00:04<00:00,  1.85it/s]


In [5]:
# (Assuming imports like plt, np, Path, SIZES, OUT_ROOT, and 'instances' are loaded)
from tqdm import tqdm # For progress bar

# Output folders for this cell: <OUT_ROOT>/plots and <OUT_ROOT>/plots/summaries.
# The parent is created first so the child mkdir always finds it.
PLOTS_ROOT = OUT_ROOT.joinpath("plots")
SUMMARIES_DIR = PLOTS_ROOT.joinpath("summaries")
PLOTS_ROOT.mkdir(exist_ok=True)
SUMMARIES_DIR.mkdir(exist_ok=True)

# --- Helper functions (from your script) ---
# (Removed the mkdir line from here)
def ecdf(data):
    """Return the sorted sample together with its empirical CDF heights.

    Args:
        data: array-like of numbers (converted with np.asarray).

    Returns:
        (x, y) where x is the data sorted ascending and y[i] = (i + 1) / n
        for a sample of n values.
    """
    ordered = np.sort(np.asarray(data))
    count = ordered.size
    return ordered, np.arange(1, count + 1) / count

def ccdf(data):
    """Return the sorted sample together with its complementary CDF heights.

    Args:
        data: array-like of numbers (converted with np.asarray).

    Returns:
        (x, y) where x is the data sorted ascending and y[i] = 1 - i / n
        for a sample of n values, so the smallest value maps to 1.0.
    """
    ordered = np.sort(np.asarray(data))
    count = ordered.size
    return ordered, 1.0 - np.arange(count) / count
# ------------------------------------------

# Four-panel visual summary per (size, capacity ratio): weight-vs-value
# scatter, value/weight ratio histogram, weight ECDF and weight boxplot,
# with the distributions overlaid in each panel.
SAVE = True
# Fixed order and colors so every figure uses the same encoding
dist_order = ["uniform", "normal", "zipf"]
colors = {"uniform":"tab:blue", "normal":"tab:green", "zipf":"tab:orange"}

# Scatter panels are thinned to at most this many points per distribution.
MAX_SCATTER_POINTS = 5000
# Dedicated, seeded generator so the thinned scatter plots come out the same
# on every Restart & Run All (the global NumPy RNG is never seeded here).
scatter_rng = np.random.default_rng(BASE_SEED)

# Flat list of (size, cap_name) pairs so one tqdm bar covers every figure.
plot_tasks = []
for n in sorted(instances.keys()):
    all_cap_ratios_for_n = set()
    for dname in dist_order:
        all_cap_ratios_for_n.update(instances[n].get(dname, {}))

    plot_tasks.extend((n, cap_name) for cap_name in sorted(all_cap_ratios_for_n))

if plot_tasks:
    print(f"Generating {len(plot_tasks)} summary visualization plots...")

    for n, cap_name in tqdm(plot_tasks, desc="Generating Viz Summaries"):

        # Instances available for this specific n and cap_name, in dist_order
        dist_map = {}
        for dname in dist_order:
            inst = instances[n].get(dname, {}).get(cap_name)
            if inst:
                dist_map[dname] = inst

        if not dist_map:
            # Nothing loaded for this combination
            continue

        # Pull weights/values out once; all four panels reuse these arrays.
        weights_by_dist = {
            name: np.array([it["weight"] for it in loaded["items"]])
            for name, loaded in dist_map.items()
        }
        values_by_dist = {
            name: np.array([it["value"] for it in loaded["items"]])
            for name, loaded in dist_map.items()
        }

        fig, axes = plt.subplots(2, 2, figsize=(12, 10), tight_layout=True)

        # Scatter (weight vs value), distributions overlaid
        ax_scatter = axes[0,0]
        for dname in dist_map:
            w = weights_by_dist[dname]
            v = values_by_dist[dname]
            if len(w) > MAX_SCATTER_POINTS:
                # Thin very large instances; smaller, fainter markers for the dense cloud
                idx = scatter_rng.choice(len(w), size=MAX_SCATTER_POINTS, replace=False)
                w, v = w[idx], v[idx]
                marker_size, marker_alpha = 6, 0.5
            else:
                marker_size, marker_alpha = 8, 0.6
            ax_scatter.scatter(w, v, s=marker_size, alpha=marker_alpha,
                               label=dname, color=colors.get(dname))
        ax_scatter.set_xlabel("weight")
        ax_scatter.set_ylabel("value")
        ax_scatter.set_title("Weight vs Value")
        ax_scatter.legend()
        ax_scatter.grid(True, linestyle='--', alpha=0.5)

        # Ratio histogram (value/weight)
        ax_ratio = axes[0,1]
        for dname in dist_map:
            w = weights_by_dist[dname]
            v = values_by_dist[dname]
            # Items with non-positive weight are left out to avoid dividing by zero.
            positive = w > 0
            ratio = (v[positive] / w[positive]).astype(float)
            ax_ratio.hist(ratio, bins=40, alpha=0.6, label=dname,
                          color=colors.get(dname), density=True)
        ax_ratio.set_xlabel("value / weight")
        ax_ratio.set_ylabel("Density")
        ax_ratio.set_title("Value / Weight Ratio (Density)")
        ax_ratio.legend()

        # ECDF for weights (ecdf is the helper defined earlier in this cell)
        ax_ecdf = axes[1,0]
        for dname in dist_map:
            x, y = ecdf(weights_by_dist[dname])
            ax_ecdf.plot(x, y, label=dname, color=colors.get(dname))
        ax_ecdf.set_xlabel("weight")
        ax_ecdf.set_ylabel("ECDF (Cumulative Probability)")
        ax_ecdf.set_title("ECDF of weights")
        ax_ecdf.legend()
        ax_ecdf.grid(True, linestyle='--', alpha=0.5)

        # Boxplot comparing weight distributions (dist_map is already in dist_order)
        ax_box = axes[1,1]
        labels = list(dist_map)
        data = [weights_by_dist[dname] for dname in labels]

        if data:
            # NOTE(review): `tick_labels` is the Matplotlib 3.9+ spelling of the
            # older `labels` keyword -- confirm the pinned Matplotlib version.
            ax_box.boxplot(data, tick_labels=labels, showfliers=False)
        ax_box.set_ylabel("weight")
        ax_box.set_title("Boxplot — weights by distribution")

        fig.suptitle(f"Knapsack Visual Summary — n={n}, {cap_name}", fontsize=16)

        if SAVE:
            out_path = SUMMARIES_DIR / f"viz_summary_n{n}_{cap_name}.png"
            fig.savefig(out_path, bbox_inches="tight", dpi=150)

        # Figures are only written to disk; close each one to free memory.
        plt.close(fig)

    print(f"\nAll summary visualizations saved to {SUMMARIES_DIR}")
else:
    print("No instances found to visualize.")

Generating 9 summary visualization plots...


Generating Viz Summaries: 100%|██████████| 9/9 [00:03<00:00,  2.27it/s]


All summary visualizations saved to knapsack_multisize_data\plots\summaries





In [6]:
# (Assuming imports like plt, np, Path, SIZES, OUT_ROOT, stats, 'instances', and 'dist_order' are loaded)
import csv  # <-- Import CSV module
from tqdm import tqdm # <-- Import tqdm

# For every (size, capacity ratio): save a log-log CCDF plot of the Zipf
# weights and collect per-distribution summary statistics, which are then
# written to <OUT_ROOT>/numeric_summary.csv.
# Relies on `ccdf` and `dist_order` defined in the previous cell.
PLOTS_ROOT = OUT_ROOT / "plots"
ZIPF_PLOTS_DIR = PLOTS_ROOT / "zipf_ccdf"
PLOTS_ROOT.mkdir(exist_ok=True)
ZIPF_PLOTS_DIR.mkdir(exist_ok=True)

# Flat list of (size, cap_name) pairs so one tqdm bar covers everything.
plot_tasks = []
for n in sorted(instances.keys()):
    all_cap_ratios_for_n = set()
    for dname in dist_order:
        all_cap_ratios_for_n.update(instances[n].get(dname, {}))

    plot_tasks.extend((n, cap_name) for cap_name in sorted(all_cap_ratios_for_n))

# One dict per (size, distribution, capacity ratio); keys match csv_header.
summary_data = []
csv_header = [
    "n", "distribution", "capacity_ratio",
    "mean_weight", "std_weight", "mean_value", "std_value",
    "pearson_r", "p_value"
]

print(f"\nGenerating {len(plot_tasks)} Zipf CCDF plots and numeric summaries...")

for n, cap_name in tqdm(plot_tasks, desc="Summarizing Instances"):

    # --- 1. Zipf log-log CCDF plot (only when a zipf instance was loaded) ---
    inst_zipf = instances[n].get("zipf", {}).get(cap_name)

    if inst_zipf:
        w = np.array([it["weight"] for it in inst_zipf["items"]])
        x, y = ccdf(w)
        # Log axes cannot show non-positive coordinates; drop them.
        mask = (x > 0) & (y > 0)
        x_m, y_m = x[mask], y[mask]

        fig, ax = plt.subplots(figsize=(6,4))
        ax.loglog(x_m, y_m, marker='.', markersize=4, linestyle='none')
        ax.set_xlabel("weight (log)")
        ax.set_ylabel("CCDF (log)")
        ax.set_title(f"Zipf — CCDF (log-log) n={n}, {cap_name}")
        ax.grid(True, which="both", ls="--", alpha=0.5)

        out_path = ZIPF_PLOTS_DIR / f"zipf_ccdf_n{n}_{cap_name}.png"
        fig.savefig(out_path, bbox_inches="tight", dpi=150)

        # Figures are only written to disk; close each one to free memory.
        plt.close(fig)

    # --- 2. Numeric summary & weight/value correlation ---
    for dname in dist_order:
        inst = instances[n].get(dname, {}).get(cap_name)
        if not inst:
            continue

        w = np.array([it["weight"] for it in inst["items"]])
        v = np.array([it["value"]  for it in inst["items"]])

        # Population statistics (ddof=0); an empty instance reports zeros.
        mean_w, std_w = (w.mean(), w.std(ddof=0)) if len(w) > 0 else (0, 0)
        mean_v, std_v = (v.mean(), v.std(ddof=0)) if len(v) > 0 else (0, 0)

        # Pearson correlation is only attempted with at least two items and
        # non-constant weights and values; otherwise it stays NaN.
        r, p = (float('nan'), float('nan'))
        if len(w) > 1 and len(v) > 1 and std_w > 0 and std_v > 0:
            try:
                r, p = stats.pearsonr(w, v)
            except ValueError as exc:
                # Keep the NaN defaults, but say so instead of hiding the failure.
                print(f"Warning: Pearson correlation failed for n={n}, {dname}, {cap_name}: {exc}")

        summary_data.append({
            "n": n,
            "distribution": dname,
            "capacity_ratio": cap_name,
            "mean_weight": round(mean_w, 2),
            "std_weight": round(std_w, 2),
            "mean_value": round(mean_v, 2),
            "std_value": round(std_v, 2),
            "pearson_r": r if np.isnan(r) else round(float(r), 5),
            # Five significant digits rather than five decimals, so very small
            # p-values are not flattened to 0.0 in the CSV.
            "p_value": p if np.isnan(p) else float(f"{p:.5g}"),
        })

# --- 3. Write summary data to CSV file ---
summary_path = OUT_ROOT / "numeric_summary.csv"
try:
    with open(summary_path, "w", newline='', encoding='utf8') as f:
        writer = csv.DictWriter(f, fieldnames=csv_header)
        writer.writeheader()
        writer.writerows(summary_data)

    print(f"\nNumeric summary successfully saved to: {summary_path}")
except IOError as e:
    print(f"\nError writing summary CSV to {summary_path}: {e}")
except Exception as e:
    print(f"\nAn unexpected error occurred during CSV writing: {e}")

# The plots were saved inside the loop, independently of the CSV outcome.
print(f"Zipf CCDF plots saved to: {ZIPF_PLOTS_DIR}")


Generating 9 Zipf CCDF plots and numeric summaries...


Summarizing Instances: 100%|██████████| 9/9 [00:01<00:00,  5.65it/s]


Numeric summary successfully saved to: knapsack_multisize_data\numeric_summary.csv
Zipf CCDF plots saved to: knapsack_multisize_data\plots\zipf_ccdf





In [7]:
# (Assuming SIZES, instances, and dist_order are defined)
# dist_order = ["uniform", "normal", "zipf"] # (In case it's not defined)

# Text report for manual inspection: for each size and distribution, one line
# per capacity ratio (seed, capacity, mean weight/value) followed by the first
# SAMPLE_K items of that instance.
SAMPLE_K = 5
for n in SIZES:
    print("\n" + "="*125)
    print(f"\nSUMMARY for n={n}\n")

    for dname in dist_order:

        # {cap_name: instance} for this size/distribution; None or empty when nothing was loaded
        cap_map = instances[n].get(dname)

        if cap_map:
            print(f" Distribution: {dname.capitalize()}")

            # Iterate capacity labels in sorted order for a stable report
            for cap_name in sorted(cap_map):
                inst = cap_map[cap_name]
                if inst is None:
                    print(f"  {cap_name:7s} | missing")
                    continue

                items = inst["items"]
                weights = [it["weight"] for it in items]
                values  = [it["value"]  for it in items]

                # Means are shown as "N/A" for an instance without items
                if weights:
                    mean_w_str = f"{sum(weights) / len(weights):6.1f}"
                    mean_v_str = f"{sum(values) / len(values):6.1f}"
                else:
                    mean_w_str = mean_v_str = "N/A"

                print(f"  {cap_name:7s} | seed={inst['meta']['seed']:10d} | capacity={inst['capacity']:5d} | mean_w={mean_w_str} | mean_v={mean_v_str}")

                sstr = ", ".join(
                    f"(id={it['id']},w={it['weight']},v={it['value']})"
                    for it in items[:SAMPLE_K]
                )
                print(f"    sample: {sstr}")
        else:
            print(f" Distribution: {dname.capitalize()} (missing)")

        # Separator after every distribution, loaded or missing
        print("-" * 70)

print("\n" + "="*125)



SUMMARY for n=50

 Distribution: Uniform
  cap_0.2 | seed=  20251020 | capacity= 2516 | mean_w= 251.6 | mean_v= 483.2
    sample: (id=0,w=45,v=988), (id=1,w=493,v=11), (id=2,w=441,v=580), (id=3,w=230,v=643), (id=4,w=479,v=620)
  cap_0.5 | seed=  20251023 | capacity= 6008 | mean_w= 240.3 | mean_v= 531.0
    sample: (id=0,w=65,v=675), (id=1,w=482,v=618), (id=2,w=114,v=330), (id=3,w=191,v=525), (id=4,w=91,v=318)
  cap_0.8 | seed=  20251026 | capacity= 9769 | mean_w= 244.2 | mean_v= 455.3
    sample: (id=0,w=81,v=811), (id=1,w=222,v=949), (id=2,w=177,v=524), (id=3,w=125,v=549), (id=4,w=37,v=596)
----------------------------------------------------------------------
 Distribution: Normal
  cap_0.2 | seed=  20251120 | capacity= 2587 | mean_w= 258.7 | mean_v= 493.7
    sample: (id=0,w=217,v=619), (id=1,w=261,v=267), (id=2,w=113,v=170), (id=3,w=185,v=281), (id=4,w=295,v=243)
  cap_0.5 | seed=  20251123 | capacity= 6696 | mean_w= 267.9 | mean_v= 488.0
    sample: (id=0,w=369,v=160), (id=1,w=2