In [10]:
import pandas as pd
import numpy as np
import json


# Default number of equal-width bins computed for every metric.
N_BINS = 5


def equal_width_bins(values, n_bins=N_BINS):
    """Split a numeric Series into equal-width bins spanning its min..max.

    Args:
        values: pandas Series of numeric observations; may contain NaN.
        n_bins: number of equal-width bins to create (must be >= 1).

    Returns:
        A ``(edges, codes)`` tuple. ``edges`` is a list of Python floats
        holding the bin boundaries (``n_bins + 1`` values in the normal
        case, two identical values when every observation is equal, and an
        empty list when there is nothing to bin). ``codes`` is a Series
        aligned with ``values`` holding the 0-based bin number of each row;
        rows whose value is missing get NaN.

    Raises:
        ValueError: if ``n_bins`` is smaller than 1.
    """
    if n_bins < 1:
        raise ValueError(f"n_bins must be >= 1, got {n_bins}")

    observed = values.dropna()
    if observed.empty:
        # No usable observations: min/max would be NaN and would otherwise
        # leak NaN edges into pd.cut and into the JSON file.
        return [], pd.Series(np.nan, index=values.index)

    low = observed.min()
    high = observed.max()

    if low == high:
        # Zero-width range: a single degenerate bin holds every observation.
        codes = pd.Series(0, index=values.index)
        if values.isna().any():
            # Keep missing rows unbinned, matching what pd.cut does below.
            codes = codes.where(values.notna())
        return [float(low), float(high)], codes

    # n_bins + 1 edge points delimit n_bins equal-width bins.
    edges = np.linspace(low, high, num=n_bins + 1)
    codes = pd.cut(
        values,
        bins=edges,
        labels=False,
        include_lowest=True,  # the minimum itself belongs to bin 0
        duplicates='drop',
    )
    return [float(e) for e in edges], codes


if __name__ == "__main__":
    # Load the raw measurements.
    df = pd.read_csv('input.csv')

    metrics = ['solution_time', 'peak_memory', 'optimality_gap']

    # Maps metric name -> list of bin boundaries; written to JSON below.
    bin_edges = {}

    for m in metrics:
        edges, codes = equal_width_bins(df[m])
        bin_edges[m] = edges
        df[f'{m}_bin'] = codes

    for m, edges in bin_edges.items():
        print(f"  {m}: {edges}")

    # Persist the boundaries and the table with the added *_bin columns.
    with open('bin_edges.json', 'w', encoding='utf-8') as f:
        json.dump(bin_edges, f, indent=2)

    df.to_csv('bins.csv', index=False)

Bin edges for each metric:
  solution_time: [0.000563622, 60.003916697600005, 120.00726977320001, 180.0106228488, 240.0139759244, 300.017329]
  peak_memory: [157696.0, 225688.8, 293681.6, 361674.4, 429667.2, 497660.0]
  optimality_gap: [0.0, 0.0017469232000000001, 0.0034938464000000002, 0.0052407696, 0.0069876928000000005, 0.008734616]

Sample of the DataFrame with new *_bin columns:
   number_of_elements    capacity  max_weight  min_weight   mean_weight  \
0                1363  40012301.0  99898279.5      7956.3  4.986272e+07   
1                  68     88735.6      9725.1        32.1  4.873713e+03   
2                1248     69910.6      9997.8         4.1  4.950323e+03   
3                 933  72565856.2  99698044.3     17147.7  4.939222e+07   
4                  85   5045562.0  99068429.4    343382.0  4.379182e+07   

   median_weight    std_weight  weight_range  max_profit  min_profit  ...  \
0    49347188.00  2.900847e+07    99890323.2  99919844.5    353966.2  ...   
1       