In [5]:
import pandas as pd
from ydata_profiling import ProfileReport

from gnn_scheduler import get_project_path
from gnn_scheduler.jssp import load_pickle_instances

In [6]:
# Read the pickled JSSP instances stored in the "diff_prediction_instances" folder.
instances = load_pickle_instances(
    folder_name="diff_prediction_instances",
)

Loading instances:   0%|          | 0/49519 [00:00<?, ?it/s]

Loading instances: 100%|██████████| 49519/49519 [00:07<00:00, 6207.39it/s] 


49519

In [9]:
# One accumulator list per DataFrame column; all of them are filled in a
# single pass over `instances`.
names, n_jobs = [], []
lower_bounds, upper_bounds = [], []
is_optimal = []
# Flags instances whose upper_bound is exactly twice their lower_bound.
# NOTE(review): per the original author's note, lower_bound is the maximum
# machine load here and this flag marks instances without a real solution;
# confirm against the instance generator.
no_solutions = []
max_machine_load_div_upper_bound = []  # lower_bound / upper_bound
upper_bound_div_max_machine_load_minus_1 = []  # upper_bound / lower_bound - 1
upper_bound_div_max_job_duration_minus_1 = []  # upper_bound / max_job_duration - 1

for jssp_instance in instances:
    # Both bounds feed several columns, so read them once per instance.
    lb = jssp_instance.lower_bound
    ub = jssp_instance.upper_bound

    names.append(jssp_instance.name)
    n_jobs.append(jssp_instance.n_jobs)
    lower_bounds.append(lb)
    upper_bounds.append(ub)
    # An instance counts as optimal when it has an optimum value recorded.
    is_optimal.append(jssp_instance.optimum is not None)
    no_solutions.append(ub == lb * 2)

    # Three alternative difficulty scores derived from the bounds.
    max_machine_load_div_upper_bound.append(lb / ub)
    upper_bound_div_max_machine_load_minus_1.append(ub / lb - 1)
    upper_bound_div_max_job_duration_minus_1.append(
        ub / jssp_instance.max_job_duration - 1
    )

# Assemble the columns into a DataFrame; the dict order fixes the column order.
df = pd.DataFrame(
    data={
        "name": names,
        "n_jobs": n_jobs,
        "lower_bound": lower_bounds,
        "upper_bound": upper_bounds,
        "is_optimal": is_optimal,
        "no_solution": no_solutions,
        "max_machine_load_div_upper_bound": max_machine_load_div_upper_bound,
        "upper_bound_div_max_machine_load_minus_1": upper_bound_div_max_machine_load_minus_1,
        "upper_bound_div_max_job_duration_minus_1": upper_bound_div_max_job_duration_minus_1,
    }
)
df.head()

Unnamed: 0,name,n_jobs,lower_bound,upper_bound,is_optimal,no_solution,max_machine_load_div_upper_bound,upper_bound_div_max_machine_load_minus_1,upper_bound_div_max_job_duration_minus_1
0,naive_generated_instance_35302,20,1092,1388,False,False,0.786744,0.271062,1.161994
1,naive_generated_instance_49119,10,668,791,False,False,0.844501,0.184132,0.305281
2,naive_generated_instance_39795,16,1005,1232,False,False,0.815747,0.225871,0.910078
3,naive_generated_instance_26963,13,825,965,False,False,0.854922,0.169697,0.291834
4,naive_generated_instance_33786,19,1057,1296,False,False,0.815586,0.226112,0.954751


In [10]:
# Build the profiling report for the instance DataFrame and export it as HTML
# to <project>/reports/jssp_difficulty_prediction_instances.html.
report_path = (
    get_project_path() / "reports" / "jssp_difficulty_prediction_instances.html"
)
# Make sure the "reports" folder exists before exporting: to_file() appears to
# write directly to the target path, so on a fresh checkout a missing folder
# would only surface as an error after the whole profiling run.
# NOTE(review): relies on get_project_path() returning a pathlib.Path, as its
# use with the `/` operator suggests; confirm.
report_path.parent.mkdir(parents=True, exist_ok=True)

profile = ProfileReport(df, title="JSSP Difficulty Prediction Instances")
profile.to_file(report_path)

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

  is_valid_dtype = pdt.is_categorical_dtype(series) and not pdt.is_bool_dtype(
  not pdt.is_categorical_dtype(series)
  is_valid_dtype = pdt.is_categorical_dtype(series) and not pdt.is_bool_dtype(
  if pdt.is_categorical_dtype(series):
  is_valid_dtype = pdt.is_categorical_dtype(series) and not pdt.is_bool_dtype(
  is_valid_dtype = pdt.is_categorical_dtype(series) and not pdt.is_bool_dtype(
  not pdt.is_categorical_dtype(series)
  if pdt.is_categorical_dtype(series):
Summarize dataset: 100%|██████████| 54/54 [00:04<00:00, 13.16it/s, Completed]                                                                                 
Generate report structure: 100%|██████████| 1/1 [00:01<00:00,  1.81s/it]
Render HTML: 100%|██████████| 1/1 [00:00<00:00,  1.60it/s]
Export report to file: 100%|██████████| 1/1 [00:00<00:00, 343.26it/s]
