In [1]:
from load_jsons import load_jsons
import pandas as pd
import matplotlib.pyplot as plt
from random import randint
from json import load

In [5]:
def rebuild(data:list[dict]) -> list[dict]:
    out = []
    for datapoint in data:
        solver = datapoint["solver"]
        if solver == "or-tools":
            if "--threads=8" in datapoint["solverOptions"][0]:
                continue
            else:
                solver = "or-tools-1"
        out.append({
            "instance": datapoint["essenceParams"][0],
            "model": datapoint["useExistingModels"][0].split("/")[-1],
            "solver": solver,
            "total_time": datapoint["totalTime"] if datapoint["status"] == "OK" else 36000,
            "status": datapoint["status"],
            "solver_options": datapoint["solverOptions"][0]
        })
    return out

In [42]:
data = load_jsons("../../EssenceCatalog-runs/problems/csplib-prob010-SocialGolfers/", verbose=False)
data = rebuild(data)

In [43]:
instances_data = {}
for datapoint in data:
    inst = datapoint["instance"]
    model = datapoint["model"]
    solver = datapoint["solver"]
    comb = f"{solver}_{model}"
    if not inst in instances_data:
        instances_data[inst] = {}
    instances_data[inst][comb] = datapoint["total_time"]

In [45]:
def load_file(file_name):
    with open(file_name) as f:
        return f.read()

def get_dataset(data):
    dataset = []
    for key in data.keys():
        datapoint = data[key]
        combinations = list(datapoint.keys())
        best = {'combination': combinations[0], 'time': datapoint[combinations[0]]}
        times = []
        for comb in combinations:
            if "bin" in comb:
                continue
            times.append({"time": datapoint[comb], "combination": comb})
            if best['time'] > datapoint[comb]:
                best = {'combination': comb, 'time': datapoint[comb]}

        instance = key.replace("/podmandir/", "") #/EssenceCatalog/problems/csplib-prob010-SocialGolfers/
        dataset.append({
            "combination": best["combination"],
            "time": best["time"],
            "instance_value":load_file(f"../../{instance}"),
            "instance_value_json":load_file(f"./instances/{instance.split('/')[-1].replace('.param','.json')}"),
            "instance_name": instance,
            "all_times": times
        })
    return dataset

dataset = get_dataset(instances_data)

In [2]:
from subprocess import run, PIPE, STDOUT
import concurrent.futures
import os

In [44]:
max_threads = 12
def gen_inst(instance):
    instance = instance.replace("/podmandir/", "")
    out_file = os.path.join("instances",instance.split('/')[-1].replace('.param', '.json'))
    command = ["/home/seppiabrilla/.local/bin/conjure", 
            "translate-parameter", 
            "--eprime=../../EssenceCatalog-runs/problems/csplib-prob010-SocialGolfers/conjure-mode/portfolio4/01_compact.eprime", 
            f"--essence-param=../../{instance}", 
            f"--eprime-param={out_file}", 
            "--output-format=json"]
    run(command)
    
with concurrent.futures.ThreadPoolExecutor(max_threads) as executor:
        # Submit tasks to the thread pool
        futures = {executor.submit(gen_inst, inst): inst for inst in instances_data.keys()}

        # Process results
        for future in concurrent.futures.as_completed(futures):
            text = futures[future]
            try:
                future.result()
            except Exception as e:
                print(f"An error occurred for text '{text}': {e}")

In [46]:
from json import dump
f = open("../data/datasets/dataset_SocialGolfers-2024-05-16.json","w")
dump(dataset, f)
f.close()

In [61]:
text = """#!/bin/bash
# Job name:
#SBATCH --job-name=ap421TrainJob
#
# Partition:
#SBATCH --partition=gpu-serv-02-part
#
# Specify one task:
#SBATCH --ntasks-per-node=1
#
# Number of processors for single task needed for use case (example):
#SBATCH --cpus-per-task=4
#
#SBATCH --mem=16GB
#
#SBATCH --gpus-per-task=1
#
#SBATCH --gpu-bind=single:1
#
# Wall clock limit:
#SBATCH --time=24:00:00
#
## Command(s)
echo "PROBLEM"
start_time=$(date +%s)
dnn-env/bin/python /data/ap421/EFE_Project/network/competitive_network.py /data/ap421/EFE_Project/data/datasets/DATASET 32 1 1e-4 /data/ap421/PROBLEM_fold_0_1.json /data/ap421/PROBLEM_fold_0_competitive 0
end_time=$(date +%s)
elapsed_time=$((end_time - start_time))
echo "Elapsed time: $elapsed_time seconds"
"""

In [3]:
import re
import json
import numpy as np

In [48]:
dataset = os.listdir("../data/datasets")

In [49]:
dataset

['dataset_TailAssignment-2024-05-16.json',
 'dataset_Transshipment-2024-05-16.json',
 'dataset_VesselLoading-2024-05-16.json',
 'dataset_CoveringArray-2024-05-09.json',
 'CarSequencing_problem_specifications.txt',
 'dataset_CarSequencing-2024-03-02.json',
 'dataset_CarSequencing-2024-03-19.json',
 'dataset_SocialGolfers-2024-05-16.json',
 'dataset_CarSequencing-2024-04-16.json',
 'dataset_AbnormalMagicHexagons-2024-05-16.json',
 'dataset_EFPA-2024-05-16.json',
 'dataset_FLECC-2024-05-16.json',
 'dataset.json']

In [62]:
for d in dataset:
    problem = re.findall("dataset_(.*)-2024",d)
    if len(problem) > 0:
        script = text.replace("PROBLEM", problem[0]).replace("DATASET", d)
        f = open(f"../scripts/{problem[0]}_1_epoch.sh","w")
        f.write(script)
        f.close()

In [21]:
files = os.listdir("../slurm")
files

['Transshipment_1_epoch.txt',
 'TailAssignment_1_epoch.txt',
 'CoveringArray_1_epoch.txt',
 'CarSequencing_1_epoch.txt',
 'FLECC_1_epoch.txt',
 'AbnormalMagicHexagons_1_epoch.txt',
 'EFPA_1_epoch.txt',
 'VesselLoading_1_epoch.txt',
 'SocialGolfers_1_epoch.txt']

In [26]:
training_times = pd.read_csv("../training_time.csv")
training_times

Unnamed: 0,model,time
0,Transshipment,4075
1,SocialGolfers,113
2,AbnormalMagicHexagons,115
3,CarSequencing,7003
4,VesselLoading,1983
5,CoveringArray,205
6,FLECC,4996
7,EFPA,328
8,TailAssignment,2218


In [34]:
datasets = {}
for file in os.listdir("../data/datasets"):
    if ".json" in file and "2024" in file:
        with open(os.path.join("../data/datasets", file)) as f:
            datasets[file] = json.load(f)

In [31]:
def is_competitive(vb, option):
    return option < 10 or vb * 2 > option

In [37]:
stats = []
for d in datasets.keys():
    dataset = datasets[d]
    competitives = []
    combs = {t["combination"]: 0 for t in dataset[0]["all_times"]}
    vb_time = 0
    for instance in dataset:
        vb = instance["time"]
        comp = len([t for t in instance["all_times"] if is_competitive(vb, t["time"])])
        for t in instance["all_times"]:
            combs[t["combination"]] += t["time"]
        vb_time += vb 
        competitives.append(comp)
    sb = min(combs.values())
    stats.append({
        "model":d, 
        "mean_competitive":np.mean(competitives), 
        "median_competitive": np.median(competitives), 
        "number_options": len(dataset[0]["all_times"]),
        "vb": vb_time,
        "sb": sb,
        "perc": round(vb_time/sb, 2)
        })
stats = pd.DataFrame(stats)
stats

Unnamed: 0,model,mean_competitive,median_competitive,number_options,vb,sb,perc
0,dataset_TailAssignment-2024-05-16.json,6.025245,6.0,16,128724.93,129739.9,0.99
1,dataset_Transshipment-2024-05-16.json,7.538606,8.0,16,44251.41,49353.59,0.9
2,dataset_VesselLoading-2024-05-16.json,3.514028,3.0,16,6475.01,6676.94,0.97
3,dataset_CoveringArray-2024-05-09.json,3.000894,3.0,4,31799.44,43845.74,0.73
4,dataset_CarSequencing-2024-03-02.json,10.159556,10.0,16,2828.368,3133.884,0.9
5,dataset_CarSequencing-2024-03-19.json,6.132269,6.0,16,108953.03,7404031.23,0.01
6,dataset_SocialGolfers-2024-05-16.json,3.428296,3.0,16,182062.1,278999.29,0.65
7,dataset_CarSequencing-2024-04-16.json,6.132269,6.0,16,108953.03,7404031.23,0.01
8,dataset_AbnormalMagicHexagons-2024-05-16.json,3.89801,4.0,4,595.75,871.56,0.68
9,dataset_EFPA-2024-05-16.json,1.92194,2.0,16,205406.55,308588.38,0.67


In [40]:
training_times = training_times.to_dict()

In [54]:
final_df = []
for i in range(9):
    model = training_times["model"][i]
    time = training_times["time"][i]
    for j in range(10):
        stat = stats.iloc[j]
        stat_model = re.findall("dataset_(.*)-2024", stat["model"])[0]
        if stat_model == model:
            final_df.append({
                "model": model, 
                "epoch_time":round((time * 10 * 10) / (60 * 60 * 24), 2), 
                "mean_competitive": round(stat["mean_competitive"],2), 
                "median_competitive": int(stat["mean_competitive"]),
                "all_options": stat["number_options"],
                "sb/vb":stat["perc"]})
print(sum([d["epoch_time"] for d in final_df]) - 16.22)
pd.DataFrame(final_df)

18.58


Unnamed: 0,model,epoch_time,mean_competitive,median_competitive,all_options,sb/vb
0,Transshipment,4.72,7.54,7,16,0.9
1,SocialGolfers,0.13,3.43,3,16,0.65
2,AbnormalMagicHexagons,0.13,3.9,3,4,0.68
3,CarSequencing,8.11,10.16,10,16,0.9
4,CarSequencing,8.11,6.13,6,16,0.01
5,CarSequencing,8.11,6.13,6,16,0.01
6,VesselLoading,2.3,3.51,3,16,0.97
7,CoveringArray,0.24,3.0,3,4,0.73
8,EFPA,0.38,1.92,1,16,0.67
9,TailAssignment,2.57,6.03,6,16,0.99


In [5]:
for file in os.listdir("../data/datasets"):
    if ".json" in file and "2024" in file:
        f = open(os.path.join("../data/datasets/", file))
        dataset = json.load(f)
        model = re.findall("dataset_(.*)-2024", file)[0]
        if not f"{model}_times.csv" in os.listdir("../times"):
            combinations = sorted([t["combination"] for t in dataset[0]["all_times"]])
            csv_times = f"inst,{','.join(combinations)}"
            for instance in dataset:
                instance_times = {t["combination"]:t["time"] for t in instance["all_times"]}
                csv_times += f"\n{instance['instance_name']},{','.join([str(instance_times[c]) for c in combinations])}"
            f = open(f"../times/{model}_times.csv","w")
            f.write(csv_times)