In [6]:
from envs.minimal_jsp_env.util.jsp_conversion.readers import JSPReaderJSON
from envs.minimal_jsp_env.entities import JSPInstance, Operation
from envs.minimal_jsp_env.util.jsp_generation.jsp_generator import JSPGenerator
import os
import pandas as pd
from typing import List
import random

class DatasetAnalyzer():
    def __init__(self, dataset_path: str, instance_reader) -> None:
        self.dataset_path = dataset_path
        self.instance_reader = instance_reader

    def analyze_dataset(self):

        entropies = {}
        results_df = pd.DataFrame(columns=['entropy', 'file_name', 'relative_entropy', 'num_jobs', 'num_ops_per_job', 'max_op_time', 'opt_time'])

        for entropy in os.listdir(self.dataset_path):
            entropy_data_path = f"{self.dataset_path}/{entropy}"

            for file_name in os.listdir(entropy_data_path):
                instance = self.instance_reader.read_instance(f"{entropy_data_path}/{file_name}")
                
                file_data = pd.DataFrame({
                    'entropy': entropy, 
                    'file_name': file_name,
                    'relative_entropy': instance.relative_entropy, 
                    'num_jobs': instance.num_jobs, 
                    'num_ops_per_job': instance.num_ops_per_job, 
                    'max_op_time': instance.max_op_time, 
                    'opt_time': instance.opt_time
                    }, index=[0])
                
                results_df = pd.concat([results_df, file_data], ignore_index=True)
                
        self.mean_num_ops_per_job = results_df['num_ops_per_job'].values.mean()
        self.mean_num_jobs = results_df['num_jobs'].values.mean()
        self.mean_max_op_time = results_df['max_op_time'].values.mean()

        self.mean_relative_entropies = results_df.groupby('entropy').mean(numeric_only=True)['relative_entropy'].to_dict()

In [62]:
reader = JSPReaderJSON()

analyzer = DatasetAnalyzer(dataset_path="D:/IMA/datasets/pool_dataset_generation/", instance_reader=reader)

In [63]:
analyzer.analyze_dataset()

In [64]:
print(f"mean_num_ops_per_job = {analyzer.mean_num_ops_per_job}")
print(f"mean_num_jobs = {analyzer.mean_num_jobs}")
print(f"mean_max_op_time = {analyzer.mean_max_op_time}")

print(f"mean_relative_entropies = {analyzer.mean_relative_entropies}")


mean_num_ops_per_job = 6.0
mean_num_jobs = 6.0
mean_max_op_time = 9.0
mean_relative_entropies = {'entropy0_2': 0.19342640361727076, 'entropy0_3': 0.30815474690146727, 'entropy0_4': 0.3947770630671603, 'entropy0_5': 0.5033317060157296, 'entropy0_6': 0.594412404004376, 'entropy0_7': 0.702960377677298, 'entropy0_8': 0.7869543323305185}


In [11]:
from envs.minimal_jsp_env.util.jsp_generation.entropy_functions import EntropyOptimizer, calculate_entropy_from_operations_list

optimizer = EntropyOptimizer(
    output_size=36, 
    hidden_size=20, 
    learning_rate=0.003, 
    num_epochs=200, 
    max_episodes=200, 
    precision=0.01,
    )
entropies = optimizer.find_entropies()

found entropy = 0.9182880930023084 vs target=0.9210340371976184 at epoch=7. found/max = 0.1994037257941813. Output=[0.0561, 0.3008, 0.6165, 0.0265, 0.0001]
found entropy = 1.3886880151346708 vs target=1.3815510557964275 at epoch=181. found/max = 0.3015497710290835. Output=[0.0115, 0.0821, 0.2857, 0.4895, 0.0197, 0.0249, 0.0173, 0.0693]
found entropy = 1.83996014355174 vs target=1.842068074395237 at epoch=78. found/max = 0.3995422686332179. Output=[0.2261, 0.0687, 0.1972, 0.0852, 0.1434, 0.2326, 0.0325, 0.0143]
found entropy = 2.300630963322398 vs target=2.302585092994046 at epoch=2. found/max = 0.4995756661333399. Output=[0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.57]
found entropy = 2.769230773685247 vs target=2.763102111592855 at epoch=34. found/max = 0.6013308220640877.

In [42]:
from collections import Counter

class RandomJSPGeneratorOperationDistirbution(JSPGenerator):
    def __init__(self, num_jobs: int, num_operations: int, max_op_duration: int = 9):
        self.num_jobs = num_jobs
        self.num_operations = num_operations
        self.max_op_duration = max_op_duration
        self.pool_size = self.num_jobs*self.num_operations
    
    def generate(self, operation_distribution: List):
        assert len(operation_distribution) <= self.pool_size, "The size of the operation_distribution list does not match the pool_size."
        assert self.num_operations*self.max_op_duration >= len(operation_distribution), "Not possible to generate unique operations list with given num_operations and max_op_duration"

        # making sure that the random operations are unique
        random_operations = set()
        while len(random_operations) < len(operation_distribution):
            random_operations.add((random.randint(0, self.num_operations - 1), random.randint(1, self.max_op_duration)))
        random_operations = list(random_operations)


        operations_pool = []
        for distr, operation in zip(operation_distribution, random_operations):
            operations_pool += int(self.pool_size*distr)*[operation]
        

        # Following part is to fix the rounding issue of the multiplication distrubution*pool_size
        if len(operations_pool) != self.pool_size:
            size_difference = self.pool_size - len(operations_pool)

            freq_counts = Counter(operations_pool)
            freq_dict = {k: v for k, v in freq_counts.items()}
            operations_pool.sort(key=lambda x: freq_dict[x])

            operations_pool += operations_pool[:size_difference]
        
        random.shuffle(operations_pool)

        jobs = []
        for job_id in range(0, self.num_jobs):
            job_operations = operations_pool[self.num_operations*job_id:self.num_operations*(job_id+1)]
            operations = [Operation(job_id, op_id, type, duration) for op_id, (type, duration) in enumerate(job_operations)]

            jobs.append(operations)

        return JSPInstance(jobs, num_ops_per_job=self.num_operations, max_op_time=self.max_op_duration)

In [55]:
generator = RandomJSPGeneratorOperationDistirbution(num_jobs=10, num_operations=10, max_op_duration=9)
for key in entropies.keys():
    instance = generator.generate(entropies[key])
    print(f"target={key}, instance={instance.relative_entropy}")

[(0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (3, 1), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (5, 3), (5, 3)]
target=0.2, instance=0.20439076217880695
[(2, 1), (9, 9), (9, 9), (9, 9), (9, 9), (9, 9), (9, 9), (9, 9), (9, 9), (6, 8), (6, 8), (6, 8), (6, 8), (6, 8), (6, 8), (6, 8), (6, 8), (6, 8), (6, 8), (6, 8), (6, 8), (6, 8

In [32]:
pool = [(4, 4), (6, 2), (6, 2), (3, 4), (6, 2), (6, 2), (3, 4), (3, 4), (3, 4), (3, 4), (3, 4), (3, 4), (6, 2), (3, 4), (3, 4), (3, 4), (3, 4), (3, 4), (3, 4), (6, 2), (6, 2), (3, 4), (3, 4), (6, 2), (6, 2), (6, 2), (3, 4), (3, 4), (3, 4), (3, 4), (6, 2), (6, 2), (3, 4), (3, 4), (4, 4), (6, 2), (3, 4), (3, 4), (6, 2), (6, 2), (6, 2), (3, 4), (6, 2), (3, 4), (3, 4), (3, 4), (3, 4), (6, 2), (3, 4), (3, 4), (3, 4), (3, 4), (3, 4), (4, 4), (3, 4), (3, 4), (3, 4), (5, 4), (3, 4), (3, 4), (3, 4), (6, 2), (6, 2), (3, 4), (3, 4), (3, 4), (3, 4), (3, 4), (6, 2), (3, 4), (3, 4), (6, 2), (3, 4), (3, 4), (6, 2), (6, 2), (3, 4), (4, 4), (6, 2), (3, 4), (3, 4), (4, 4), (5, 4), (3, 4), (3, 4), (3, 4), (6, 2), (3, 4), (3, 4), (3, 4), (3, 4), (3, 4), (6, 2), (6, 2), (3, 4), (6, 2), (3, 4), (6, 2)]

In [33]:
len(pool)

98

In [40]:
from collections import Counter
freq_counts = Counter(pool)
freq_dict = {k: v for k, v in freq_counts.items()}
print(freq_dict)
pool.sort(key=lambda x: freq_dict[x])
pool


{(5, 4): 2, (4, 4): 5, (6, 2): 30, (3, 4): 61}


[(5, 4),
 (5, 4),
 (4, 4),
 (4, 4),
 (4, 4),
 (4, 4),
 (4, 4),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (6, 2),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4),
 (3, 4)]