In [30]:
from envs.minimal_jsp_env.util.jsp_conversion.readers import JSPReaderJSON
from envs.minimal_jsp_env.entities import JSPInstance
import os
import pandas as pd

class DatasetAnalyzer():
    """Compute summary statistics over a directory tree of problem instances.

    The dataset is traversed as ``<dataset_path>/<entropy>/<file_name>``: each
    sub-directory of ``dataset_path`` is treated as one entropy group, and each
    file inside it is passed to ``instance_reader.read_instance``.
    """

    # Column order of the per-instance table built by ``analyze_dataset``.
    _COLUMNS = [
        'entropy',
        'file_name',
        'relative_entropy',
        'num_jobs',
        'num_ops_per_job',
        'max_op_time',
        'opt_time',
    ]

    def __init__(self, dataset_path: str, instance_reader) -> None:
        """Store the dataset location and the reader used to load instances.

        Args:
            dataset_path: Directory whose sub-directories are entropy groups.
            instance_reader: Object exposing ``read_instance(path)``; the
                returned instance must provide ``relative_entropy``,
                ``num_jobs``, ``num_ops_per_job``, ``max_op_time`` and
                ``opt_time`` attributes.
        """
        self.dataset_path = dataset_path
        self.instance_reader = instance_reader

    def analyze_dataset(self) -> None:
        """Read every instance and store aggregate statistics on ``self``.

        After the call the following attributes are set:

        * ``results_df`` - one row per instance file (columns ``_COLUMNS``).
        * ``mean_num_ops_per_job``, ``mean_num_jobs``, ``mean_max_op_time`` -
          means over all instances.
        * ``mean_relative_entropies`` - dict mapping each entropy group name
          to the mean ``relative_entropy`` of its instances.

        Entries of ``dataset_path`` that are not directories, and entries of
        an entropy directory that are not regular files, are skipped. If no
        instance is found, the three means are NaN and
        ``mean_relative_entropies`` is an empty dict.
        """
        records = []

        # sorted() keeps the row order deterministic; os.listdir order is arbitrary.
        for entropy in sorted(os.listdir(self.dataset_path)):
            entropy_data_path = os.path.join(self.dataset_path, entropy)
            if not os.path.isdir(entropy_data_path):
                # A stray top-level file would make os.listdir below raise.
                continue

            for file_name in sorted(os.listdir(entropy_data_path)):
                instance_path = os.path.join(entropy_data_path, file_name)
                if not os.path.isfile(instance_path):
                    continue

                instance = self.instance_reader.read_instance(instance_path)
                records.append({
                    'entropy': entropy,
                    'file_name': file_name,
                    'relative_entropy': instance.relative_entropy,
                    'num_jobs': instance.num_jobs,
                    'num_ops_per_job': instance.num_ops_per_job,
                    'max_op_time': instance.max_op_time,
                    'opt_time': instance.opt_time,
                })

        # Build the frame once: concatenating onto an empty frame per file is
        # quadratic and relies on deprecated dtype handling of empty entries.
        results_df = pd.DataFrame(records, columns=self._COLUMNS)
        self.results_df = results_df

        if results_df.empty:
            self.mean_num_ops_per_job = float('nan')
            self.mean_num_jobs = float('nan')
            self.mean_max_op_time = float('nan')
            self.mean_relative_entropies = {}
            return

        self.mean_num_ops_per_job = results_df['num_ops_per_job'].to_numpy().mean()
        self.mean_num_jobs = results_df['num_jobs'].to_numpy().mean()
        self.mean_max_op_time = results_df['max_op_time'].to_numpy().mean()

        self.mean_relative_entropies = (
            results_df.groupby('entropy')['relative_entropy'].mean().to_dict()
        )

In [34]:
# Instances are stored as JSON, so parse them with the JSON reader.
reader = JSPReaderJSON()

# Analyzer over the locally generated pool dataset.
analyzer = DatasetAnalyzer(
    instance_reader=reader,
    dataset_path="C:/Users/Comberek/Desktop/pool_dataset_generation/",
)

In [35]:
# Read every instance under the dataset path and populate the analyzer's
# mean_* attributes, which are printed in the next cell.
analyzer.analyze_dataset()

In [36]:
# Report the dataset-wide means, one per line.
print(
    f"mean_num_ops_per_job = {analyzer.mean_num_ops_per_job}",
    f"mean_num_jobs = {analyzer.mean_num_jobs}",
    f"mean_max_op_time = {analyzer.mean_max_op_time}",
    sep="\n",
)

# Report the mean relative entropy of each entropy group.
print(f"mean_relative_entropies = {analyzer.mean_relative_entropies}")


mean_num_ops_per_job = 6.0
mean_num_jobs = 6.0
mean_max_op_time = 9.0
mean_relative_entropies = {'entropy0_2': 0.19225076471475588, 'entropy0_3': 0.28777024632164655, 'entropy0_4': 0.365096660936818, 'entropy0_5': 0.4403616587648972, 'entropy0_6': 0.5428296471638181, 'entropy0_7': 0.685935809697416, 'entropy0_8': 0.7702912881987423}
