In [89]:
import os
import re
import pandas as pd

# -------------------- PARSE BLOCK FILE --------------------
def parse_results(file_path):
    """Parse a block-structured results file into a DataFrame.

    The file is read as UTF-8 and split into blocks on the dashed
    separator line. Each block is scanned for the labelled fields below
    and yields one row; a field that is absent from a block is simply
    left out of that row (it shows up as NaN in the DataFrame).

    Columns produced (when present in the input):
        file                  -- token following ``Plik:``
        id                    -- first run of digits in ``file``
        makespan              -- integer following ``Makespan:``
        integral_assignments  -- count in ``Całkowitoliczbowe przypisania:``
        integral_percentage   -- percentage in the same line
        num_tasks             -- integer following ``Liczba zadań:``
        num_machines          -- integer following ``Liczba maszyn:``
        time_seconds          -- float following ``Czas działania (s):``

    Args:
        file_path: Path of the results file to read.

    Returns:
        pandas.DataFrame with one row per block that contained at least
        one recognised field.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    blocks = content.strip().split('--------------------------------------------------------------------------------')
    results = []

    for block in blocks:
        if not block.strip():
            continue

        data = {}

        file_match = re.search(r'Plik:\s*(\S+)', block)
        if file_match:
            filename = file_match.group(1)
            data['file'] = filename
            # A file name without digits has no usable id; leave it out
            # instead of crashing on a None match.
            id_match = re.search(r'\d+', filename)
            if id_match:
                data['id'] = int(id_match.group())

        makespan_match = re.search(r'Makespan:\s*(\d+)', block)
        if makespan_match:
            data['makespan'] = int(makespan_match.group(1))

        # One line carries both the absolute count and the percentage.
        integral_match = re.search(
            r'Całkowitoliczbowe przypisania:\s*(\d+)\s*\(([\d.]+)%\)', block
        )
        if integral_match:
            data['integral_assignments'] = int(integral_match.group(1))
            data['integral_percentage'] = float(integral_match.group(2))

        tasks_match = re.search(r'Liczba zadań:\s*(\d+)', block)
        if tasks_match:
            data['num_tasks'] = int(tasks_match.group(1))

        machines_match = re.search(r'Liczba maszyn:\s*(\d+)', block)
        if machines_match:
            data['num_machines'] = int(machines_match.group(1))

        time_match = re.search(r'Czas działania \(s\):\s*([\d.]+)', block)
        if time_match:
            data['time_seconds'] = float(time_match.group(1))

        # Blocks with no recognised field (headers, free text) would only
        # contribute an all-NaN row, so they are dropped.
        if data:
            results.append(data)

    return pd.DataFrame(results)
# -------------------- PARSE CMAX FOLDER --------------------
def extract_cmax_values(folder_path):
    """Collect the first ``Cmax`` value from every ``.txt`` file in a folder.

    Only files whose name starts with digits followed by a non-digit
    character (e.g. ``1045CPLEX.txt``) are considered; the leading digits
    become the row ``id``. Files in which no ``Cmax <number>`` occurrence
    is found are skipped.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        pandas.DataFrame with columns ``id`` (int) and ``cmax`` (float).
        The columns are present even when no file matched, so the result
        can always be merged on ``id``.
    """
    cmax_pattern = re.compile(r"Cmax\s+([\d.]+)")
    # Leading digits followed by a non-digit, e.g. "1045CPLEX" -> 1045
    file_id_pattern = re.compile(r'(\d+)\D')

    cmax_records = []

    # os.listdir order is arbitrary; sort so the row order is reproducible.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".txt"):
            continue

        match_id = file_id_pattern.match(filename)
        if not match_id:
            continue

        with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as f:
            match_cmax = cmax_pattern.search(f.read())

        if match_cmax:
            cmax_records.append({
                'id': int(match_id.group(1)),
                'cmax': float(match_cmax.group(1)),
            })

    # Explicit columns keep 'id'/'cmax' available for an empty result.
    return pd.DataFrame(cmax_records, columns=['id', 'cmax'])


# -------------------- MAIN SCRIPT --------------------
# if __name__ == "__main__":
    
#     # MaqCorre
#     wyniki_file = "wyniki_MaqCorre.txt"
#     cmax_folder = "RCmax/TXT Cplex 2 horas log Maq Corre/"
    
#     # JobsCorre
#     wyniki_file = "wyniki_JobsCorre.txt"
#     cmax_folder = "RCmax/TXT Cplex 2 horas log Jobs Corre/"
     
#     # 1a100
#     wyniki_file = "wyniki_1a100.txt"
#     cmax_folder = "RCmax/TXT Cplex 2 horas log U(1,100)/txt Cplex 2 horas log/"
       
#     # 100a120
#     wyniki_file = "wyniki_100a120.txt"
#     cmax_folder = "RCmax/TXT Cplex 2 horas log U(100,120)/"
    
#     # 100a200
#     wyniki_file = "wyniki_100a200.txt"
#     cmax_folder = "RCmax/TXT Cplex 2 horas log U(100,200)/"
    
#     # de10a100
#     wyniki_file = "wyniki_de10a100.txt"
#     cmax_folder = "RCmax/TXT CPLEX 2 horas log U(10,100)/TXT CPLEX 2 horas de 10 a 100 log/"
    
#     # Instanciasde1000a1100
#     wyniki_file = "wyniki_Instanciasde1000a1100.txt"
#     cmax_folder = "RCmax/TXT Cplex 2 horas log U(1000,1100)/TXT Cplex 2 horas U(1000,1100)/"
    
    
    
#     # Load parsed data
#     df_results = parse_results(wyniki_file)
#     df_cmax = extract_cmax_values(cmax_folder)

    
#     # Merge on 'id'
#     df_merged = pd.merge(df_results, df_cmax, on="id", how="left")

#     # Show results
#     print(df_merged[['file', 'makespan', 'integral_percentage', 'cmax']])
    
    
    
if __name__ == "__main__":
    # Dataset label -> (results file, folder containing the CPLEX logs)
    datasets = {
        "MaqCorre": ("wyniki_MaqCorre.txt", "RCmax/TXT Cplex 2 horas log Maq Corre/"),
        "JobsCorre": ("wyniki_JobsCorre.txt", "RCmax/TXT Cplex 2 horas log Jobs Corre/"),
        "1a100": ("wyniki_1a100.txt", "RCmax/TXT Cplex 2 horas log U(1,100)/txt Cplex 2 horas log/"),
        "100a120": ("wyniki_100a120.txt", "RCmax/TXT Cplex 2 horas log U(100,120)/"),
        "100a200": ("wyniki_100a200.txt", "RCmax/TXT Cplex 2 horas log U(100,200)/"),
        "de10a100": ("wyniki_de10a100.txt", "RCmax/TXT CPLEX 2 horas log U(10,100)/TXT CPLEX 2 horas de 10 a 100 log/"),
        "1000a1100": ("wyniki_Instanciasde1000a1100.txt", "RCmax/TXT Cplex 2 horas log U(1000,1100)/TXT Cplex 2 horas U(1000,1100)/"),
    }

    # One merged DataFrame per dataset, keyed by the dataset label
    df_dict = {}

    for name, (wyniki_file, cmax_folder) in datasets.items():
        print(f"Processing: {name}")
        df_results = parse_results(wyniki_file)
        df_cmax = extract_cmax_values(cmax_folder)
        # Left join keeps every parsed result row even when no Cmax was found
        df_merged = df_results.merge(df_cmax, on="id", how="left").assign(dataset=name)
        df_dict[name] = df_merged

    # Quick look at a single dataset
    print(df_dict["MaqCorre"].head())

    # Stack every dataset into one frame and preview the key columns
    df_all = pd.concat(df_dict.values(), ignore_index=True)
    preview_columns = ['dataset', 'file', 'makespan', 'cmax', 'integral_percentage']
    print(df_all[preview_columns].head())



Processing: MaqCorre
Processing: JobsCorre
Processing: 1a100
Processing: 100a120
Processing: 100a200
Processing: de10a100
Processing: 1000a1100
       file    id  makespan  integral_assignments  integral_percentage  \
0  1011.txt  1011      2581                   990                 99.0   
1  1012.txt  1012      3578                   990                 99.0   
2  1013.txt  1013      2090                   991                 99.1   
3  1014.txt  1014      4386                   990                 99.0   
4  1015.txt  1015      2730                   990                 99.0   

   num_tasks  num_machines  time_seconds    cmax   dataset  
0       1000            10        3.8792  2546.0  MaqCorre  
1       1000            10        3.5508  3537.0  MaqCorre  
2       1000            10        3.0524  2040.0  MaqCorre  
3       1000            10        2.9084  4313.0  MaqCorre  
4       1000            10        3.0528  2649.0  MaqCorre  
    dataset      file  makespan    cmax  inte

In [106]:
# Row accumulators, one list per summary table
avg_rows, min_rows, max_rows, approx_rows = [], [], [], []

for name, df in df_dict.items():
    # Copy first so the helper column is not written back into df_dict
    df = df.copy()
    df['makespan_over_cmax'] = df['makespan'] / df['cmax']

    ratio = df['makespan_over_cmax']
    runtime = df['time_seconds']
    integral = df['integral_percentage']

    # The mean/min/max tables share one layout and differ only in the
    # aggregation applied to each column.
    for rows, stat in ((avg_rows, 'mean'), (min_rows, 'min'), (max_rows, 'max')):
        rows.append({
            'Dataset': name,
            'Makespan / Cmax': getattr(ratio, stat)(),
            'Time (s)': getattr(runtime, stat)(),
            'Integral %': getattr(integral, stat)(),
        })

    # Ratio-only table: min / mean / max of makespan over Cmax
    approx_rows.append({
        'Dataset': name,
        'min approx.': ratio.min(),
        'avg approx.': ratio.mean(),
        'max approx.': ratio.max(),
    })

# Turn each accumulator into a DataFrame indexed by dataset label
avg_df, min_df, max_df, approx_df = (
    pd.DataFrame(table_rows).set_index('Dataset')
    for table_rows in (avg_rows, min_rows, max_rows, approx_rows)
)

# Format and export LaTeX
def format_latex_table(df, caption):
    return df.round(3).to_latex(
        caption=caption,
        label=f"tab:{caption.lower().replace(' ', '_')}",
        position='htbp',
        column_format='lccc',
        escape=False
    )

# (heading, table, caption) for every summary table, in output order.
# Only the first heading has no leading blank line.
latex_tables = (
    ("Average Table:\n", avg_df, "Average metrics across datasets"),
    ("\nMinimum Table:\n", min_df, "Minimum metrics across datasets"),
    ("\nMaximum Table:\n", max_df, "Maximum metrics across datasets"),
    ("\nApproximation Table:\n", approx_df, "Approximation ratios across datasets"),
)

for heading, table, caption in latex_tables:
    print(heading)
    print(format_latex_table(table, caption))

Average Table:

\begin{table}[htbp]
\caption{Average metrics across datasets}
\label{tab:average_metrics_across_datasets}
\begin{tabular}{lccc}
\toprule
 & Makespan / Cmax & Time (s) & Integral % \\
Dataset &  &  &  \\
\midrule
MaqCorre & 1.236000 & 4.092000 & 89.688000 \\
JobsCorre & 1.181000 & 4.413000 & 88.593000 \\
1a100 & 1.353000 & 1.126000 & 92.211000 \\
100a120 & 1.085000 & 5.089000 & 88.622000 \\
100a200 & 1.088000 & 5.469000 & 87.898000 \\
de10a100 & 1.146000 & 2.630000 & 88.509000 \\
1000a1100 & 1.084000 & 12.059000 & 87.790000 \\
\bottomrule
\end{tabular}
\end{table}


Minimum Table:

\begin{table}[htbp]
\caption{Minimum metrics across datasets}
\label{tab:minimum_metrics_across_datasets}
\begin{tabular}{lccc}
\toprule
 & Makespan / Cmax & Time (s) & Integral % \\
Dataset &  &  &  \\
\midrule
MaqCorre & 1.006000 & 0.128000 & 69.000000 \\
JobsCorre & 1.009000 & 0.126000 & 60.000000 \\
1a100 & 1.015000 & 0.063000 & 78.000000 \\
100a120 & 1.002000 & 0.146000 & 59.000000 \\
100

In [107]:
# df_cmax

In [92]:
# df_merged

In [93]:
# Columns shown for every offending instance
report_columns = ["id", "file", "makespan", "cmax"]

for name, df in df_dict.items():
    print(f"\n=== Dataset: {name} ===")

    # Instances whose makespan is below the CPLEX Cmax value
    lower_than_cmax = df.loc[df["makespan"] < df["cmax"]]
    # Instances whose makespan exceeds twice the CPLEX Cmax value
    greater_than_2cmax = df.loc[df["makespan"] > 2 * df["cmax"]]

    checks = (
        (lower_than_cmax, "  MAKESPAN < CMAX:", "  No instances where MAKESPAN < CMAX."),
        (greater_than_2cmax, "  MAKESPAN > 2 * CMAX:", "  No instances where MAKESPAN > 2 * CMAX."),
    )
    for offenders, found_msg, none_msg in checks:
        if offenders.empty:
            print(none_msg)
            continue
        print(found_msg)
        print(offenders[report_columns])



=== Dataset: MaqCorre ===
  No instances where MAKESPAN < CMAX.
  No instances where MAKESPAN > 2 * CMAX.

=== Dataset: JobsCorre ===
  No instances where MAKESPAN < CMAX.
  No instances where MAKESPAN > 2 * CMAX.

=== Dataset: 1a100 ===
  No instances where MAKESPAN < CMAX.
  No instances where MAKESPAN > 2 * CMAX.

=== Dataset: 100a120 ===
  No instances where MAKESPAN < CMAX.
  No instances where MAKESPAN > 2 * CMAX.

=== Dataset: 100a200 ===
  No instances where MAKESPAN < CMAX.
  No instances where MAKESPAN > 2 * CMAX.

=== Dataset: de10a100 ===
  No instances where MAKESPAN < CMAX.
  No instances where MAKESPAN > 2 * CMAX.

=== Dataset: 1000a1100 ===
  MAKESPAN < CMAX:
      id     file  makespan    cmax
124  235  235.txt      7037  7039.0
128  239  239.txt      7034  7038.0
  No instances where MAKESPAN > 2 * CMAX.


In [94]:
# # Rows where makespan < cmax
# lower_than_cmax = df_merged[df_merged["makespan"] < df_merged["cmax"]]
# print("=== MAKESPAN < CMAX ===")
# print(lower_than_cmax[["id", "file", "makespan", "cmax"]])

# # Rows where makespan > 2 * cmax
# greater_than_2cmax = df_merged[df_merged["makespan"] > 2 * df_merged["cmax"]]
# print("\n=== MAKESPAN > 2 * CMAX ===")
# print(greater_than_2cmax[["id", "file", "makespan", "cmax"]])


In [100]:
# import matplotlib.pyplot as plt
# import seaborn as sns

# # Prepare instance names
# df_merged['instance'] = df_merged['file'].str.replace('.txt', '', regex=False)

# # Sort by id (optional, helps consistent plotting)
# df_merged = df_merged.sort_values('id')

# # Plot 1: Makespan / Cmax
# plt.figure(figsize=(14, 5))
# sns.scatterplot(x='instance', y=df_merged['makespan'] / df_merged['cmax'], data=df_merged, marker='o')
# plt.axhline(2.0, color='red', linestyle='--', label='2.0 threshold')
# plt.xticks(rotation=90)
# plt.ylabel('Makespan / Cmax')
# plt.title('Relative Makespan per Instance')
# plt.legend()
# plt.tight_layout()
# plt.show()

In [101]:
# # Plot 2: Time in seconds
# plt.figure(figsize=(14, 5))
# sns.barplot(x='instance', y='time_seconds', data=df_merged)
# plt.xticks(rotation=90)
# plt.ylabel('Time (s)')
# plt.title('Execution Time per Instance')
# plt.tight_layout()
# plt.show()


In [102]:

# # Plot 3: Integral assignment percentage
# plt.figure(figsize=(14, 5))
# sns.barplot(x='instance', y='integral_percentage', data=df_merged)
# plt.xticks(rotation=90)
# plt.ylabel('Integral Assignment (%)')
# plt.title('Integral Assignment Percentage per Instance')
# plt.tight_layout()
# plt.show()

In [109]:
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
import seaborn as sns

# Writes one four-page PDF per dataset: makespan/Cmax ratio, run time,
# integral percentage and integral assignment count, one page each.
for name, df in df_dict.items():
    print(f"Plotting for: {name}")

    # assign() returns a new frame, so the entry in df_dict stays untouched
    df = df.assign(
        instance=df['file'].str.replace('.txt', '', regex=False)
    ).sort_values('id')

    # The tick layout is the same on every page: at most ~20 labels
    xticks = df['instance'].tolist()
    step = max(1, len(xticks) // 20)
    tick_positions = range(0, len(xticks), step)
    tick_labels = xticks[::step]

    pdf_filename = f"plots_{name}.pdf"
    with PdfPages(pdf_filename) as pdf:

        # Page 1: makespan / Cmax with the 2.0 reference line
        plt.figure(figsize=(14, 5))
        sns.scatterplot(x='instance', y=df['makespan'] / df['cmax'], data=df, marker='o')
        plt.axhline(2.0, color='red', linestyle='--', label='2.0 threshold')
        plt.xticks(ticks=tick_positions, labels=tick_labels, rotation=90)
        plt.ylabel('Makespan / Cmax')
        plt.title(f'Relative Makespan per Instance: {name}')
        plt.legend()
        plt.tight_layout()
        pdf.savefig()
        plt.close()

        # Pages 2-4 differ only in plot function, y column and labels
        pages = (
            (sns.barplot, {'y': 'time_seconds'},
             'Time (s)', f'Execution Time per Instance: {name}'),
            (sns.barplot, {'y': 'integral_percentage'},
             'Integral Assignment (%)', f'Integral Assignment Percentage per Instance: {name}'),
            (sns.scatterplot, {'y': 'integral_assignments', 'marker': 'o', 's': 60},
             'Number of Integral Assignments', f'Integral Assignments per Instance: {name}'),
        )
        for draw, draw_kwargs, ylabel, title in pages:
            plt.figure(figsize=(14, 5))
            draw(x='instance', data=df, **draw_kwargs)
            plt.xticks(ticks=tick_positions, labels=tick_labels, rotation=90)
            plt.ylabel(ylabel)
            plt.title(title)
            plt.tight_layout()
            pdf.savefig()
            plt.close()

    print(f"Saved to {pdf_filename}")


Plotting for: MaqCorre
Saved to plots_MaqCorre.pdf
Plotting for: JobsCorre
Saved to plots_JobsCorre.pdf
Plotting for: 1a100
Saved to plots_1a100.pdf
Plotting for: 100a120
Saved to plots_100a120.pdf
Plotting for: 100a200
Saved to plots_100a200.pdf
Plotting for: de10a100
Saved to plots_de10a100.pdf
Plotting for: 1000a1100
Saved to plots_1000a1100.pdf


In [111]:
import os
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
import seaborn as sns

# Writes four single-figure PDFs per dataset into plots/<dataset>/.
for name, df in df_dict.items():
    print(f"Plotting for: {name}")
    df = df.copy()
    df['instance'] = df['file'].str.replace('.txt', '', regex=False)
    df = df.sort_values('id')
    xticks = df['instance'].tolist()
    step = max(1, len(xticks) // 20)

    # Every dataset gets its own sub-folder under plots/
    output_dir = os.path.join("plots", name)
    os.makedirs(output_dir, exist_ok=True)

    # (file name, plot function, extra plot kwargs, y label, title,
    #  draw the 2.0 threshold line and legend?)
    charts = (
        ("makespan_cmax.pdf", sns.scatterplot,
         {'y': df['makespan'] / df['cmax'], 'marker': 'o'},
         'Makespan / Cmax', f'Relative Makespan per Instance: {name}', True),
        ("execution_time.pdf", sns.barplot,
         {'y': 'time_seconds'},
         'Time (s)', f'Execution Time per Instance: {name}', False),
        ("integral_percentage.pdf", sns.barplot,
         {'y': 'integral_percentage'},
         'Integral Assignment (%)', f'Integral Assignment Percentage per Instance: {name}', False),
        ("integral_assignments.pdf", sns.scatterplot,
         {'y': 'integral_assignments', 'marker': 'o', 's': 60},
         'Number of Integral Assignments', f'Integral Assignments per Instance: {name}', False),
    )

    for pdf_name, plot_fn, plot_kwargs, ylabel, title, show_threshold in charts:
        with PdfPages(os.path.join(output_dir, pdf_name)) as pdf:
            plt.figure(figsize=(14, 5))
            plot_fn(x='instance', data=df, **plot_kwargs)
            if show_threshold:
                plt.axhline(2.0, color='red', linestyle='--', label='2.0 threshold')
            # Thin the tick labels to roughly 20 so they stay readable
            plt.xticks(ticks=range(0, len(xticks), step), labels=xticks[::step], rotation=90)
            plt.ylabel(ylabel)
            plt.title(title)
            if show_threshold:
                plt.legend()
            plt.tight_layout()
            pdf.savefig()
            plt.close()

    print(f"Saved plots to folder: {output_dir}")


Plotting for: MaqCorre
Saved plots to folder: plots/MaqCorre
Plotting for: JobsCorre
Saved plots to folder: plots/JobsCorre
Plotting for: 1a100
Saved plots to folder: plots/1a100
Plotting for: 100a120
Saved plots to folder: plots/100a120
Plotting for: 100a200
Saved plots to folder: plots/100a200
Plotting for: de10a100
Saved plots to folder: plots/de10a100
Plotting for: 1000a1100
Saved plots to folder: plots/1000a1100
