In [8]:
import time
import os
import glob

# --- Helper Functions ---

def parse_set_cover_instance(filename):
    """Parse a set-cover instance file.

    The first line must hold exactly two integers: the universe size and the
    number of subsets. Each of the next ``num_subsets`` lines describes one
    subset: its first token is skipped (presumably the element count; it is
    never validated here) and the remaining tokens are parsed as integer
    elements.

    The returned list always has ``num_subsets`` entries, so the position of
    a subset in the list (plus one) equals its 1-based index in the file.
    A blank or missing subset line is reported and stored as an empty set.

    Args:
        filename: Path of the instance file to read.

    Returns:
        ``(universe_size, subsets)`` on success, where ``subsets`` is a list
        of ``set[int]``; ``(None, None)`` if the file cannot be read or parsed
        (a message describing the problem is printed).
    """
    subsets = []
    try:
        with open(filename, 'r') as f:
            header = f.readline().split()
            if len(header) != 2:
                print(f"Error: First line format incorrect in {filename}")
                return None, None
            universe_size = int(header[0])
            num_subsets = int(header[1])

            for i in range(num_subsets):
                tokens = f.readline().split()
                if not tokens:
                    print(f"Warning: Empty line {i+2} in {filename}")
                    # Keep a placeholder: dropping the entry would shift the
                    # index of every later subset and make the reported cover
                    # indices refer to the wrong lines of the input file.
                    subsets.append(set())
                    continue
                try:
                    subsets.append(set(map(int, tokens[1:])))
                except ValueError as e:
                    print(f"Error parsing line {i+2} in {filename}: {e}")
                    return None, None

        return universe_size, subsets
    except Exception as e:
        # Boundary handler: unreadable file, non-integer header, etc.
        print(f"Error parsing {filename}: {e}")
        return None, None


def read_optimal_value(filename):
    """
    Return the optimal solution size stored in a .out file.

    Only the first line is read; it is stripped and converted with int().
    Returns None when the file does not exist (a warning is printed) or when
    reading/converting fails for any other reason (an error is printed).
    """
    optimum = None
    try:
        with open(filename, 'r') as out_file:
            optimum = int(out_file.readline().strip())
    except FileNotFoundError:
        print(f"Warning: Optimal value file not found: {filename}")
    except Exception as e:
        print(f"Error reading optimal value file {filename}: {e}")
    return optimum


def write_solution_file(filename_base, method, cutoff, seed, solution_indices, quality):
    """Write a solution to ``<filename_base>_<method>_<cutoff>.sol``.

    The file gets two lines: the quality value, then the solution indices
    separated by single spaces. ``seed`` is accepted but is not used in the
    file name or the file contents. Any failure while writing is printed
    rather than raised.
    """
    sol_filename = f"{filename_base}_{method}_{cutoff}.sol"
    try:
        with open(sol_filename, 'w') as handle:
            handle.write(f"{quality}\n")
            index_line = " ".join(str(index) for index in solution_indices)
            handle.write(index_line + "\n")
    except Exception as e:
        print(f"Error writing solution file {sol_filename}: {e}")


# --- Greedy Set Cover Algorithm ---

def greedy_set_cover(universe_size, subsets):
    """Build a set cover with the classic greedy heuristic.

    The universe is the integers ``1..universe_size``. On every round the
    subset covering the most still-uncovered elements is selected (ties go
    to the subset that appears first in ``subsets``). The loop stops when
    everything is covered, when no subsets are left, or when no remaining
    subset covers any uncovered element.

    Args:
        universe_size: Number of elements in the universe.
        subsets: List of sets of integers; it is not modified.

    Returns:
        A tuple ``(cover_indices, covered_count, elapsed_seconds)`` where
        ``cover_indices`` is the sorted list of 1-based positions of the
        chosen subsets, ``covered_count`` is how many universe elements the
        chosen subsets cover, and ``elapsed_seconds`` is the run time.
    """
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() can be too coarse for that.
    start_time = time.perf_counter()
    uncovered = set(range(1, universe_size + 1))
    cover_indices = []
    # (1-based index, subset) pairs still available for selection. The
    # subsets are only read, so they do not need to be copied.
    candidates = list(enumerate(subsets, start=1))

    while uncovered and candidates:
        best_pos = -1
        best_gain = set()
        for pos, (_, subset) in enumerate(candidates):
            gain = subset & uncovered
            # Strict comparison keeps the earliest subset on ties.
            if len(gain) > len(best_gain):
                best_pos, best_gain = pos, gain

        if not best_gain:
            # Nothing left can cover a new element; a full cover is impossible.
            break

        chosen_index, _ = candidates.pop(best_pos)
        cover_indices.append(chosen_index)
        uncovered -= best_gain

    elapsed = time.perf_counter() - start_time
    return sorted(cover_indices), universe_size - len(uncovered), elapsed


# --- Experiment Runner ---

def run_experiment(data_dir, pattern, method, cutoff, seed=None):
    """Run the greedy set-cover solver on every instance matching a pattern.

    For each matching input file the instance is parsed, the expected optimum
    is read from the sibling ``<instance>.out`` file in ``data_dir`` (if it
    can be read), the greedy cover is computed, a ``.sol`` file is written
    under ``output/`` and one result row is collected.

    Args:
        data_dir: Directory containing the instance and ``.out`` files.
        pattern: Glob pattern (relative to ``data_dir``) selecting inputs.
        method: Label stored in the result rows and used in the ``.sol`` file
            name; it does not select the algorithm (greedy is always run).
        cutoff: Value used in the ``.sol`` file name; no time limit is
            enforced here.
        seed: Forwarded to ``write_solution_file``.

    Returns:
        A list with one dict per processed instance. Successful rows carry
        'Instance', 'Time', 'Quality', 'Optimal', 'RelErr' (None when no
        optimum is available), 'CoveredElements', 'UniverseSize' and
        'Method'; rows for instances that failed to parse carry 'Error'/'N/A'
        placeholders instead. The list is empty if no file matches.
    """
    results = []
    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)

    input_files = sorted(glob.glob(os.path.join(data_dir, pattern)))
    if not input_files:
        print(f"Warning: No input files found for {pattern} in {data_dir}")
        return results

    print(f"\n--- Running {method} on {pattern} ---")

    for in_file in input_files:
        # Strip only a trailing ".in". A blanket str.replace('.in', '') would
        # also delete ".in" from the middle of a name (for example
        # "set.instance1.in" would become "setstance1").
        file_name = os.path.basename(in_file)
        base = file_name[:-len('.in')] if file_name.endswith('.in') else file_name
        out_file = os.path.join(data_dir, base + '.out')
        print(f"Processing: {base}")

        universe_size, subsets = parse_set_cover_instance(in_file)
        if universe_size is None:
            # Parsing failed; record a placeholder row and move on.
            results.append({'Instance': base, 'Time': 0, 'Quality': 'Error', 'Optimal': 'N/A', 'RelErr': 'N/A', 'Method': method})
            continue

        opt_value = read_optimal_value(out_file)
        cover_indices, covered_count, exec_time = greedy_set_cover(universe_size, subsets)
        quality = len(cover_indices)

        rel_err = None
        opt_display = 'N/A'
        if opt_value is not None:
            opt_display = opt_value
            if opt_value > 0:
                rel_err = (quality - opt_value) / float(opt_value)
            else:
                # Avoid dividing by a non-positive optimum.
                rel_err = float('inf') if quality > 0 else 0.0
            print(f"  Quality: {quality}, Optimal: {opt_display}, RelErr: {rel_err:.4f}, Time: {exec_time:.4f}s")
        else:
            print(f"  Quality: {quality}, Optimal: N/A, RelErr: N/A, Time: {exec_time:.4f}s")

        sol_file_base = os.path.join(output_dir, base)
        write_solution_file(sol_file_base, method, cutoff, seed, cover_indices, quality)

        results.append({
            'Instance': base,
            'Time': exec_time,
            'Quality': quality,
            'Optimal': opt_display,
            'RelErr': rel_err,
            'CoveredElements': covered_count,
            'UniverseSize': universe_size,
            'Method': method
        })

    return results


# --- Configuration and Execution ---

# Experiment settings.
DATA_DIRECTORY = "data"
CUTOFF_TIME = 600
RANDOM_SEED = None
METHOD_NAME = "Approx"

# Run the test, small and large instance families in that order and gather
# every result row into a single list.
all_results = [
    row
    for instance_pattern in ("test*.in", "small*.in", "large*.in")
    for row in run_experiment(DATA_DIRECTORY, instance_pattern, METHOD_NAME, CUTOFF_TIME)
]

print("\n--- Experiment Complete ---")

# --- Display Results Table ---

def format_rel_err(x):
    """Render a relative-error value for display.

    None and NaN become 'N/A', infinity becomes 'inf', other numbers are
    shown with four decimals, and anything else (such as the 'N/A' string
    stored for instances that failed to parse) is passed through str().
    """
    if x is None:
        return 'N/A'
    if isinstance(x, (int, float)):
        # NaN is the only value not equal to itself. pandas represents a
        # missing (None) entry in a numeric column as NaN.
        if x != x:
            return 'N/A'
        if x == float('inf'):
            return 'inf'
        return f"{x:.4f}"
    return str(x)


if not all_results:
    # An empty result list would produce a DataFrame without a 'RelErr' column.
    print("No results to display.")
else:
    try:
        import pandas as pd
        df = pd.DataFrame(all_results)

        df['RelErr_Display'] = df['RelErr'].apply(format_rel_err)
        df['Time'] = df['Time'].apply(lambda x: f"{x:.4f}" if isinstance(x, (int, float)) else str(x))

        print("\n--- Results Summary ---")
        print(df[['Instance', 'Method', 'Time', 'Quality', 'Optimal', 'RelErr_Display']])
    except ImportError:
        # Plain-text fallback when pandas is unavailable.
        print("Pandas not installed. Raw results:")
        for res in all_results:
            err = format_rel_err(res['RelErr'])
            print(f"{res['Instance']}: Time={res['Time']:.4f}, Quality={res['Quality']}, Optimal={res['Optimal']}, RelErr={err}")



--- Running Approx on test*.in ---
Processing: test1
  Quality: 2, Optimal: 2, RelErr: 0.0000, Time: 0.0000s
Processing: test2
  Quality: 3, Optimal: 2, RelErr: 0.5000, Time: 0.0000s
Processing: test3
  Quality: 7, Optimal: 6, RelErr: 0.1667, Time: 0.0000s
Processing: test4
  Quality: 5, Optimal: 4, RelErr: 0.2500, Time: 0.0000s
Processing: test5
  Quality: 5, Optimal: 4, RelErr: 0.2500, Time: 0.0000s

--- Running Approx on small*.in ---
Processing: small1
  Quality: 5, Optimal: 5, RelErr: 0.0000, Time: 0.0000s
Processing: small10
  Quality: 3, Optimal: 2, RelErr: 0.5000, Time: 0.0000s
Processing: small11
  Quality: 5, Optimal: 4, RelErr: 0.2500, Time: 0.0001s
Processing: small12
  Quality: 4, Optimal: 3, RelErr: 0.3333, Time: 0.0000s
Processing: small13
  Quality: 3, Optimal: 2, RelErr: 0.5000, Time: 0.0000s
Processing: small14
  Quality: 3, Optimal: 2, RelErr: 0.5000, Time: 0.0000s
Processing: small15
  Quality: 3, Optimal: 2, RelErr: 0.5000, Time: 0.0000s
Processing: small16
  Qual

In [9]:
# --- Optional: Save and Display Results Table (using pandas) ---
# Build the summary table from all_results, write it to
# output/greedy_results_summary.csv and print it. Skipped with a message when
# pandas is not installed.
try:
    import pandas as pd
    df = pd.DataFrame(all_results)

    # Add formatted columns
    def format_rel_err(x):
        # pd.isna catches both None and NaN; pandas stores a missing (None)
        # RelErr as NaN when the rest of the column is numeric.
        if pd.isna(x): return 'N/A'
        if x == float('inf'): return 'inf'
        if isinstance(x, (int, float)): return f"{x:.4f}"
        return str(x)

    if df.empty:
        # Without rows there is no 'RelErr' column to format.
        print("\nNo results available. Skipping results table display and CSV export.")
    else:
        df['RelErr_Display'] = df['RelErr'].apply(format_rel_err)
        df['Time'] = df['Time'].apply(lambda x: f"{x:.4f}" if isinstance(x, (int, float)) else str(x))

        # Rename method for display
        df['Method'] = 'Greedy Set Cover'  # Override whatever is set earlier

        # Arrange final DataFrame for CSV output
        output_columns = ['Instance', 'Method', 'Time', 'Quality', 'Optimal', 'RelErr_Display']
        csv_df = df[output_columns].copy()
        # Make sure the target directory exists even if this cell is run alone.
        os.makedirs("output", exist_ok=True)
        csv_output_path = os.path.join("output", "greedy_results_summary.csv")
        csv_df.to_csv(csv_output_path, index=False)

        print(f"\nResults written to CSV: {csv_output_path}")
        print("\n--- Results Summary ---")
        print(csv_df)

except ImportError:
    print("\nPandas not installed. Skipping results table display and CSV export.")



Results written to CSV: output/greedy_results_summary.csv

--- Results Summary ---
   Instance            Method    Time  Quality  Optimal RelErr_Display
0     test1  Greedy Set Cover  0.0000        2        2         0.0000
1     test2  Greedy Set Cover  0.0000        3        2         0.5000
2     test3  Greedy Set Cover  0.0000        7        6         0.1667
3     test4  Greedy Set Cover  0.0000        5        4         0.2500
4     test5  Greedy Set Cover  0.0000        5        4         0.2500
5    small1  Greedy Set Cover  0.0000        5        5         0.0000
6   small10  Greedy Set Cover  0.0000        3        2         0.5000
7   small11  Greedy Set Cover  0.0001        5        4         0.2500
8   small12  Greedy Set Cover  0.0000        4        3         0.3333
9   small13  Greedy Set Cover  0.0000        3        2         0.5000
10  small14  Greedy Set Cover  0.0000        3        2         0.5000
11  small15  Greedy Set Cover  0.0000        3        2         