In [None]:
import pandas as pd

def sum_keys_and_sort(keys):
    # Read the CSV file
    df = pd.read_csv('top40.csv')
    
    # Group by configuration and sum the time for the given keys
    df['time'] = df['time'].apply(lambda x: 3600 if x > 1800 else x)
    # Filter out keys that are 3600 for all configurations
    all_3600_keys = df.groupby('key')['time'].apply(lambda x: all(t == 3600 for t in x)).reset_index()
    keys_to_include = all_3600_keys[all_3600_keys['time'] == False]['key']
    df = df[df['key'].isin(keys_to_include)]
    grouped = df[df['key'].isin(keys)].groupby('configuration')['time'].sum().reset_index()

    result = sorted(grouped.itertuples(index=False, name=None), key=lambda x: x[1])

    return result



In [9]:
from gbd_core.api import GBD
from scipy.optimize import linprog
import matplotlib.pyplot as plt

df = pd.read_csv('top40.csv')
configurations = df['configuration'].unique().tolist()

with GBD([ '/home/raphael-zipperer/Uni/BA/database/base.db', '/home/raphael-zipperer/Uni/BA/database/meta.db' ]) as gbd:
        data = gbd.query('(track=main_2023 or track=main_2024) and minisat1m!=yes', resolve=["family"])

family_hashes = {}
for family in data['family'].unique():
    family_hashes[family] = data[data['family'] == family]['hash'].tolist()

fam_confs = {}
for f, hashes in family_hashes.items():
    conf_times = sum_keys_and_sort(hashes)

    # Extract the first 3 elements
    #first_three = conf_times[:3]
    #filtered_conf_times = [conf[0] for conf in conf_times if conf_times[0][1] / conf[1] > 0.8]
    # Convert the configuration to its index in configurations list
    filtered_conf_times = [configurations.index(conf[0]) for conf in conf_times if conf_times[0][1] / conf[1] > 0.2]
    fam_confs[f] = filtered_conf_times
    #print(len(filtered_conf_times))
    
    # Extract the configuration names and times
    #configurations = [conf[0] for conf in first_three]
    #times = [conf[1] for conf in first_three]

print(fam_confs)
fam_confs = {k: v for k, v in fam_confs.items() if v}
confs = [v for _,v in fam_confs.items()]

# Create the matrix A for the hitting set problem
A = [[1 if j in conf else 0 for j in range(40)] for conf in confs]

# Create the cost vector c (all ones, since we want to minimize the number of configurations)
c = [1] * 40


def branch_and_cut(A, c):
    num_vars = len(c)
    best_solution = None
    best_value = float('inf')
    
    def solve_lp(A, c, bounds):
        res = linprog(c, A_eq=A, b_eq=[1] * len(A), bounds=bounds, method='highs')
        return res

    def branch(bounds):
        nonlocal best_solution, best_value
        
        res = solve_lp(A, c, bounds)
        
        if not res.success:
            return
        
        if all(x.is_integer() for x in res.x):
            value = sum(res.x)
            if value < best_value:
                best_value = value
                best_solution = res.x
            return
        
        for i in range(num_vars):
            if not res.x[i].is_integer():
                break
        
        lower_bounds = bounds.copy()
        lower_bounds[i] = (0, 0)
        branch(lower_bounds)
        
        upper_bounds = bounds.copy()
        upper_bounds[i] = (1, 1)
        branch(upper_bounds)
    
    initial_bounds = [(0, 1)] * num_vars
    branch(initial_bounds)
    
    return best_solution

solution = branch_and_cut(A, c)
print(solution)
hitting_set = [i for i, x in enumerate(solution) if x > 0.5]

print("Hitting set:", hitting_set)

{'cellular-automata': [14, 38, 33, 18, 10, 26, 31, 15], 'hardware-verification': [17, 4, 38, 27, 0, 30, 18, 11, 39], 'social-golfer': [6, 12, 21, 35, 34, 11, 8, 31, 36, 9, 0, 26, 13, 2, 32, 37, 33, 14, 15, 3, 24, 29, 1, 18, 10, 22, 38, 5, 28, 17, 16, 20, 39, 4, 19, 23, 25, 27, 7, 30], 'cryptography-simon': [4, 11, 21, 38, 19, 2, 18, 32, 33, 8, 12, 31, 35, 9, 13, 0, 16, 25, 5, 15, 28, 26, 22, 6, 34, 14, 36, 37, 7], 'miter': [0, 12, 38, 4, 25, 31, 13, 33, 32, 17, 35, 9, 21, 23, 11, 27, 39, 18, 16, 1, 2, 24, 15, 28, 29, 20, 8, 36, 26, 30, 10, 37, 3, 7, 6, 34, 14, 5, 19, 22], 'cryptography': [11, 8, 33, 0, 31, 26, 36, 32, 21, 6, 14, 2, 10, 34, 20, 18, 13, 3, 37, 24, 12, 15, 9, 16, 28, 29, 22, 38, 5, 4, 39, 27, 1, 30, 17, 35, 7, 23], 'planning': [38, 27, 17, 39, 4, 0, 9, 11, 1, 35, 18, 26, 33, 30, 6, 14], 'subgraph-isomorphism': [39, 38, 12, 27, 3, 17, 16, 30, 4, 11, 31, 0, 35, 26, 34, 37, 18, 15, 36, 29, 2, 9, 33, 8, 32, 21, 13, 25, 28], 'hamiltonian': [21, 24, 0, 29, 12, 2, 10, 20, 32, 3,

TypeError: 'NoneType' object is not iterable