In [1]:
import csv
task_to_scenes = {}
with open(r"C:\Users\Cem\Downloads\b1k-tasks.csv", "r") as f:
    for row in csv.DictReader(f):
        scene_list_str = row["Matched Planned Scenes"].replace(" ", "").replace("\n", "").strip()
        task_scenes = scene_list_str.split(",")
        task_to_scenes[row["Task Name"]] = {x for x in task_scenes if x}

In [2]:
len(task_to_scenes)

1025

In [3]:
scenes = {scene for task_scenes in task_to_scenes.values() for scene in task_scenes}
print(scenes)

{'gates_bedroom', 'Beechwood_0_garden', 'grocery_store_cafe', 'Pomaria_0_garden', 'restaurant_asian', 'grocery_store_asian', 'office_bike', 'house_double_floor_lower', 'office_vendor_machine', 'hall_arch_wood', 'school_chemistry', 'hotel_suite_small', 'office_cubicles_left', 'house_double_floor_upper', 'Merom_0_int', 'Wainscott_1_int', 'Merom_0_garden', 'hotel_gym_spa', 'house_single_floor', 'restaurant_brunch', 'Ihlen_1_int', 'Wainscott_0_garden', 'grocery_store_convenience', 'hall_conference_large', 'hotel_suite_large', 'Merom_1_int', 'Wainscott_0_int', 'Rs_int', 'restaurant_diner', 'office_large', 'restaurant_cafeteria', 'restaurant_urban', 'Rs_garden', 'hall_glass_ceiling', 'Benevolence_0_int', 'Beechwood_1_int', 'Benevolence_1_int', 'Ihlen_0_int', 'hall_train_station', 'office_cubicles_right', 'Pomaria_0_int', 'Pomaria_2_int', 'grocery_store_half_stocked', 'school_biology', 'school_computer_lab_and_infirmary', 'restaurant_hotel', 'school_geography', 'Beechwood_0_int', 'Benevolence

In [4]:
# Starting pick
ig_scenes = {x for x in scenes if x.endswith("_int")}
starting_set = ig_scenes | {
    "house_single_floor",
    "house_double_floor_lower",
    "house_double_floor_upper",
}

In [5]:
# What activities do these scenes cover?
def cover(scene_set):
    return [task for task, task_scenes in task_to_scenes.items() if task_scenes & scene_set]

len(cover(starting_set))

918

In [6]:
# Greedy algorithm step
def next_pick(scene_set):
    candidates = scenes - scene_set
    current_cover = len(cover(scene_set))
    advantages = {x: len(cover(scene_set | {x})) - current_cover for x in candidates}
    # print(sorted(advantages.items(), key=lambda x: x[1]))
    return max(advantages.items(), key=lambda x: x[1])
next_pick(starting_set)

('grocery_store_cafe', 41)

In [7]:
# Run the greedy algorithm
max_cover = len(cover(scenes))
greedy_selection = set(starting_set)
while len(cover(greedy_selection)) < max_cover:
    pick = next_pick(greedy_selection)
    print("Adding", pick)
    greedy_selection.add(pick[0])
print("Final set:", sorted(greedy_selection))
print(len(greedy_selection))

Adding ('grocery_store_cafe', 41)
Adding ('restaurant_brunch', 10)
Adding ('office_large', 9)
Adding ('Pomaria_0_garden', 6)
Adding ('school_geography', 4)
Adding ('hotel_gym_spa', 3)
Adding ('grocery_store_asian', 1)
Adding ('Wainscott_0_garden', 1)
Adding ('school_computer_lab_and_infirmary', 1)
Final set: ['Beechwood_0_int', 'Beechwood_1_int', 'Benevolence_0_int', 'Benevolence_1_int', 'Benevolence_2_int', 'Ihlen_0_int', 'Ihlen_1_int', 'Merom_0_int', 'Merom_1_int', 'Pomaria_0_garden', 'Pomaria_0_int', 'Pomaria_1_int', 'Pomaria_2_int', 'Rs_int', 'Wainscott_0_garden', 'Wainscott_0_int', 'Wainscott_1_int', 'grocery_store_asian', 'grocery_store_cafe', 'hotel_gym_spa', 'house_double_floor_lower', 'house_double_floor_upper', 'house_single_floor', 'office_large', 'restaurant_brunch', 'school_computer_lab_and_infirmary', 'school_geography']
27


In [8]:
import itertools, tqdm

print("Max possible cover:", max_cover)

# Run a non-greedy version
def combinatorial_search():
    max_extra_scenes_needed = len(greedy_selection) - len(starting_set)
    print("We will process up to", max_extra_scenes_needed, "extra scenes")
    extra_scene_candidates = scenes - starting_set
    print("We have", len(extra_scene_candidates), "candidates")
    
    def combinatorial_search_n(extra_to_use):
        best_cover = 0
        best_cover_set = set()
        
        print("Processing", extra_to_use, "element combinations")
        for combination_extra in tqdm.tqdm(itertools.combinations(extra_scene_candidates, extra_to_use)):
            combination = starting_set | set(combination_extra)
            combination_cover = len(cover(combination))

            if combination_cover >= best_cover:
                best_cover = combination_cover
                best_cover_set = combination
            
            if combination_cover == max_cover:
                break
                
        return best_cover_set, best_cover
    
    for extra_to_use in range(1, max_extra_scenes_needed + 1):
        combination, combination_cover = combinatorial_search_n(extra_to_use)
        print(f"Best total with {extra_to_use} elements: {combination_cover}. Scenes: {sorted(combination - starting_set)}")
        
        if combination_cover == max_cover:
            print("Final set.")
            return
            
combinatorial_search()

Max possible cover: 994
We will process up to 9 extra scenes
We have 33 candidates
Processing 1 element combinations


33it [00:00, 1834.71it/s]


Best total with 1 elements: 959. Scenes: ['grocery_store_cafe']
Processing 2 element combinations


528it [00:00, 2535.24it/s]


Best total with 2 elements: 969. Scenes: ['grocery_store_cafe', 'restaurant_brunch']
Processing 3 element combinations


5456it [00:02, 2474.34it/s]


Best total with 3 elements: 978. Scenes: ['grocery_store_cafe', 'office_large', 'restaurant_brunch']
Processing 4 element combinations


40920it [00:15, 2570.84it/s]


Best total with 4 elements: 984. Scenes: ['Pomaria_0_garden', 'grocery_store_cafe', 'office_large', 'restaurant_brunch']
Processing 5 element combinations


237336it [01:32, 2577.99it/s]


Best total with 5 elements: 988. Scenes: ['Pomaria_0_garden', 'grocery_store_cafe', 'office_large', 'restaurant_brunch', 'school_geography']
Processing 6 element combinations


1000752it [06:31, 2553.48it/s]


KeyboardInterrupt: 

In [10]:
# Pomaria-advantage
print(len(cover(starting_set | {"Pomaria_0_garden"})) - len(cover(starting_set)))

6


In [11]:
final_set = starting_set | {'Pomaria_0_garden', 'grocery_store_cafe', 'office_large', 'restaurant_brunch'}
len(cover(final_set))

984

In [12]:
print(final_set - ig_scenes)

{'house_single_floor', 'restaurant_brunch', 'house_double_floor_upper', 'grocery_store_cafe', 'Pomaria_0_garden', 'house_double_floor_lower', 'office_large'}
