In [153]:
import json
import os

import pandas as pd

In [154]:
# Games checked by default; the cells below compare them against "instances_v1.6.json".
games = ["imagegame", "privateshared", "taboo", "matchit_ascii"]

# Alternative selections, each annotated with the instance file it is compared against:
# games = ["wordle", "wordle_withclue", "wordle_withcritic"]  # no instances v2.0 (or v1.6?)
# games = ["referencegame"]  # instances_v1.6_en.json
# games = ["matchit", "codenames"]  # instances_v1_6.json
# games = ["textmapworld_graphreasoning", "textmapworld_main", "textmapworld_specificroom"]  # instances_v1.6.json, -p textmapworld/


# For every game: the key (or list of keys) of a game instance whose value(s)
# are compared between instance files.
target_dict = {
    "imagegame": "target_grid",
    "privateshared": "slots",
    # Open question: is the order of the three grids relevant as well?
    "referencegame": [
        "player_1_target_grid",
        "player_1_second_grid",
        "player_1_third_grid",
    ],
    "taboo": "target_word",
    "wordle": "target_word",
    "wordle_withclue": "target_word",
    "wordle_withcritic": "target_word",
    # Needs special treatment (see special_codenames_treatment).
    "codenames": "assignments",
    "matchit": ["image_a", "image_b"],
    "matchit_ascii": ["grid_a", "grid_b"],
    "textmapworld_graphreasoning": [
        "Current_Position",
        "Picture_Name",
        "Graph_Nodes",
        "Graph_Edges",
        "Mapping",
    ],
    "textmapworld_main": [
        "Current_Position",
        "Picture_Name",
        "Graph_Nodes",
        "Graph_Edges",
        "Mapping",
    ],
    "textmapworld_specificroom": [
        "Current_Position",
        "Picture_Name",
        "Specific_Room",
        "Graph_Nodes",
        "Graph_Edges",
        "Mapping",
    ],
}

In [155]:
def _comparable_target(instance: dict, target, game: str):
    """Collapse the target field(s) of one game instance into a single value usable as a merge key."""
    if isinstance(target, list):
        # str() leaves string fields unchanged and lets non-string fields be joined too.
        return " ".join(str(instance[key]) for key in target)

    value = instance[target]
    if isinstance(value, dict):
        if game == "codenames":
            return special_codenames_treatment(value)
        return str(value)
    if isinstance(value, list):
        return " ".join(str(item) for item in value)
    return value


def check_overlap(games: list, path_to_games: str, to_compare: list, print_output: bool = False):
    """
    Report, per game, how many instances of two instance files share the same target.

    For every game the target of each instance (the key or keys listed for the
    game in ``target_dict``) is reduced to one value, and the two files are
    inner-joined on that value. The reported number is the number of joined
    rows, so a target that occurs several times in either file is counted once
    per matching pair.

    Args:
        games: names of the games to check; each must be a key of ``target_dict``.
        path_to_games: directory that contains the game folders ("" for the
            current working directory). A trailing separator is optional.
        to_compare: file names located in ``<game>/in/``. Only the first two
            entries are compared.
        print_output: if True, also print the instance ids of overlapping pairs.

    Raises:
        ValueError: if ``to_compare`` names fewer than two files.
    """
    if len(to_compare) < 2:
        raise ValueError("to_compare must contain the names of two instance files")
    versions = (to_compare[0], to_compare[1])

    for game in games:
        target = target_dict[game]
        frames = []

        for version in versions:
            instance_path = os.path.join(path_to_games, game, "in", version)
            # Explicit encoding so the result does not depend on the locale.
            with open(instance_path, encoding="utf-8") as f:
                instances = json.load(f)

            rows = [
                {
                    "epid": f'{experiment["name"]}_{instance["game_id"]}',
                    "target": _comparable_target(instance, target, game),
                }
                for experiment in instances["experiments"]
                for instance in experiment["game_instances"]
            ]
            frames.append(pd.DataFrame(rows, columns=["epid", "target"]))

        # The merge suffixes the id columns: epid_x is the first file, epid_y the second.
        overlap = pd.merge(frames[0], frames[1], how="inner", on="target")
        print(f"{len(overlap)} instance(s) overlap in {game} between {to_compare[0]} and {to_compare[1]}")
        if print_output and len(overlap) > 0:
            print(overlap[["epid_x", "epid_y"]])


def special_codenames_treatment(assignments : dict)-> str:
    """
    Flatten a codenames assignment mapping into one string.

    Each group name is followed by that group's words in sorted order; the
    groups themselves keep the order in which the mapping yields them. The
    flattened list is returned as its ``str()`` representation.
    """
    flattened = []
    for team, members in assignments.items():
        flattened.append(team)
        flattened.extend(sorted(members))
    return str(flattened)

In [156]:
# Combined-target comparison for the current `games` list. Game folders are
# resolved relative to the working directory and matching ids are printed.
path, print_output = "", True
to_compare = ["instances.json", "instances_v1.6.json"]

check_overlap(
    games=games,
    path_to_games=path,
    to_compare=to_compare,
    print_output=print_output,
)

1 instance(s) overlap in imagegame between instances.json and instances_v1.6.json
           epid_x          epid_y
0  random_grids_0  random_grids_0
0 instance(s) overlap in privateshared between instances.json and instances_v1.6.json
0 instance(s) overlap in taboo between instances.json and instances_v1.6.json
7 instance(s) overlap in matchit_ascii between instances.json and instances_v1.6.json
             epid_x            epid_y
0       same_grid_8       same_grid_3
1  similar_grid_1_3  similar_grid_1_0
2  similar_grid_1_4  similar_grid_1_9
3  similar_grid_1_5  similar_grid_1_5
4  similar_grid_1_6  similar_grid_1_3
5  similar_grid_1_8  similar_grid_1_8
6  similar_grid_2_8  similar_grid_2_3


In [157]:
# referencegame is compared against "instances_v1.6_en.json".
# `path` and `print_output` keep the values assigned in an earlier cell.
games = ["referencegame"]
to_compare = ["instances.json", "instances_v1.6_en.json"]
check_overlap(
    games=games,
    path_to_games=path,
    to_compare=to_compare,
    print_output=print_output,
)

0 instance(s) overlap in referencegame between instances.json and instances_v1.6_en.json


In [158]:
# matchit and codenames are compared against "instances_v1_6.json" (underscore spelling).
# `path` and `print_output` keep the values assigned in an earlier cell.
games = ["matchit", "codenames"]
to_compare = ["instances.json", "instances_v1_6.json"]
check_overlap(
    games=games,
    path_to_games=path,
    to_compare=to_compare,
    print_output=print_output,
)

30 instance(s) overlap in matchit between instances.json and instances_v1_6.json
               epid_x             epid_y
0        same_image_0       same_image_0
1        same_image_1       same_image_1
2        same_image_2       same_image_2
3        same_image_3       same_image_3
4        same_image_4       same_image_4
5        same_image_5       same_image_5
6        same_image_6       same_image_6
7        same_image_7       same_image_7
8        same_image_8       same_image_8
9        same_image_9       same_image_9
10    similar_image_0    similar_image_0
11    similar_image_1    similar_image_1
12    similar_image_2    similar_image_2
13    similar_image_3    similar_image_3
14    similar_image_4    similar_image_4
15    similar_image_5    similar_image_5
16    similar_image_6    similar_image_6
17    similar_image_7    similar_image_7
18    similar_image_8    similar_image_8
19    similar_image_9    similar_image_9
20  different_image_0  different_image_0
21  different_ima

In [159]:
# The textmapworld variants are read from below "textmapworld/"; only the
# per-game counts are printed.
games = [
    "textmapworld_graphreasoning",
    "textmapworld_main",
    "textmapworld_specificroom",
]
path, print_output = "textmapworld/", False
to_compare = ["instances.json", "instances_v1.6.json"]

check_overlap(
    games=games,
    path_to_games=path,
    to_compare=to_compare,
    print_output=print_output,
)

30 instance(s) overlap in textmapworld_graphreasoning between instances.json and instances_v1.6.json
50 instance(s) overlap in textmapworld_main between instances.json and instances_v1.6.json
18 instance(s) overlap in textmapworld_specificroom between instances.json and instances_v1.6.json


In [165]:
def _read_labelled_instances(path_to_games: str, game: str, version: str) -> list:
    """Load one instance file and return (instance id, instance dict) pairs in file order."""
    instance_path = os.path.join(path_to_games, game, "in", version)
    # Explicit encoding so the result does not depend on the locale.
    with open(instance_path, encoding="utf-8") as f:
        data = json.load(f)
    return [
        (experiment["name"] + "_" + str(instance["game_id"]), instance)
        for experiment in data["experiments"]
        for instance in experiment["game_instances"]
    ]


def check_overlap_direct(games: list, path_to_games: str, to_compare: list, print_output: bool = False):
    """
    Compare each target field of every instance in one instance file with the
    same field of every instance in a second instance file.

    Every key listed for a game in ``target_dict`` is compared on its own with
    ``==``, so a single pair of instances can yield several overlap records
    (one per matching key) and the number of records can exceed the number of
    instances. Values are compared as loaded from JSON.
    NOTE(review): unlike check_overlap, codenames assignments are not
    normalised with special_codenames_treatment here - confirm this is intended.

    Args:
        games: names of the games to check; each must be a key of ``target_dict``.
        path_to_games: directory that contains the game folders ("" for the
            current working directory). A trailing separator is optional.
        to_compare: file names located in ``<game>/in/``. Instances of
            ``to_compare[0]`` are looked up in ``to_compare[1]``.
        print_output: if True, also print every overlap record and every
            instance id of the first file that matched nothing.

    Returns:
        dict mapping each game to a list of
        ``[key, instance id in first file, instance id in second file]`` records.

    Raises:
        ValueError: if ``to_compare`` names fewer than two files.
    """
    if len(to_compare) < 2:
        raise ValueError("to_compare must contain the names of two instance files")

    overlaps = dict()
    for game in games:
        game_overlaps = overlaps[game] = list()
        game_no_overlaps = list()

        target = target_dict[game]
        sub_targets = target if isinstance(target, list) else [target]

        instances_first = _read_labelled_instances(path_to_games, game, to_compare[0])
        # Ids of the second file are built once, not once per comparison.
        instances_second = _read_labelled_instances(path_to_games, game, to_compare[1])

        # Records consist of strings only, so a set gives constant-time de-duplication.
        seen = set()
        for inst_id, instance in instances_first:
            has_overlap = False
            for sub_target in sub_targets:
                inst_target = instance[sub_target]
                for inst_id_old, instance_old in instances_second:
                    if inst_target == instance_old[sub_target]:
                        has_overlap = True
                        record = (sub_target, inst_id, inst_id_old)
                        if record not in seen:
                            seen.add(record)
                            game_overlaps.append(list(record))
            if not has_overlap:
                game_no_overlaps.append(inst_id)

        # The first number counts overlap records, the second counts instances of the first file.
        print(f"{len(game_overlaps)} overlaps out of {len(instances_first)} in {game} between {to_compare[0]} and {to_compare[1]}")
        if print_output:
            for sub_target, inst_id, inst_id_old in game_overlaps:
                print(f"  {sub_target}: {inst_id} matches {inst_id_old}")
            for inst_id in game_no_overlaps:
                print(f"  no overlap: {inst_id}")

    return overlaps


def same_or_other_overlap(overlaps_dict: dict):
    """
    Print, per game, how many overlap records pair an instance id with the
    identical id and how many pair it with a different id.

    Args:
        overlaps_dict: maps a game name to a list of records whose second and
            third items are the two instance ids that were matched (the
            structure returned by check_overlap_direct).
    """
    for game in overlaps_dict:
        # One flag per record: True when both sides carry the same instance id.
        id_matches = [record[1] == record[2] for record in overlaps_dict[game]]
        same_count = id_matches.count(True)
        overlap_count = len(id_matches) - same_count
        print(f"{game} has {same_count} same instances and {overlap_count} basic overlaps.")

In [166]:
# Field-by-field comparison for the default games, resolved relative to the
# working directory. The returned dict is the cell's displayed value.
games = ["imagegame", "privateshared", "taboo", "matchit_ascii"]
path, print_output = "", False
to_compare = ["instances.json", "instances_v1.6.json"]

check_overlap_direct(
    games=games,
    path_to_games=path,
    to_compare=to_compare,
    print_output=print_output,
)

1 overlaps out of 60 in imagegame between instances.json and instances_v1.6.json
0 overlaps out of 50 in privateshared between instances.json and instances_v1.6.json
0 overlaps out of 60 in taboo between instances.json and instances_v1.6.json
56 overlaps out of 40 in matchit_ascii between instances.json and instances_v1.6.json


{'imagegame': [['target_grid', 'random_grids_0', 'random_grids_0']],
 'privateshared': [],
 'taboo': [],
 'matchit_ascii': [['grid_a', 'same_grid_0', 'different_grid_2'],
  ['grid_b', 'same_grid_1', 'different_grid_5'],
  ['grid_a', 'same_grid_2', 'similar_grid_1_8'],
  ['grid_a', 'same_grid_5', 'different_grid_9'],
  ['grid_b', 'same_grid_5', 'similar_grid_1_9'],
  ['grid_a', 'same_grid_6', 'different_grid_8'],
  ['grid_b', 'same_grid_6', 'different_grid_1'],
  ['grid_b', 'same_grid_6', 'different_grid_2'],
  ['grid_a', 'same_grid_8', 'same_grid_3'],
  ['grid_a', 'same_grid_8', 'similar_grid_2_3'],
  ['grid_b', 'same_grid_8', 'same_grid_3'],
  ['grid_b', 'same_grid_8', 'different_grid_7'],
  ['grid_a', 'same_grid_9', 'similar_grid_1_2'],
  ['grid_a', 'same_grid_9', 'different_grid_1'],
  ['grid_a', 'similar_grid_1_0', 'same_grid_5'],
  ['grid_a', 'similar_grid_1_0', 'similar_grid_2_5'],
  ['grid_b', 'similar_grid_1_1', 'different_grid_6'],
  ['grid_a', 'similar_grid_1_2', 'similar_gri

In [167]:
# Re-runs the field-by-field comparison and summarises same-id vs. other-id records.
same_or_other_overlap(
    overlaps_dict=check_overlap_direct(games, path, to_compare, print_output)
)

1 overlaps out of 60 in imagegame between instances.json and instances_v1.6.json
0 overlaps out of 50 in privateshared between instances.json and instances_v1.6.json
0 overlaps out of 60 in taboo between instances.json and instances_v1.6.json
56 overlaps out of 40 in matchit_ascii between instances.json and instances_v1.6.json
imagegame has 1 same instances and 0 basic overlaps.
privateshared has 0 same instances and 0 basic overlaps.
taboo has 0 same instances and 0 basic overlaps.
matchit_ascii has 4 same instances and 52 basic overlaps.


In [168]:
# referencegame is compared against "instances_v1.6_en.json".
# `path` and `print_output` keep the values assigned in an earlier cell.
games = ["referencegame"]
to_compare = ["instances.json", "instances_v1.6_en.json"]
check_overlap_direct(
    games=games,
    path_to_games=path,
    to_compare=to_compare,
    print_output=print_output,
)

0 overlaps out of 90 in referencegame between instances.json and instances_v1.6_en.json


{'referencegame': []}

In [169]:
# Re-runs the field-by-field comparison and summarises same-id vs. other-id records.
same_or_other_overlap(
    overlaps_dict=check_overlap_direct(games, path, to_compare, print_output)
)

0 overlaps out of 90 in referencegame between instances.json and instances_v1.6_en.json
referencegame has 0 same instances and 0 basic overlaps.


In [170]:
# matchit and codenames are compared against "instances_v1_6.json" (underscore spelling).
# `path` and `print_output` keep the values assigned in an earlier cell.
games = ["matchit", "codenames"]
to_compare = ["instances.json", "instances_v1_6.json"]
check_overlap_direct(
    games=games,
    path_to_games=path,
    to_compare=to_compare,
    print_output=print_output,
)

60 overlaps out of 30 in matchit between instances.json and instances_v1_6.json
0 overlaps out of 130 in codenames between instances.json and instances_v1_6.json


{'matchit': [['image_a', 'same_image_0', 'same_image_0'],
  ['image_b', 'same_image_0', 'same_image_0'],
  ['image_a', 'same_image_1', 'same_image_1'],
  ['image_b', 'same_image_1', 'same_image_1'],
  ['image_a', 'same_image_2', 'same_image_2'],
  ['image_b', 'same_image_2', 'same_image_2'],
  ['image_a', 'same_image_3', 'same_image_3'],
  ['image_b', 'same_image_3', 'same_image_3'],
  ['image_a', 'same_image_4', 'same_image_4'],
  ['image_b', 'same_image_4', 'same_image_4'],
  ['image_a', 'same_image_5', 'same_image_5'],
  ['image_b', 'same_image_5', 'same_image_5'],
  ['image_a', 'same_image_6', 'same_image_6'],
  ['image_b', 'same_image_6', 'same_image_6'],
  ['image_a', 'same_image_7', 'same_image_7'],
  ['image_b', 'same_image_7', 'same_image_7'],
  ['image_a', 'same_image_8', 'same_image_8'],
  ['image_b', 'same_image_8', 'same_image_8'],
  ['image_a', 'same_image_9', 'same_image_9'],
  ['image_b', 'same_image_9', 'same_image_9'],
  ['image_a', 'similar_image_0', 'similar_image_0

In [171]:
# Re-runs the field-by-field comparison and summarises same-id vs. other-id records.
same_or_other_overlap(
    overlaps_dict=check_overlap_direct(games, path, to_compare, print_output)
)

60 overlaps out of 30 in matchit between instances.json and instances_v1_6.json
0 overlaps out of 130 in codenames between instances.json and instances_v1_6.json
matchit has 60 same instances and 0 basic overlaps.
codenames has 0 same instances and 0 basic overlaps.


In [172]:
# The textmapworld variants are read from below "textmapworld/".
# The returned dict is the cell's displayed value.
games = [
    "textmapworld_graphreasoning",
    "textmapworld_main",
    "textmapworld_specificroom",
]
path, print_output = "textmapworld/", False
to_compare = ["instances.json", "instances_v1.6.json"]

check_overlap_direct(
    games=games,
    path_to_games=path,
    to_compare=to_compare,
    print_output=print_output,
)

184 overlaps out of 30 in textmapworld_graphreasoning between instances.json and instances_v1.6.json
300 overlaps out of 50 in textmapworld_main between instances.json and instances_v1.6.json
477 overlaps out of 30 in textmapworld_specificroom between instances.json and instances_v1.6.json


{'textmapworld_graphreasoning': [['Current_Position', 'small_0', 'small_0'],
  ['Picture_Name', 'small_0', 'small_0'],
  ['Graph_Nodes', 'small_0', 'small_0'],
  ['Graph_Edges', 'small_0', 'small_0'],
  ['Mapping', 'small_0', 'small_0'],
  ['Current_Position', 'small_1', 'small_1'],
  ['Current_Position', 'small_1', 'small_2'],
  ['Current_Position', 'small_1', 'large_23'],
  ['Current_Position', 'small_1', 'large_29'],
  ['Picture_Name', 'small_1', 'small_1'],
  ['Graph_Nodes', 'small_1', 'small_1'],
  ['Graph_Edges', 'small_1', 'small_1'],
  ['Mapping', 'small_1', 'small_1'],
  ['Current_Position', 'small_2', 'small_1'],
  ['Current_Position', 'small_2', 'small_2'],
  ['Current_Position', 'small_2', 'large_23'],
  ['Current_Position', 'small_2', 'large_29'],
  ['Picture_Name', 'small_2', 'small_2'],
  ['Graph_Nodes', 'small_2', 'small_2'],
  ['Graph_Edges', 'small_2', 'small_2'],
  ['Mapping', 'small_2', 'small_2'],
  ['Current_Position', 'small_3', 'small_3'],
  ['Picture_Name', 'sm

In [173]:
# Re-runs the field-by-field comparison and summarises same-id vs. other-id records.
same_or_other_overlap(
    overlaps_dict=check_overlap_direct(games, path, to_compare, print_output)
)

184 overlaps out of 30 in textmapworld_graphreasoning between instances.json and instances_v1.6.json
300 overlaps out of 50 in textmapworld_main between instances.json and instances_v1.6.json
477 overlaps out of 30 in textmapworld_specificroom between instances.json and instances_v1.6.json
textmapworld_graphreasoning has 150 same instances and 34 basic overlaps.
textmapworld_main has 250 same instances and 50 basic overlaps.
textmapworld_specificroom has 166 same instances and 311 basic overlaps.
