In [27]:
%cd ~/Documents/dofus_scrap/

import os
import json
from pprint import pprint


def load_json(path: str) -> dict:
    with open(path, 'r') as json_file:
        data = json.load(json_file)
    return data


json_paths = [
    os.path.join('data', f)
    for f in os.listdir('data')
    if f.endswith('.json')
]

data = {
    path.split('/')[1].split('.')[0]: load_json(path)
    for path in json_paths
}

/home/pierrotlc/Documents/dofus_scrap


## Primary keys

In [18]:
def get_primary_keys(data: dict, keep_all: bool) -> dict[str, set[str]]:
    primary_keys = dict()
    for category, items in data.items():
        primary_keys[category] = set()
        for i in items:
            if keep_all or primary_keys[category] == set():
                primary_keys[category] |= set(i.keys())
            elif 'erreur 404' not in i.keys():
                primary_keys[category] &= set(i.keys())

    return primary_keys


def get_all_unique_primary_keys_set(data: dict) -> dict[str, set[frozenset[str]]]:
    primary_keys = dict()
    for category, items in data.items():
        primary_keys[category] = set()
        for i in items:
            primary_keys[category].add(frozenset(i.keys()))

    return primary_keys


primary_keys = get_primary_keys(data, keep_all=True)

Primary keys for each category.

In [7]:
for category, keys in primary_keys.items():
    print(f'For category: {category}')
    for k in sorted(keys):
        print(f' - {k}')
    print('')

For category: ressources
 - Type
 - conditions
 - description
 - effets
 - erreur 404
 - illustration_url
 - niveau
 - nom
 - recette
 - url

For category: consommables
 - Type
 - conditions
 - description
 - effets
 - erreur 404
 - illustration_url
 - niveau
 - nom
 - recette
 - url

For category: équipements
 - Type
 - conditions
 - description
 - effets
 - erreur 404
 - illustration_url
 - niveau
 - nom
 - recette
 - url

For category: armes
 - Type
 - caractéristiques
 - conditions
 - description
 - dégâts
 - effets
 - erreur 404
 - illustration_url
 - niveau
 - nom
 - recette
 - url

For category: objets d'apparat
 - Type
 - conditions
 - description
 - erreur 404
 - illustration_url
 - niveau
 - nom
 - recette
 - url

For category: bestiaire
 - Race
 - butins
 - butins conditionnés
 - caractéristiques
 - de la même famille
 - illustration_url
 - niveau
 - nom
 - résistances
 - url

For category: compagnons
 - Type
 - description
 - illustration_url
 - nom
 - url

For category: fa

Primary keys common for all categories.

In [21]:
common = set()
for pk in primary_keys.values():
    common |= pk
    
for pk in primary_keys.values():
    common &= pk

print('Common primary keys:')
for k in common:
    print(f'- {k}')
    

cats = set(primary_keys.keys())
cats.remove('bestiaire')
common = set()
for cat, pk in primary_keys.items():
    if cat in cats:
        common |= pk
    
for cat, pk in primary_keys.items():
    if cat in cats:
        common &= pk

print('Common without bestiaire:')
for k in common:
    print(f'- {k}')

Common primary keys:
- nom
- illustration_url
- url
Common without bestiaire:
- illustration_url
- nom
- Type
- url


Now we select the keys that are present in all items of each category

In [16]:
primary_keys = get_all_unique_primary_keys_set(data)
for cat, keys in primary_keys.items():
    print(f'For {cat}:')
    for k in keys:
        print(list(sorted(k)))
    print()

For ressources:
['Type', 'description', 'effets', 'illustration_url', 'niveau', 'nom', 'recette', 'url']
['Type', 'conditions', 'description', 'illustration_url', 'niveau', 'nom', 'recette', 'url']
['Type', 'conditions', 'description', 'effets', 'illustration_url', 'niveau', 'nom', 'recette', 'url']
['Type', 'description', 'illustration_url', 'niveau', 'nom', 'recette', 'url']
['erreur 404', 'url']
['Type', 'description', 'illustration_url', 'niveau', 'nom', 'url']
['Type', 'description', 'effets', 'illustration_url', 'niveau', 'nom', 'url']

For consommables:
['Type', 'description', 'effets', 'illustration_url', 'niveau', 'nom', 'recette', 'url']
['Type', 'conditions', 'description', 'illustration_url', 'niveau', 'nom', 'recette', 'url']
['Type', 'description', 'illustration_url', 'niveau', 'nom', 'recette', 'url']
['erreur 404', 'url']
['Type', 'conditions', 'description', 'effets', 'illustration_url', 'niveau', 'nom', 'recette', 'url']
['Type', 'conditions', 'description', 'illustra

In [33]:
key = 'conditions'
for d in data.values():
    for item in d:
        if key in item:
            print(item[key])

{'null': [{'special': "Quête 'Rencontres d’un soir' achevée"}]}
{'ou': [{'special': 'Forgemage'}, {'special': 'Sculptemage'}]}
{'ou': [{'special': 'Forgemage'}, {'special': 'Sculptemage'}]}
{'ou': [{'special': 'Forgemage'}, {'special': 'Sculptemage'}]}
{'ou': [{'special': 'Forgemage'}, {'special': 'Sculptemage'}]}
{'ou': [{'special': 'Forgemage'}, {'special': 'Sculptemage'}]}
{'ou': [{'special': 'Forgemage'}, {'special': 'Sculptemage'}]}
{'ou': [{'special': 'Forgemage'}, {'special': 'Sculptemage'}]}
{'ou': [{'special': 'Forgemage'}, {'special': 'Sculptemage'}]}
{'ou': [{'special': 'Forgemage'}, {'special': 'Sculptemage'}]}
{'ou': [{'special': 'Forgemage'}, {'special': 'Sculptemage'}]}
{'ou': [{'special': 'Forgemage'}, {'special': 'Sculptemage'}]}
{'ou': [{'special': 'Forgemage'}, {'special': 'Sculptemage'}]}
{'ou': [{'special': 'Forgemage'}, {'special': 'Sculptemage'}]}
{'ou': [{'special': 'Forgemage'}, {'special': 'Sculptemage'}]}
{'ou': [{'special': 'Forgemage'}, {'special': 'Sculpte