In [1]:
import pandas as pd
import json
import unicodedata
import numpy as np
import requests
import re
from SPARQLWrapper import SPARQLWrapper2

In [2]:
df=pd.read_csv("Data/commune2021.csv")
communes_ = np.array(df["LIBELLE"])
communes_

array(["L'Abergement-Clémenciat", "L'Abergement-de-Varey",
       'Ambérieu-en-Bugey', ..., 'Pamandzi', 'Sada', 'Tsingoni'],
      dtype=object)

In [3]:
communes, counts = np.unique(communes_, return_counts=True)

In [5]:
len(communes), len(communes_)

(35084, 37742)

In [81]:
def get_insee(commune):
    """
    returns the insee code of the commune
    """
    commune_ = unicodedata.normalize('NFD', commune.lower()).encode('ascii', 'ignore').decode("utf-8")
    url = 'https://geo.api.gouv.fr/communes?nom={c}&fields=nom,code,codesPostaux,'\
              'codeDepartement,codeRegion,population&format=json&geometry=centre'\
              .format(c=commune_)
    exists = len(json.loads(requests.get(url).text))
    if exists>0 :
        codes = json.loads(requests.get(url).text)
        result = [code["code"] for code in codes \
                if  unicodedata.normalize('NFD', code["nom"].lower()).encode('ascii', 'ignore').decode("utf-8") == commune_]
        if len(result)>0:
            return result
        return -1
    return -1

In [132]:
def dbPedia_get_gentile(insee):
    """
    Returns the dbo:peopleName property of the commune corresponding to the insee code
    """
    
    d = SPARQLWrapper2("http://fr.dbpedia.org/sparql")

    d.setQuery(
    """
    PREFIX dbo: <http://dbpedia.org/ontology/>
    PREFIX foaf: <http://xmlns.com/foaf/0.1/>

    SELECT distinct ?ville ?gentile
    WHERE
    {
      ?ville dbo:peopleName  ?gentile.
      ?ville dbo:country  <http://fr.dbpedia.org/resource/France>.
      ?ville dbo:inseeCode ?insee.
      filter(regex(?insee,\"^""" +insee+ """\", "i"))
    }
      """
    ) 
    communes = set()
    res = d.query().bindings
    for result in res:
        communes.add(unicodedata.normalize('NFD', result["gentile"].value.lower()).encode('ascii', 'ignore').decode("utf-8"))
        
    return communes

### For each commune, extract from dbPedia the dbo:peopleName property if it exists

In [155]:
# gentiles = {}
# for commune in communes:
#     print(commune)
#     insee = get_insee(commune)
#     if insee != -1: 
#         result = set()
#         for insee_ in insee:
#             g = dbPedia_get_gentile(insee_)
#             result  = result.union(g)
        
#         if(len(result)>0):
#             gentiles[commune] = list(result)
        
#         print(commune, result)
#         print("____________________________________________________")  
        
# with open("Data/dbPedia_extracted_demonyms2.json", "w") as fp:
#     json.dump(gentiles,fp)

### Preprocess extracted text

In [150]:
from nltk.stem.snowball import FrenchStemmer
stemmer = FrenchStemmer()

gentiles_ = {}
for com in gentiles:
    list_g = gentiles[com]
    result = set()
    for g in list_g:
        g1 = re.sub("\(.*\)", "", g)
        g2 = re.sub("\sou\s|\set\s|;|/", ",", g1)
        print(g2)
        split_g = re.split("\s?,,?\s?", g2)
        print(split_g)
        for split_g_ in split_g:  
            result.add(split_g_)
    result = list(result)
    gentiles_[com] = result   
    print(com, list_g, result)

aastais
['aastais']
Aast ['aastais'] ['aastais']
abainvillois
['abainvillois']
Abainville ['abainvillois'] ['abainvillois']
abancourtois
['abancourtois']
Abancourt ['abancourtois'] ['abancourtois']
abaucourtois
['abaucourtois']
Abaucourt-Hautecourt ['abaucourtois'] ['abaucourtois']
abbarois
['abbarois']
Abbaretz ['abbarois'] ['abbarois']
abbecourtois
['abbecourtois']
Abbecourt ['abbecourtois'] ['abbecourtois']
abbenanais
['abbenanais']
Abbenans ['abbenanais'] ['abbenanais']
abbevillois
['abbevillois']
Abbeville ['abbevillois(e)'] ['abbevillois']
abbevillois
['abbevillois']
Abbeville-Saint-Lucien ['abbevillois'] ['abbevillois']
abbecourtois
['abbecourtois']
Abbécourt ['abbecourtois'] ['abbecourtois']
abbevillois
['abbevillois']
Abbéville-la-Rivière ['abbevillois'] ['abbevillois']
abeilhanais
['abeilhanais']
Abeilhan ['abeilhanais'] ['abeilhanais']
abergeois
['abergeois']
Abergement-la-Ronce ['abergeois'] ['abergeois']
abergementiers, abergementieres
['abergementiers', 'abergementieres']

ballansois
['ballansois']
Ballans ['ballansois'] ['ballansois']
biardais
['biardais']
Balleroy-sur-Drôme ['biardais'] ['biardais']
ballersdorfois, ballersdorfoises
['ballersdorfois', 'ballersdorfoises']
Ballersdorf ['ballersdorfois, ballersdorfoises'] ['ballersdorfoises', 'ballersdorfois']
ballonais
['ballonais']
Ballon ['ballonais'] ['ballonais']
ballonnais
['ballonnais']
Ballon-Saint Mars ['ballonnais'] ['ballonnais']
ballocais
['ballocais']
Ballots ['ballocais'] ['ballocais']
balloyeux
['balloyeux']
Balloy ['balloyeux'] ['balloyeux']
berlevillois
['berlevillois']
Balléville ['berlevillois(es)'] ['berlevillois']
balmanais, balmanaises
['balmanais', 'balmanaises']
Balma ['balmanais, balmanaises'] ['balmanaises', 'balmanais']
balugnesi
['balugnesi']
balognais
['balognais']
Balogna ['balugnesi', 'balognais'] ['balognais', 'balugnesi']
balschwillerois
['balschwillerois']
Balschwiller ['balschwillerois'] ['balschwillerois']
baltzenheimois, baltzenheimoises
['baltzenheimois', 'baltzenheimo

['bourgeois']
Bourg-Charente ['bourgeois'] ['bourgeois']
bourguesans, bourguesanes
['bourguesans', 'bourguesanes']
Bourg-Saint-Andéol ['bourguesans, bourguesanes'] ['bourguesans', 'bourguesanes']
bourguignons
['bourguignons']
Bourg-Saint-Bernard ['bourguignons'] ['bourguignons']
bourtoirs
['bourtoirs']
Bourg-Saint-Christophe ['bourtoirs'] ['bourtoirs']
borains , borainnes
['borains', 'borainnes']
Bourg-Saint-Maurice ['borains / borainnes'] ['borainnes', 'borains']
bourgiens
['bourgiens']
Bourg-d'Oueil ['bourgiens'] ['bourgiens']
peagois,peagoise
['peagois', 'peagoise']
Bourg-de-Péage ['peagois et peagoise'] ['peagoise', 'peagois']
visa-bourgiens
['visa-bourgiens']
Bourg-de-Visa ['visa-bourgiens'] ['visa-bourgiens']
bourgcomptois
['bourgcomptois']
Bourg-des-Comptes ['bourgcomptois'] ['bourgcomptois']
burgiens
['burgiens']
Bourg-en-Bresse ['burgiens'] ['burgiens']
bourcominois, bourcominoises
['bourcominois', 'bourcominoises']
Bourg-et-Comin ['bourcominois, bourcominoises'] ['bourcominoi

Chambéry ['chamberiens'] ['chamberiens']
chamayot
['chamayot']
Chamery ['chamayot'] ['chamayot']
chamessonnais
['chamessonnais']
Chamesson ['chamessonnais'] ['chamessonnais']
chameyracois, chameyracoises.
['chameyracois', 'chameyracoises.']
Chameyrat ['chameyracois, chameyracoises.'] ['chameyracois', 'chameyracoises.']
chamignots
['chamignots']
Chamigny ['chamignots'] ['chamignots']
chamolais, chamolaises
['chamolais', 'chamolaises']
Chamole ['chamolais, chamolaises'] ['chamolais', 'chamolaises']
chamoniards, chamoniardes
['chamoniards', 'chamoniardes']
Chamonix-Mont-Blanc ['chamoniards, chamoniardes'] ['chamoniardes', 'chamoniards']
chamouillacais
['chamouillacais']
Chamouillac ['chamouillacais'] ['chamouillacais']
campo-haltiens
['campo-haltiens']
Champ-Haut ['campo-haltiens'] ['campo-haltiens']
laurentains
['laurentains']
Champ-Laurent ['laurentains'] ['laurentains']
ducecampiens
['ducecampiens']
Champ-le-Duc ['ducecampiens'] ['ducecampiens']
champagnacois,champagnacoise
['champagna

croquants
['croquants']
Crocq ['croquants'] ['croquants']
croceens
['croceens']
Crocy ['croceens'] ['croceens']
croignonnais
['croignonnais']
Croignon ['croignonnais'] ['croignonnais']
croisettois
['croisettois']
Croisette ['croisettois'] ['croisettois']
croisillais
['croisillais']
Croisilles ['croisillais'] ['croisillais']
croisseens
['croisseens']
Croissy-Beaubourg ['croisseens'] ['croisseens']
croissillons
['croissillons']
Croissy-sur-Seine ['croissillons'] ['croissillons']
croyens
['croyens']
croisiens
['croisiens']
Croix ['croyens', 'croisiens'] ['croyens', 'croisiens']
cruci-capetiens,croix-chapeautais
['cruci-capetiens', 'croix-chapeautais']
Croix-Chapeau ['cruci-capetiens ou croix-chapeautais'] ['croix-chapeautais', 'cruci-capetiens']
croixanvecois, croixanvecoise
['croixanvecois', 'croixanvecoise']
Croixanvec ['croixanvecois, croixanvecoise'] ['croixanvecoise', 'croixanvecois']
crollois
['crollois']
Crolles ['crollois'] ['crollois']
cronatiens
['cronatiens']
Cronat ['cronatien

Fontenay-le-Marmion ['fontenaysiens'] ['fontenaysiens']
fontenassiens
['fontenassiens']
Fontenay-le-Pesnel ['fontenassiens'] ['fontenassiens']
fontenois
['fontenois']
Fontenay-le-Vicomte ['fontenois'] ['fontenois']
fontenaysiens
['fontenaysiens']
Fontenay-lès-Briis ['fontenaysiens'] ['fontenaysiens']
fontenaysiens
['fontenaysiens']
Fontenay-sous-Bois ['fontenaysiens'] ['fontenaysiens']
fontenaysiens
['fontenaysiens']
Fontenay-sur-Loing ['fontenaysiens'] ['fontenaysiens']
fontenaysien
['fontenaysien']
Fontenay-sur-Vègre ['fontenaysien'] ['fontenaysien']
fontenellois
['fontenellois']
Fontenelle ['fontenellois'] ['fontenellois']
fontenesiens
['fontenesiens']
Fontenet ['fontenesiens'] ['fontenesiens']
fontenillois
['fontenillois']
Fontenille ['fontenillois'] ['fontenillois']
fontenillois, fontenilloises
['fontenillois', 'fontenilloises']
Fontenilles ['fontenillois, fontenilloises'] ['fontenillois', 'fontenilloises']
fontenoisien
['fontenoisien']
Fontenois-lès-Montbozon ['fontenoisien(e)'] 

Hondouville ['hondouvillais'] ['hondouvillais']
hondschootois
['hondschootois']
Hondschoote ['hondschootois'] ['hondschootois']
honfleurais
['honfleurais']
Honfleur ['honfleurais'] ['honfleurais']
hunniclusiens
['hunniclusiens']
Honnechy ['hunniclusiens'] ['hunniclusiens']
hontanois
['hontanois']
Hontanx ['hontanois'] ['hontanois']
hordinois
['hordinois']
Hordain ['hordinois'] ['hordinois']
horguais
['horguais']
Horgues ['horguais'] ['horguais']
hornaingeois
['hornaingeois']
Hornaing ['hornaingeois'] ['hornaingeois']
hoztar
['hoztar']
Hosta ['hoztar'] ['hoztar']
hostensois
['hostensois']
Hostens ['hostensois'] ['hostensois']
hottotais
['hottotais']
Hottot-les-Bagues ['hottotais'] ['hottotais']
houdanais
['houdanais']
Houdan ['houdanais'] ['houdanais']
houdelaincourtois
['houdelaincourtois']
Houdelaincourt ['houdelaincourtois'] ['houdelaincourtois']
houeillessais, houeillessaises
['houeillessais', 'houeillessaises']
Houeillès ['houeillessais, houeillessaises'] ['houeillessaises', 'houei

Laguian-Mazous ['laguianais'] ['laguianais']
liginagar
['liginagar']
Laguinge-Restoue ['liginagar'] ['liginagar']
laguiolais
['laguiolais']
Laguiole ['laguiolais(e)'] ['laguiolais']
gupiais
['gupiais']
Lagupie ['gupiais'] ['gupiais']
guepiens
['guepiens']
Laguépie ['guepiens'] ['guepiens']
hageois, hageoises
['hageois', 'hageoises']
Lahage ['hageois, hageoises'] ['hageois', 'hageoises']
lahiterois, lahiteroises
['lahiterois', 'lahiteroises']
Lahitère ['lahiterois, lahiteroises'] ['lahiteroises', 'lahiterois']
lehunztar
['lehunztar']
Lahonce ['lehunztar'] ['lehunztar']
laignevillois
['laignevillois']
Laigneville ['laignevillois'] ['laignevillois']
laigneens
['laigneens']
Laigné-en-Belin ['laigneens'] ['laigneens']
laillylois
['laillylois']
Lailly-en-Val ['laillylois'] ['laillylois']
lailleens
['lailleens']
Laillé ['lailleens'] ['lailleens']
limounis
['limounis']
Laimont ['limounis'] ['limounis']
lainois
['lainois']
Lain ['lainois'] ['lainois']
lainsecquois
['lainsecquois']
Lainsecq ['la

['livryens']
Livry-Gargan ['livryens'] ['livryens']
livryens
['livryens']
Livry-sur-Seine ['livryens'] ['livryens']
livreens
['livreens']
Livré-la-Touche ['livreens'] ['livreens']
livreens, livreennes
['livreens', 'livreennes']
Livré-sur-Changeon ['livreens, livreennes'] ['livreennes', 'livreens']
lizacais
['lizacais']
Lizac ['lizacais'] ['lizacais']
lizerains
['lizerains']
Lizeray ['lizerains'] ['lizerains']
lizinois
['lizinois']
Lizines ['lizinois'] ['lizinois']
liziotais
['liziotais']
Lizio ['liziotais'] ['liziotais']
lizeens
['lizeens']
Lizy-sur-Ourcq ['lizeens'] ['lizeens']
colieuvres
['colieuvres']
Lièpvre ['colieuvres'] ['colieuvres']
lievinois
['lievinois']
Liévin ['lievinois'] ['lievinois']
pouhas
['pouhas']
Liézey ['pouhas'] ['pouhas']
llotois
['llotois']
Llo ['llotois'] ['llotois']
brevalairiens
['brevalairiens']
Loc-Brévalaire ['brevalairiens'] ['brevalairiens']
loguisiens
['loguisiens']
Loc-Eguiner ['loguisiens'] ['loguisiens']
locenvellois, locenvelloise
['locenvellois', 

['moissonnais']
Moisson ['moissonnais'] ['moissonnais']
moisseens
['moisseens']
Moissy-Cramayel ['moisseens'] ['moisseens']
messiens
['messiens']
Moisy ['messiens'] ['messiens']
moitronnais
['moitronnais']
Moitron-sur-Sarthe ['moitronnais'] ['moitronnais']
molacais
['molacais']
molacois
['molacois']
Molac ['molacais', 'molacois'] ['molacais', 'molacois']
molassiens
['molassiens']
Molas ['molassiens'] ['molassiens']
molaysiens,molaysiennes
['molaysiens', 'molaysiennes']
Molay ['molaysiens et molaysiennes'] ['molaysiens', 'molaysiennes']
moliennois
['moliennois']
Moliens ['moliennois'] ['moliennois']
molierois
['molierois']
gracieux-divins
['gracieux-divins']
Molières ['molierois', 'gracieux-divins'] ['molierois', 'gracieux-divins']
molierois, molieroises
['molierois', 'molieroises']
Molières-sur-Cèze ['molierois, molieroises'] ['molieroises', 'molierois']
mollanais, mollanaises
['mollanais', 'mollanaises']
Mollans-sur-Ouvèze ['mollanais, mollanaises'] ['mollanaises', 'mollanais']
molavi

['nuailleens']
Nuaillé-sur-Boutonne ['nuailleens'] ['nuailleens']
nucourtois
['nucourtois']
Nucourt ['nucourtois'] ['nucourtois']
nueillaubrais
['nueillaubrais']
Nueil-les-Aubiers ['nueillaubrais'] ['nueillaubrais']
jalaisien
['jalaisien']
Nuillé-le-Jalais ['jalaisien'] ['jalaisien']
nuilleen
['nuilleen']
Nuillé-sur-Vicoin ['nuilleen'] ['nuilleen']
nuitons
['nuitons']
Nuits-Saint-Georges ['nuiton(ne)s'] ['nuitons']
nuncquois
['nuncquois']
Nuncq-Hautecôte ['nuncquois'] ['nuncquois']
ferronnais
['ferronnais']
Nuret-le-Ferron ['ferronnais'] ['ferronnais']
nyerois
['nyerois']
Nyer ['nyerois'] ['nyerois']
nyonsais
['nyonsais']
Nyons ['nyonsais'] ['nyonsais']
negrepelissiens
['negrepelissiens']
Nègrepelisse ['negrepelissiens'] ['negrepelissiens']
neantais, neantaise
['neantais', 'neantaise']
Néant-sur-Yvel ['neantais, neantaise'] ['neantaise', 'neantais']
nebianais
['nebianais']
Nébian ['nebianais'] ['nebianais']
nebiassais
['nebiassais']
Nébias ['nebiassais'] ['nebiassais']
negrevillais
['n

['preslois']
Presles-en-Brie ['preslois'] ['preslois']
pressecagniens
['pressecagniens']
Pressagny-l'Orgueilleux ['pressecagniens'] ['pressecagniens']
pressignacois
['pressignacois']
Pressignac ['pressignacois'] ['pressignacois']
pressigniens
['pressigniens']
Pressigny-les-Pins ['pressigniens'] ['pressigniens']
pressinois
['pressinois']
Pressins ['pressinois'] ['pressinois']
preuillavillois
['preuillavillois']
Preuilly-la-Ville ['preuillavillois'] ['preuillavillois']
prulliaciens
['prulliaciens']
Preuilly-sur-Claise ['prulliaciens'] ['prulliaciens']
preurois, preuroises
['preurois', 'preuroises']
Preures ['preurois, preuroises'] ['preuroises', 'preurois']
priogniens
['priogniens']
Priay ['priogniens'] ['priogniens']
prignacais
['prignacais']
Prignac ['prignacais'] ['prignacais']
prignacais
['prignacais']
Prignac-et-Marcamps ['prignacais'] ['prignacais']
prigontins, prigontines
['prigontins', 'prigontines']
Prigonrieux ['prigontins, prigontines'] ['prigontines', 'prigontins']
primelinoi

casselards , casselardes
['casselards', 'casselardes']
Saint-Didier-de-la-Tour ['casselards / casselardes'] ['casselardes', 'casselards']
saint-didiatons
['saint-didiatons']
Saint-Didier-sur-Beaujeu ['saint-didiatons'] ['saint-didiatons']
desideriens
['desideriens']
Saint-Didier-sur-Chalaronne ['desideriens'] ['desideriens']
saint-dionizyens, saint-dionizyennes
['saint-dionizyens', 'saint-dionizyennes']
Saint-Dionisy ['saint-dionizyens, saint-dionizyennes'] ['saint-dionizyennes', 'saint-dionizyens']
saint-divyens
['saint-divyens']
Saint-Divy ['saint-divyens'] ['saint-divyens']
saint-dizannais
['saint-dizannais']
Saint-Dizant-du-Bois ['saint-dizannais'] ['saint-dizannais']
saint-dizanais
['saint-dizanais']
Saint-Dizant-du-Gua ['saint-dizanais'] ['saint-dizanais']
bragards
['bragards']
Saint-Dizier ['bragards'] ['bragards']
fechois
['fechois']
Saint-Dizier-l'Évêque ['fechois'] ['fechois']
deodatiens
['deodatiens']
Saint-Dié-des-Vosges ['deodatien(ne)s'] ['deodatiens']
dolaysien, dolaysie

pragoulinois, pragoulinoises
['pragoulinois', 'pragoulinoises']
Saint-Sylvestre-Pragoulin ['pragoulinois, pragoulinoises'] ['pragoulinois', 'pragoulinoises']
paroupians
['paroupians']
saint-symphorinois
['saint-symphorinois']
Saint-Symphorien ['paroupians', 'saint-symphorinois'] ['paroupians', 'saint-symphorinois']
sanfourio
['sanfourio']
Saint-Symphorien-de-Mahun ['sanfourio'] ['sanfourio']
saint-symphorienois
['saint-symphorienois']
Saint-Symphorien-de-Thénières ['saint-symphorienois(e)'] ['saint-symphorienois']
sensfrinoises,sensfrinois
['sensfrinoises', 'sensfrinois']
Saint-Symphorien-des-Bois ['sensfrinoises et sensfrinois'] ['sensfrinoises', 'sensfrinois']
saint-sympherinolais
['saint-sympherinolais']
Saint-Symphorien-des-Bruyères ['saint-sympherinolais'] ['saint-sympherinolais']
pelauds
['pelauds']
Saint-Symphorien-sur-Coise ['pelauds'] ['pelauds']
sebamorsentins
['sebamorsentins']
Saint-Sébastien-de-Morsent ['sebamorsentins'] ['sebamorsentins']
sebastienots
['sebastienots']
Sai

Teurthéville-Bocage ['teurthevillais'] ['teurthevillais']
teurthevillais
['teurthevillais']
Teurthéville-Hague ['teurthevillais'] ['teurthevillais']
thaimois
['thaimois']
Thaims ['thaimois'] ['thaimois']
thairesiens
['thairesiens']
Thairé ['thairesiens'] ['thairesiens']
thannois
['thannois']
Thann ['thannois'] ['thannois']
thanvilleens
['thanvilleens']
Thanvillé ['thanvilleen(ne)s'] ['thanvilleens']
thaonnais
['thaonnais']
Thaon ['thaonnais'] ['thaonnais']
rabitheillois
['rabitheillois']
Theil-Rabier ['rabitheillois'] ['rabitheillois']
theixois
['theixois']
Theix-Noyalo ['theixois'] ['theixois']
thenaisiens
['thenaisiens']
Thenay ['thenaisiens'] ['thenaisiens']
theneuillais
['theneuillais']
Theneuil ['theneuillais'] ['theneuillais']
therdonniens
['therdonniens']
Therdonne ['therdonniens'] ['therdonniens']
theuvillois, theuvilloises
['theuvillois', 'theuvilloises']
Theuville ['theuvillois, theuvilloises'] ['theuvilloises', 'theuvillois']
theuvillais
['theuvillais']
Theuville-aux-Maillot

Villeneuvette ['villeneuvettois'] ['villeneuvettois']
villenouvellois, villenouvelloises
['villenouvellois', 'villenouvelloises']
Villenouvelle ['villenouvellois, villenouvelloises'] ['villenouvelloises', 'villenouvellois']
villenoisiens,villenoyens
['villenoisiens', 'villenoyens']
Villenoy ['villenoisiens ou villenoyens'] ['villenoisiens', 'villenoyens']
villentroyens
['villentroyens']
Villentrois-Faverolles-en-Berry ['villentroyens'] ['villentroyens']
villepaillais
['villepaillais']
Villepail ['villepaillais'] ['villepaillais']
villeparisiens
['villeparisiens']
Villeparisis ['villeparisiens'] ['villeparisiens']
villeperdusiens
['villeperdusiens']
Villeperdue ['villeperdusiens'] ['villeperdusiens']
villepintois
['villepintois']
Villepinte ['villepintois'] ['villepintois']
villeporcherois
['villeporcherois']
Villeporcher ['villeporcherois'] ['villeporcherois']
villepotais
['villepotais']
Villepot ['villepotais'] ['villepotais']
villepreusien, villepreusienne
['villepreusien', 'villepre

In [152]:
# # save in file
# with open("Data/dbPedia_extracted_demonyms3.json", "w") as fp:
#     json.dump(gentiles_,fp)  

In [160]:
json_file1 = open("Data/dbPedia_extracted_demonyms3.json")
json_file2 = open("Data/infobox_extracted_demonyms.json")
gentiles1 = json.load(json_file1)
gentiles2 = json.load(json_file2)
len(gentiles1), len(gentiles2)

(15297, 22792)

In [163]:
s1 = set(list(gentiles1.keys()))
s2 = set(list(gentiles2.keys()))

In [164]:
len(s1), len(s2)

(15297, 22792)

In [165]:
s3 = s1.union(s2)

In [166]:
len(s3)

22905