# Mise en commun du code et du parser pour JeuxDeMots

In [38]:
import requests
import pandas as pd 
from os.path import exists

# File-system layout: one directory per kind of file handled by Terme
# (raw server responses, then one directory per section of a response).
PATH_REQUEST = "data/requests/"
PATH_DEF  = "data/def/"
PATH_E    = "data/e/"
PATH_NT   = "data/nt/"
PATH_RE   = "data/re/"
PATH_RS   = "data/rs/"
PATH_RT   = "data/rt/"

# File extensions; a file name is built as <directory> + <word> + <extension>.
EXT_REQUEST = ".txt"
EXT_DEF  = "_def.csv"
EXT_E    = "_e.csv"
EXT_NT   = "_nt.csv"
EXT_RE   = "_re.csv"
EXT_RS   = "_rs.csv"
EXT_RT   = "_rt.csv"

class Terme:
    """A word fetched from jeuxdemots.org (rezo-dump) with an on-disk cache.

    Building a ``Terme`` makes sure the raw dump of the word exists under
    ``PATH_REQUEST`` (downloading it when it is not there yet), splits the
    dump into one ``;``-separated file per section and loads the sections as
    pandas DataFrames.

    Attributes set by ``load``:
        E:  entries (nodes) table, columns ``e;eid;name;type;w;help``.
        NT: node types table, columns ``nt;ntid;ntname``.
        RT: relation types table, columns ``rt;rtid;trname;trgpname;rthelp``.
        RS: outgoing relations table, columns ``r;rid;node1;node2;type;w``.
        RE: incoming relations table, columns ``r;rid;node1;node2;type;w``.
        id: ``eid`` of the word itself in ``E``.
    """

    # Exact header line of each section of a dump -> short section key.
    # The lines are matched verbatim (trailing spaces and newline included).
    _SECTION_MARKERS = {
        "<def>\n": "def",
        "// les types de noeuds (Nodes Types) : nt;ntid;'ntname'\n": "nt",
        "// les noeuds/termes (Entries) : e;eid;'name';type;w;'formated name' \n": "e",
        "// les types de relations (Relation Types) : rt;rtid;'trname';'trgpname';'rthelp' \n": "rt",
        "// les relations sortantes : r;rid;node1;node2;type;w \n": "rs",
        "// les relations entrantes : r;rid;node1;node2;type;w \n": "re",
        "// END\n": "end",
    }

    def __init__(self, mot):
        """Load ``mot`` from the local cache, downloading it first if needed."""
        self.mot = mot

        if self.isKnow():
            self.load()
        else:
            self.download()

    def getMot(self):
        """Return the word this object was built for."""
        return self.mot

    def getID(self):
        """Return the ``eid`` of the word (set by ``load``)."""
        return self.id

    def isKnow(self):
        """Return True when a raw dump of the word is already on disk."""
        return exists(PATH_REQUEST + self.mot + EXT_REQUEST)

    def request(self):
        """Download the dump of the word and store it under ``PATH_REQUEST``.

        Returns:
            False when the stored response contains the server's
            "WARNING TROP GROS ... TOWARD CACHE" line, True otherwise.
        """
        url = 'https://www.jeuxdemots.org/rezo-dump.php?gotermsubmit=Chercher&gotermrel=' + self.mot + '&rel='
        headers = {'content-type': 'application/json', 'Accept-Charset': 'UTF-8'}
        # NOTE(review): the HTTP status is not checked, so an error page would
        # be cached like a valid dump - confirm whether that is acceptable.
        r = requests.post(url, headers=headers)

        path = PATH_REQUEST + self.mot + EXT_REQUEST

        # Save the response to a text file.
        # NOTE(review): files use the platform default encoding, as before, so
        # that dumps already on disk stay readable - confirm before pinning one.
        with open(path, "w") as f:
            f.write(r.text)

        # Read the file back so the check sees exactly what load() will see.
        with open(path, "r") as f:
            lines = f.readlines()

        return "<br>//&nbsp; &nbsp; &nbsp; WARNING TROP GROS.<br>TOWARD CACHE<br>\n" not in lines

    def download(self):
        """Request the dump, retrying while the server answers with the warning.

        After the first attempt, up to five more requests are made; whatever
        was stored last is then handed to ``load``.
        """
        i = 0

        while not self.request() and i < 5:
            print("Pb download")
            i += 1

        self.load()

    @staticmethod
    def _rows(lines, index, section, followers):
        """Return the data lines of ``section``.

        The section starts two lines after its header (header line plus the
        blank line after it) and stops at the first header of ``followers``
        found in ``index``, or at the END marker when none of them is present.
        """
        start = index[section] + 2
        for follower in followers:
            if follower in index:
                return lines[start:index[follower]]
        return lines[start:index['end']]

    @staticmethod
    def _write(path, header, rows):
        """Write ``header`` (skipped when None) followed by ``rows`` to ``path``."""
        with open(path, "w") as out:
            if header is not None:
                out.write(header)
            out.writelines(rows)

    def load(self):
        """Split the cached dump into per-section files and load them.

        Raises:
            ValueError: when the dump has no ``<CODE>``/``</CODE>`` line, or
                when the word is not found in its own entries table.
            KeyError: when a mandatory section (def, nt, e, rt, END) is absent.
        """
        with open(PATH_REQUEST + self.mot + EXT_REQUEST, "r") as f:
            lines = f.readlines()

        # Only the lines between the <CODE> tags are scanned for sections.
        first = lines.index('<CODE>\n')
        last = lines.index('</CODE>\n') + 1

        # Line number of each section header found in the dump.
        index = {}
        for i in range(first, last):
            section = self._SECTION_MARKERS.get(lines[i])
            if section is not None:
                index[section] = i

        def_rows = lines[index['def']:index['nt']]
        nt_rows = [line.replace("'", "") for line in lines[index['nt'] + 2:index['e']]]
        # Entries with more than 5 separators do not fit the 6-column header.
        e_rows = [
            line.replace("'", "")
            for line in lines[index['e'] + 2:index['rt']]
            if line.count(";") <= 5
        ]
        # The relation sections are optional: the relation-types section ends
        # at whichever of them comes first, or at END when both are missing.
        # " ; " inside a field is rewritten so it is not read as a separator.
        rt_rows = [
            line.replace(" ; ", " ").replace("'", "")
            for line in self._rows(lines, index, 'rt', ('rs', 're'))
        ]
        rs_rows = self._rows(lines, index, 'rs', ('re',)) if 'rs' in index else []
        re_rows = self._rows(lines, index, 're', ()) if 're' in index else []

        self._write(PATH_DEF + self.mot + EXT_DEF, None, def_rows)
        self._write(PATH_NT + self.mot + EXT_NT, "nt;ntid;ntname\n", nt_rows)
        self._write(PATH_E + self.mot + EXT_E, "e;eid;name;type;w;help\n", e_rows)
        self._write(PATH_RT + self.mot + EXT_RT, "rt;rtid;trname;trgpname;rthelp\n", rt_rows)
        self._write(PATH_RS + self.mot + EXT_RS, "r;rid;node1;node2;type;w\n", rs_rows)
        self._write(PATH_RE + self.mot + EXT_RE, "r;rid;node1;node2;type;w\n", re_rows)

        self.E  = pd.read_csv(PATH_E  + self.mot + EXT_E,  sep=";")
        self.NT = pd.read_csv(PATH_NT + self.mot + EXT_NT, sep=";")
        self.RE = pd.read_csv(PATH_RE + self.mot + EXT_RE, sep=";")
        self.RS = pd.read_csv(PATH_RS + self.mot + EXT_RS, sep=";")
        self.RT = pd.read_csv(PATH_RT + self.mot + EXT_RT, sep=";")

        # Apostrophes were stripped from the entries file above, so the word
        # must be stripped the same way before it is looked up.
        name = self.mot.replace("'", "")
        matches = self.E.loc[self.E['name'] == name, 'eid']
        if matches.empty:
            raise ValueError("no entry named " + repr(name) + " in the dump of " + repr(self.mot))
        self.id = int(matches.iloc[0])


In [43]:
# Build one Terme per word. Each constructor call loads the word from the
# local files when its dump is already on disk, and downloads it otherwise.
A = Terme("toto")
B = Terme("Suisse")
C = Terme("skate")
D = Terme("bite")
E = Terme("drogue")
F = Terme("sale")
G = Terme("surprise")
H = Terme("bonheur")

In [44]:
def relationDistance1(A, B):
    """Print the direct relations going from A to B.

    For every row of A's outgoing relations whose ``node2`` is B's id, one
    line "<A word> <relation label> <B word>" is printed, the label being the
    ``trgpname`` of the relation type in A's relation-types table.
    """
    target_id = B.getID()
    type_ids = A.RS[A.RS['node2'] == target_id]['type']

    for type_id in type_ids:
        label = A.RT[A.RT['rtid'] == type_id]['trgpname'].to_string(index=False)
        print(" ".join([A.getMot(), label, B.getMot()]))
    

In [72]:
def relationDistance2(A, B):
    """Print the two-step paths A -> X -> B found through a shared node X.

    X ranges over the targets of A's outgoing relations that also appear as
    the source (``node1``) of one of B's incoming relations. The list of
    matching names and the total number of matching incoming rows are
    printed first; then, for each usable name, a Terme is built and the
    relations A -> X and X -> B are printed.
    """
    total_links = 0
    bridge_names = []

    for node_id in A.RS['node2']:
        hits = len(B.RE[B.RE['node1'] == node_id])
        if hits > 0:
            bridge_names.append(A.E[A.E['eid'] == node_id]['name'].to_string(index=False))
        total_links += hits

    print(bridge_names)
    print(total_links)

    # Names containing any of these characters are not turned into a Terme.
    rejected = (" ", "<", ">", ":", "\x9c")

    for name in bridge_names:
        if any(mark in name for mark in rejected):
            continue

        print(" ========================= " + name + " ========================= ")
        AB = Terme(name)
        relationDistance1(A, AB)
        relationDistance1(AB, B)
        print("////////")

        bridge_id = AB.getID()
        first_hop_types = A.RS[A.RS['node2'] == bridge_id]['type']
        # The second hop is read from B's incoming relations rather than from
        # AB's outgoing ones; it does not depend on the first hop.
        second_hop_types = B.RE[B.RE['node1'] == bridge_id]['type']

        for i in first_hop_types:
            for j in second_hop_types:
                first_label = A.RT[A.RT['rtid'] == i]['trgpname'].to_string(index=False)
                second_label = AB.RT[AB.RT['rtid'] == j]['trgpname'].to_string(index=False)
                print(" ".join([A.getMot(), first_label, AB.getMot(), second_label, B.getMot()]))

            


In [73]:
# Direct relations between the same pair, disabled here:
# relationDistance1(A, F)
# Print the two-step paths from A ("toto") to F ("sale").
relationDistance2(A, F)

['acide désoxyribonucléique', 'adn', 'pied', 'oeil', 'yeux', 'corps', 'ADN', 'oeil>330323', 'pediculus humanus', 'maladie', 'cheveux', 'sale', 'corps>112609', 'patte', 'pou de lhomme', 'oeil>44335', 'Pediculus humanus', 'en:louse', 'se reproduire', 'animal', 'politique', 'tête', 'pou', 'toto>88245', 'politique', 'pou de lhomme', 'pediculus humanus', 'Pediculus humanus', 'en:louse', 'pou', 'animal', 'en:neck', 'en:DNA', 'en:eye', 'eye', '\x9cil', 'squelette', 'bassin>112609', 'neck', 'acide désoxyribo-nucléique', 'en:deoxyribonucleic acid', 'ADN', 'corps', 'oeil', 'tête', 'oeil>330323', 'acide désoxyribonucléique', 'oeil>44335', 'sexe', 'yeux', 'corps>112609', 'adn', 'bras', 'coeur', 'pied', 'bouche>112609', 'cou', 'main', 'squelette>112609', 'bouche', 'sexe>218086', 'jambes', 'nez', 'visage', 'jambe', 'patte', 'tête', 'mort', 'vivant', 'petit', 'toto', 'regarder', 'dormir', 'parler', 'mourir', 'mourir>126720', 'vivre>160251', 'se reproduire', 'vivre', 'indifférence', 'calcul', 'pou', '

## Tests

In [69]:
# Scratch test: the same steps as relationDistance1(A, B), run at top level.
# Rows of A's outgoing relations whose target node is B.
relation = A.RS.loc[A.RS['node2'] == B.getID()]

# Keep only the relation-type id of each matching row.
relation = relation['type']


for i in relation: 
    # Print "<A word> <trgpname of relation type i> <B word>".
    print(A.getMot() + " " + A.RT.loc[A.RT['rtid'] == i]['trgpname'].to_string(index=False) + " " + B.getMot())

toto idée associée Suisse
toto glose/sens/signification Suisse


In [None]:
# Sum the weights 'w' of the relations found between every ordered pair of
# ids in `eids`, printing each pair whose summed weight is not zero.
# NOTE(review): `eids` and `R` are not defined in the visible cells. From the
# usage below, `eids` looks like a mapping id -> label and `R` like a relation
# table whose node1/node2 columns hold strings - confirm before running.
tmp = 0 
corr = 0

for eid1 in eids: 
    for eid2 in eids : 
        # Summed weight of the rows going from eid1 to eid2.
        tmp = R.loc[R['node1'] == str(eid1)].loc[R['node2'] == str(eid2)]['w'].astype('int32').sum()
        corr += tmp
        if tmp != 0 : 
            print(str(tmp) + " : " + str(eids[eid1]) + " -> " + str(eids[eid2]))
            
        #print("tmp : " + str(tmp)) 
        #print("sum : " + str(corr))

# Grand total over all pairs.
print(corr)