# Mise en commun du code et du parser pour jeux de mots

In [29]:
import time
import requests
import shutil
from os import remove, mkdir, rmdir
from statistics import mean, geometric_mean, harmonic_mean
from os.path import exists
import pandas as pd 
from os.path import exists
from alive_progress import alive_bar

# Arborescence du sysème de fichier 
PATH            = "data/"
PATH_REQUEST    = "data/requests/"
PATH_DEF        = "data/def/"
PATH_E          = "data/e/"
PATH_NT         = "data/nt/"
PATH_RE         = "data/re/"
PATH_RS         = "data/rs/"
PATH_RT         = "data/rt/"

# Extension des fichiers 
EXT_REQUEST     = ".txt"
EXT_DEF         = "_def.csv"
EXT_E           = "_e.csv"
EXT_NT          = "_nt.csv"
EXT_RE          = "_re.csv"
EXT_RS          = "_rs.csv"
EXT_RT          = "_rt.csv"

class Terme: 
    def __init__(self, mot):
        self.mot = mot

        if self.isKnow(): 
            self.load()
        else: 
            self.download()

    def getMot(self): 
        return self.mot

    def getID(self): 
        return self.id

    def isKnow(self):
        return exists(PATH_REQUEST + self.mot + EXT_REQUEST)

    def request(self): 
        url = 'https://www.jeuxdemots.org/rezo-dump.php?gotermsubmit=Chercher&gotermrel=' + self.mot + '&rel='
        headers = {'content-type': 'application/json', 'Accept-Charset': 'UTF-8'}
        r = requests.post(url, headers=headers)

        # Enregistrer la requetes dans un fichier text
        f = open(PATH_REQUEST + self.mot + EXT_REQUEST, "w")
        f.write(r.text)
        f.close()

        f = open(PATH_REQUEST + self.mot + EXT_REQUEST, "r")
        lines = f.readlines()
        f.close()

        requestIsGood = "<br>//&nbsp; &nbsp; &nbsp; WARNING TROP GROS.<br>TOWARD CACHE<br>\n" not in lines and "<CODE>MUTED_PLEASE_RESEND\n" not in lines and "<div class=\"jdm-warning\"><br>Le terme '" + self.mot + "' n'existe pas !</div></div>\n"

        if not requestIsGood: 
            remove(PATH_REQUEST + self.mot + EXT_REQUEST)
            

        return requestIsGood

    def download(self):
        i = 0

        while not self.request() and i < 5: 
            i += 1

        self.load()

    def load(self):
        f = open(PATH_REQUEST + self.mot + EXT_REQUEST, "r")
        lines = f.readlines()
        try : 
            code = [lines.index('<CODE>\n'), lines.index('</CODE>\n') +1]
        except ValueError: 
            code = [0, 0]

        index = {}

        for i in range(code[0], code[1]):
            if lines[i] == "<def>\n":
                index["def"] = i

            if lines[i] == "// les types de noeuds (Nodes Types) : nt;ntid;'ntname'\n":
                index["nt"] = i

            if lines[i] == "// les noeuds/termes (Entries) : e;eid;'name';type;w;'formated name' \n":
                index["e"] = i

            if lines[i] == "// les types de relations (Relation Types) : rt;rtid;'trname';'trgpname';'rthelp' \n":
                index["rt"] = i

            if lines[i] == "// les relations sortantes : r;rid;node1;node2;type;w \n":
                index["rs"] = i

            if lines[i] == "// les relations entrantes : r;rid;node1;node2;type;w \n":
                index["re"] = i 

            if lines[i] == "// END\n":
                index["end"] = i 
    

        #index
        m_def   = open(PATH_DEF + self.mot + EXT_DEF, "w")
        m_nt    = open(PATH_NT  + self.mot + EXT_NT,  "w")
        m_e     = open(PATH_E   + self.mot + EXT_E,   "w")
        m_rt    = open(PATH_RT  + self.mot + EXT_RT,  "w")
        m_rs    = open(PATH_RS  + self.mot + EXT_RS,  "w")
        m_re    = open(PATH_RE  + self.mot + EXT_RE,  "w")

        # Head
        m_nt.write("nt;ntid;ntname\n")
        m_e.write("e;eid;name;type;w;help\n")
        m_rt.write("rt;rtid;trname;trgpname;rthelp\n")
        m_rs.write("r;rid;node1;node2;type;w\n")
        m_re.write("r;rid;node1;node2;type;w\n")

        try: 
            for i in range(index['def'], index['nt']):
                m_def.write(lines[i])

            for i in range(index['nt']+2, index['e']):
                m_nt.write(lines[i].replace("'", ""))

            for i in range(index['e']+2, index['rt']):
                if lines[i].count(";") <= 5:
                    m_e.write(lines[i].replace("'", ""))

            for i in range(index['rt']+2, index['rs']):
                m_rt.write(lines[i].replace(" ; ", " ").replace("'", "")) # Modifier les séparateur pour éviter les pb à l'ouverture

            if 're' in index and 'rs' in index: 
                for i in range(index['rs']+2, index['re']):
                    m_rs.write(lines[i])

                for i in range(index['re']+2, index['end']):
                    m_re.write(lines[i])
            elif 'rs' in index: 
                for i in range(index['rs']+2, index['end']):
                    m_rs.write(lines[i])
            elif 're' in index: 
                for i in range(index['re']+2, index['end']):
                    m_re.write(lines[i])
        except KeyError: 
            pass

        m_def.close()
        m_nt.close()
        m_e.close()
        m_rt.close()
        m_rs.close()
        m_re.close()

        self.E  = pd.read_csv(PATH_E  + self.mot + EXT_E,  sep=";")
        self.NT = pd.read_csv(PATH_NT + self.mot + EXT_NT, sep=";")
        self.RE = pd.read_csv(PATH_RE + self.mot + EXT_RE, sep=";")
        self.RS = pd.read_csv(PATH_RS + self.mot + EXT_RS, sep=";")
        self.RT = pd.read_csv(PATH_RT + self.mot + EXT_RT, sep=";")
        self.R  = pd.concat([self.RS, self.RE])

        if self.RS.shape[0] > 0:
            self.id = int(self.RS['node1'][0])
        else:
            self.id = -1

        #self.id = int(self.E.loc[self.E['name'] == self.mot]['eid'][0])

    def initMyLocalDataBase(): 
        if not exists(PATH): 
            mkdir(PATH)

        if not exists(PATH_DEF): 
            mkdir(PATH_DEF)

        if not exists(PATH_REQUEST): 
            mkdir(PATH_REQUEST)

        if not exists(PATH_E): 
            mkdir(PATH_E)

        if not exists(PATH_NT): 
            mkdir(PATH_NT)

        if not exists(PATH_RE): 
            mkdir(PATH_RE)

        if not exists(PATH_RS): 
            mkdir(PATH_RS)

        if not exists(PATH_RT): 
            mkdir(PATH_RT)

    def deleteMyLocalDataBase():
        if exists(PATH):
            shutil.rmtree(PATH)

    def isValidTerme(terme): 
        return " " not in terme and "<" not in terme and">" not in terme and":" not in terme and "\x9c" not in terme and "ï" not in terme and "_" not in terme

## Construction de l'arborescence 

In [30]:
Terme.initMyLocalDataBase() 

## Les fonctions pour extraire les relations

In [31]:
A = Terme("toto")
B = Terme("Suisse")
C = Terme("skate")
D = Terme("bite")
E = Terme("drogue")
F = Terme("sale")
G = Terme("gueez")
H = Terme("bonheur")
I = Terme("objet")

In [40]:
def relationDistance0(A, mean=mean, csv=False):
    relation = A.RS.loc[A.RS['node2'] == A.getID()]

    relation_A_B = {}

    with alive_bar(len(relation.index), force_tty=True) as bar:
        for i, row_i in relation.iterrows() : 
            relation_A_B[A.getMot() + " " + A.RT.loc[A.RT['rtid'] == row_i['type']]['trgpname'].to_string(index=False) + " " + A.getMot()] = row_i['w']
            bar() 

    relation_A_B_sorted = {}

    for i in sorted(relation_A_B, key=relation_A_B.get, reverse=True):
        relation_A_B_sorted[i] = relation_A_B[i]

    for r in relation_A_B_sorted: 
        print(str(r) + " : " + str(relation_A_B_sorted[r]))

    if csv: 
        pass #Ecrire en format csv dans un fichier 


    



In [41]:
relationDistance0(A)

|████████████████████████████████████████| 1/1 [100%] in 0.0s (26.96/s)                                                 
toto r_lemma toto : 55


In [43]:
def relationDistance1(A, B, mean=mean, csv=False): 
    relation = A.RS.loc[A.RS['node2'] == B.getID()]

    relation_A_B = {}

    with alive_bar(len(relation.index), force_tty=True) as bar:
        for i, row_i in relation.iterrows() : 
            relation_A_B[A.getMot() + " " + A.RT.loc[A.RT['rtid'] == row_i['type']]['trgpname'].to_string(index=False) + " " + B.getMot()] = row_i['w']
            bar()

    relation_A_B_sorted = {}

    for i in sorted(relation_A_B, key=relation_A_B.get, reverse=True):
        relation_A_B_sorted[i] = relation_A_B[i]

    for r in relation_A_B_sorted: 
        print(str(r) + " : " + str(relation_A_B_sorted[r]))

    if csv: 
        pass
    
     

In [44]:
relationDistance1(A, B)

|████████████████████████████████████████| 2/2 [100%] in 0.0s (57.34/s)                                                 
toto glose/sens/signification Suisse : 100
toto idée associée Suisse : 15


In [46]:
def relationDistance2_old(A, B, mean=mean, csv=False):
    termes = []
    with alive_bar(len(A.RS['node2']), force_tty=True, title="Download ") as bar:
        for eidS in A.RS['node2']: 
            tmp = len(B.RE.loc[B.RE['node1'] == eidS])
            
            if tmp > 0: 
                terme = A.E.loc[A.E['eid'] == eidS]['name'].to_string(index=False)
                if Terme.isValidTerme(terme):
                    termes.append(terme)
            bar() 
        

    termes = list(dict.fromkeys(termes))
    
    #print(termes)

    relation_A_B = {}

    with alive_bar(len(termes), force_tty=True, title="Search   ") as bar:
        for terme in termes: 
            AB = Terme(terme)

            relation_A_AB = A.RS.loc[A.RS['node2'] == AB.getID()]
            for i, row_i in relation_A_AB.iterrows(): 
                relation_AB_B = B.RE.loc[B.RE['node1'] == AB.getID()]

                for j, row_j in relation_AB_B.iterrows():
                    relation_A_B[A.getMot() + " " + A.RT.loc[A.RT['rtid'] == row_i['type']]['trgpname'].to_string(index=False) + " " + AB.getMot() + " " + B.RT.loc[B.RT['rtid'] == row_j['type']]['trgpname'].to_string(index=False) + " " + B.getMot()] = row_i['w'] + row_j['w'] / 2
                    
            bar()

    relation_A_B_sorted = {}

    for i in sorted(relation_A_B, key=relation_A_B.get, reverse=True):
        relation_A_B_sorted[i] = relation_A_B[i]

    for r in relation_A_B_sorted: 
        print(str(r) + " : " + str(relation_A_B_sorted[r]))

    if csv: 
        pass

In [47]:
relationDistance2_old(A, D)

Download  |████████████████████████████████████████| 422/422 [100%] in 0.1s (5066.58/s)                                 
Search    |████████████████████████████████████████| 12/12 [100%] in 0.6s (19.04/s)                                     
toto partie sexe idée associée bite : 102.5
toto glose/sens/signification type idée associée bite : 78.0
toto idée associée type idée associée bite : 74.0
toto glose/sens/signification type partie bite : 71.0
toto idée associée type partie bite : 67.0
toto caractéristique petit caractéristique-1 bite : 57.0
toto générique individu idée associée bite : 44.0
toto partie bouche lieu>chose bite : 40.0
toto partie main lieu>chose bite : 39.0
toto partie sexe partie bite : 38.5
toto partie corps idée associée bite : 38.0
toto générique individu partie bite : 37.5
toto partie corps partie bite : 33.0
toto sentiment indifférence sentiment-1 bite : 19.5
toto sentiment amour sentiment-1 bite : 19.0
toto sentiment bonheur sentiment-1 bite : 16.5
toto sentimen

In [49]:
def relationDistance2(A, B, mean=mean, csv=False):
    termes = []

    with alive_bar(len(A.RS['node2']), force_tty=True, title="Download ") as bar:
        for eidS in A.RS['node2']: 
            terme = A.E.loc[A.E['eid'] == eidS]['name'].to_string(index=False)

            if Terme.isValidTerme(terme):
                termes.append(terme)
            bar() 


    termes = list(dict.fromkeys(termes))
    
    #print(termes)
    #print(len(termes))

    relation_A_B = {}

    with alive_bar(len(termes), force_tty=True, title="Search   ") as bar:
        for terme in termes:     
            AB = Terme(terme)
            relation_A_AB = A.RS.loc[A.RS['node2'] == AB.getID()]

            for i, row_i in relation_A_AB.iterrows(): 
                relation_AB_B = AB.RS.loc[AB.RS['node2'] == B.getID()]

                for j, row_j in relation_AB_B.iterrows():
                    relation_A_B[A.getMot() + " " + A.RT.loc[A.RT['rtid'] == row_i['type']]['trgpname'].to_string(index=False) + " " + AB.getMot() + " " + B.RT.loc[B.RT['rtid'] == row_j['type']]['trgpname'].to_string(index=False) + " " + B.getMot()] = row_i['w'] + row_j['w'] / 2
            bar()

    relation_A_B_sorted = {}

    for i in sorted(relation_A_B, key=relation_A_B.get, reverse=True):
        relation_A_B_sorted[i] = relation_A_B[i]

    for r in relation_A_B_sorted: 
        print(str(r) + " : " + str(relation_A_B_sorted[r]))

    if csv: 
        pass

In [50]:
relationDistance2(A, G)

Download  |████████████████████████████████████████| 422/422 [100%] in 0.1s (4836.54/s)                                 
Search    |████████████████████████████████████████| 136/136 [100%] in 40.3s (3.37/s)                                   


In [53]:
def relationDistance3(A, D, mean=mean, csv=False): 
    # A -> B -> C -> D
    sortie_A = []

    for eidS in A.RS['node2']: 
        terme_A = A.E.loc[A.E['eid'] == eidS]['name'].to_string(index=False)

        if Terme.isValidTerme(terme=terme_A):
            B = Terme(terme_A)

            for eidS in B.RS['node2']: 
                terme_B = B.E.loc[B.E['eid'] == eidS]['name'].to_string(index=False)

                if Terme.isValidTerme(terme=terme_B):
                    C = Terme(terme_B)
        
                    for eidS in C.RS['node2']: 
                        terme_C = C.E.loc[C.E['eid'] == eidS]['name'].to_string(index=False)

                        if Terme.isValidTerme(terme=terme_C):
                            print(A.getMot() + " -> " + B.getMot() + " -> " + C.getMot() + " -> " + D.getMot(), end="\r")

                            if terme_C == D.getMot(): 
                                relation_A_B = A.RS.loc[A.RS['node2'] == B.getID()]
                                relation_B_C = B.RS.loc[B.RS['node2'] == C.getID()]
                                relation_C_D = C.RS.loc[C.RS['node2'] == D.getID()]

                                

                                for i, row_i in relation_A_B.iterrows(): 
                                    for j, row_j in relation_B_C.iterrows():
                                        for k, row_k in relation_C_D.iterrows(): 
                                            print(A.getMot() + " " + A.RT.loc[A.RT['rtid'] == row_i['type']]['trgpname'].to_string(index=False) + " " + 
                                                            B.getMot() + " " + B.RT.loc[B.RT['rtid'] == row_j['type']]['trgpname'].to_string(index=False) + " " + 
                                                            C.getMot() + " " + C.RT.loc[C.RT['rtid'] == row_k['type']]['trgpname'].to_string(index=False) + " "  + D.getMot() + " : " +
                                                            str(mean([row_i['w'], row_j['w'], row_k['w']])))

                                print("")


In [None]:
relationDistance3(A, B)

In [56]:
def relationDistance4(A, E, mean=mean, csv=False): 
    # A -> B -> C -> D -> E
    sortie_A = []

    for eidS in A.RS['node2']: 
        terme_A = A.E.loc[A.E['eid'] == eidS]['name'].to_string(index=False)

        if Terme.isValidTerme(terme=terme_A):
            B = Terme(terme_A)

            for eidS in B.RS['node2']: 
                terme_B = B.E.loc[B.E['eid'] == eidS]['name'].to_string(index=False)

                if Terme.isValidTerme(terme=terme_B):
                    C = Terme(terme_B)
        
                    for eidS in C.RS['node2']: 
                        terme_C = C.E.loc[C.E['eid'] == eidS]['name'].to_string(index=False)

                        if Terme.isValidTerme(terme=terme_C):
                            D = Terme(terme_C)
                
                            for eidS in D.RS['node2']: 
                                terme_D = D.E.loc[D.E['eid'] == eidS]['name'].to_string(index=False)

                                if Terme.isValidTerme(terme=terme_D):
                                    print(A.getMot() + " -> " + B.getMot() + " -> " + C.getMot() + " -> " + D.getMot(), " -> " + E.getMot(), end="\r")

                                    if terme_D == E.getMot(): 
                                        relation_A_B = A.RS.loc[A.RS['node2'] == B.getID()]
                                        relation_B_C = B.RS.loc[B.RS['node2'] == C.getID()]
                                        relation_C_D = C.RS.loc[C.RS['node2'] == D.getID()]
                                        relation_D_E = D.RS.loc[D.RS['node2'] == E.getID()]

                                        
                                        for i, row_i in relation_A_B.iterrows(): 
                                            for j, row_j in relation_B_C.iterrows():
                                                for k, row_k in relation_C_D.iterrows(): 
                                                    for l, row_l in relation_D_E.iterrows(): 
                                                        print(A.getMot() + " " + A.RT.loc[A.RT['rtid'] == row_i['type']]['trgpname'].to_string(index=False) + " " + 
                                                                        B.getMot() + " " + B.RT.loc[B.RT['rtid'] == row_j['type']]['trgpname'].to_string(index=False) + " " + 
                                                                        C.getMot() + " " + C.RT.loc[C.RT['rtid'] == row_k['type']]['trgpname'].to_string(index=False) + " "  + 
                                                                        D.getMot() + " " + D.RT.loc[D.RT['rtid'] == row_k['type']]['trgpname'].to_string(index=False) + " "  + 
                                                                        E.getMot() + " : " +
                                                                        str(mean([row_i['w'], row_j['w'], row_k['w'], row_l['w']])))

                                        print("")

# Alive progress bar

In [14]:
from alive_progress import alive_bar
import time

with alive_bar(1000, force_tty=True, bar='fish') as bar:
    for i in range(1000):
        time.sleep(0.01)
        bar()

|¸.·´¯`·.·´¯`·.¸¸.·´¯`·.¸.·´¯`·.·´¯`·.¸¸.| 1000/1000 [100%] in 12.0s (83.24/s)                                          


In [15]:
for i in range(10):
    with alive_bar(100, force_tty=True, ctrl_c=False, title=f'Download {i}') as bar:
        for i in range(100):
            time.sleep(0.02)
            bar()

Download 0 |████████████████████████████████████████| 100/100 [100%] in 2.4s (42.23/s)                                  
Download 1 |████████████████████████████████████████| 100/100 [100%] in 2.4s (41.23/s)                                  
Download 2 |████████████████████████████████████████| 100/100 [100%] in 2.4s (41.46/s)                                  
Download 3 |████████████████████████████████████████| 100/100 [100%] in 2.4s (41.08/s)                                  
Download 4 |████████████████████████████████████████| 100/100 [100%] in 2.4s (41.06/s)                                  
Download 5 |████████████████████████████████████████| 100/100 [100%] in 2.4s (41.23/s)                                  
Download 6 |████████████████████████████████████████| 100/100 [100%] in 2.4s (41.55/s)                                  
Download 7 |████████████████████████████████████████| 100/100 [100%] in 2.4s (42.54/s)                                  
Download 8 |████████████████████

In [16]:
import time
for x in range (0,5):  
    b = "Loading" + "." * x
    print (b, end="\r")
    time.sleep(1)

Loading....

In [46]:
mot = Terme("adn")

id = int(mot.RS['node1'][0])

print(id)
print(mot.getID())

16747
16747


# Delete

In [58]:
Terme.deleteMyLocalDataBase() 

Trouver la requête qui à merdé...

In [15]:

# import required module
import os
# assign directory
directory = 'data/requests'
 
# iterate over files in
# that directory
for filename in os.listdir(directory):
    f = os.path.join(directory, filename)
    # checking if it is a file
    if os.path.isfile(f):
        t = filename.replace(".txt", "")
        print(t)
        try: 
            T = Terme(t)
        except ValueError: 
            print("=====================================================")
        

drogue
eye
astucieux
Entreprise
politique
bêtises
indifférence
bilatérien
phthiraptère
corps
zéro
bonhomme
pancrustacé
vivant
bras
thorax
parasite
squelette
main
protostome
sexe
blagues
ADN
mandibulé
mandibulate
Totò
choano-organisme
jambe
toto
objet
bite
opisthoconte
dormir
mourir
calcul
sale
insectes
linguistique
satisfaction
surnom
Japon
cancre
type
anxiété
Zoologie
cheveux
surprise
entomologie
internet
langage
patte
choanobionte
coelomé
tristesse
botanique
toto-résistance
personne
manger
mort
drôle
uniconte
peur
pédiculidé
célomate
filozoaire
euarthropode
invertébré
blague
métazoaire
Suisse
personnage
bonheur
abdomen
individu
adn
maladie
hyponeurien
vermine
femelle
cou
tête
antiparasitaire
rire
hexapode
eucaryote
oeil
partisan
regarder
pattes
zoologie
amour
hématophage
nerf
protostomien
informatique
phtiriase
parler
humour
animal
ti
école
insecte
eumétazoaire
pied
crainte
rongeur
entreprises
méfiance
holozoaire
nez
élève
néphrozoaire
bêtise
Toto
addition
arthropode
ecdysozoaire
pas

In [9]:
def compose (*functions):
    def inner(arg):
        for f in reversed(functions):
            arg = f(arg)
        return arg
    return inner

def square(x):
    return x ** 2

def increment(x):
    return x + 1

def half(x):
    return x / 2

composed = compose(square, increment, half) # square(increment(half(x)))
composed(5)

In [20]:
from statistics import mean, geometric_mean, harmonic_mean

def compose (function):
    return function([2, 4, 2])



print(compose(geometric_mean)) 


2.519842099789746


In [58]:
def relation(A, B, deep=1, mean=mean, csv=False): 
    if deep == 0: 
        relationDistance0(A, B, mean=mean, csv=csv)
    elif deep == 1: 
        relationDistance1(A, B, mean=mean, csv=csv)
    elif deep == 2: 
        relationDistance2(A, B, mean=mean, csv=csv)
    elif deep == 3: 
        relationDistance3(A, B, mean=mean, csv=csv)
    elif deep == 4: 
        relationDistance4(A, B, mean=mean, csv=csv)
    else:
        print("Usage : ...")