# Mise en commun du code et du parser pour jeux de mots

In [5]:
import time
import requests
import pandas as pd 
from os.path import exists
from alive_progress import alive_bar


# Arborescence du sysème de fichier 
PATH            = "data/"
PATH_REQUEST    = "data/requests/"
PATH_DEF        = "data/def/"
PATH_E          = "data/e/"
PATH_NT         = "data/nt/"
PATH_RE         = "data/re/"
PATH_RS         = "data/rs/"
PATH_RT         = "data/rt/"

# Extension des fichiers 
EXT_REQUEST     = ".txt"
EXT_DEF         = "_def.csv"
EXT_E           = "_e.csv"
EXT_NT          = "_nt.csv"
EXT_RE          = "_re.csv"
EXT_RS          = "_rs.csv"
EXT_RT          = "_rt.csv"

class Terme: 
    def __init__(self, mot):
        self.mot = mot

        if self.isKnow(): 
            self.load()
        else: 
            self.download()

    def getMot(self): 
        return self.mot

    def getID(self): 
        return self.id

    def isKnow(self):
        return exists(PATH_REQUEST + self.mot + EXT_REQUEST)

    def request(self): 
        url = 'https://www.jeuxdemots.org/rezo-dump.php?gotermsubmit=Chercher&gotermrel=' + self.mot + '&rel='
        headers = {'content-type': 'application/json', 'Accept-Charset': 'UTF-8'}
        r = requests.post(url, headers=headers)

        # Enregistrer la requetes dans un fichier text
        f = open(PATH_REQUEST + self.mot + EXT_REQUEST, "w")
        f.write(r.text)
        f.close()

        f = open(PATH_REQUEST + self.mot + EXT_REQUEST, "r")
        lines = f.readlines()
        f.close()

        return "<br>//&nbsp; &nbsp; &nbsp; WARNING TROP GROS.<br>TOWARD CACHE<br>\n" not in lines

    def download(self):
        i = 0

        while not self.request() and i < 5: 
            print("Pb download")
            i += 1

        self.load()

    def load(self):
        f = open(PATH_REQUEST + self.mot + EXT_REQUEST, "r")
        lines = f.readlines()

        code = [lines.index('<CODE>\n'), lines.index('</CODE>\n') +1]
        #print(code)

        index = {}

        for i in range(code[0], code[1]):
            if lines[i] == "<def>\n":
                index["def"] = i

            if lines[i] == "// les types de noeuds (Nodes Types) : nt;ntid;'ntname'\n":
                index["nt"] = i

            if lines[i] == "// les noeuds/termes (Entries) : e;eid;'name';type;w;'formated name' \n":
                index["e"] = i

            if lines[i] == "// les types de relations (Relation Types) : rt;rtid;'trname';'trgpname';'rthelp' \n":
                index["rt"] = i

            if lines[i] == "// les relations sortantes : r;rid;node1;node2;type;w \n":
                index["rs"] = i

            if lines[i] == "// les relations entrantes : r;rid;node1;node2;type;w \n":
                index["re"] = i 

            if lines[i] == "// END\n":
                index["end"] = i 

        #index
        m_def   = open(PATH_DEF + self.mot + EXT_DEF, "w")
        m_nt    = open(PATH_NT  + self.mot + EXT_NT,  "w")
        m_e     = open(PATH_E   + self.mot + EXT_E,   "w")
        m_rt    = open(PATH_RT  + self.mot + EXT_RT,  "w")
        m_rs    = open(PATH_RS  + self.mot + EXT_RS,  "w")
        m_re    = open(PATH_RE  + self.mot + EXT_RE,  "w")

        # Head
        m_nt.write("nt;ntid;ntname\n")
        m_e.write("e;eid;name;type;w;help\n")
        m_rt.write("rt;rtid;trname;trgpname;rthelp\n")
        m_rs.write("r;rid;node1;node2;type;w\n")
        m_re.write("r;rid;node1;node2;type;w\n")

        for i in range(index['def'], index['nt']):
            m_def.write(lines[i])

        for i in range(index['nt']+2, index['e']):
            m_nt.write(lines[i].replace("'", ""))

        for i in range(index['e']+2, index['rt']):
            if lines[i].count(";") <= 5:
                m_e.write(lines[i].replace("'", ""))

        for i in range(index['rt']+2, index['rs']):
            m_rt.write(lines[i].replace(" ; ", " ").replace("'", "")) # Modifier les séparateur pour éviter les pb à l'ouverture

        if 're' in index and 'rs' in index: 
            for i in range(index['rs']+2, index['re']):
                m_rs.write(lines[i])

            for i in range(index['re']+2, index['end']):
                m_re.write(lines[i])
        elif 'rs' in index: 
            for i in range(index['rs']+2, index['end']):
                m_rs.write(lines[i])
        elif 're' in index: 
            for i in range(index['re']+2, index['end']):
                m_re.write(lines[i])

        m_def.close()
        m_nt.close()
        m_e.close()
        m_rt.close()
        m_rs.close()
        m_re.close()

        self.E  = pd.read_csv(PATH_E  + self.mot + EXT_E,  sep=";")
        self.NT = pd.read_csv(PATH_NT + self.mot + EXT_NT, sep=";")
        self.RE = pd.read_csv(PATH_RE + self.mot + EXT_RE, sep=";")
        self.RS = pd.read_csv(PATH_RS + self.mot + EXT_RS, sep=";")
        self.RT = pd.read_csv(PATH_RT + self.mot + EXT_RT, sep=";")
        self.R  = pd.concat([self.RS, self.RE])

        self.id = int(self.E.loc[self.E['name'] == self.mot]['eid'])


In [6]:
A = Terme("toto")
B = Terme("Suisse")
C = Terme("skate")
D = Terme("bite")
E = Terme("drogue")
F = Terme("sale")
G = Terme("surprise")
H = Terme("bonheur")

In [7]:
def relationDistance1(A, B): 
    relation = A.RS.loc[A.RS['node2'] == B.getID()]
    relation = relation['type']

    for i in relation: 
        print(A.getMot() + " " + A.RT.loc[A.RT['rtid'] == i]['trgpname'].to_string(index=False) + " " + B.getMot())
    

In [10]:
def relationDistance2(A, B):
    c = 0
    termes = []

    for eidS in A.RS['node2']: 
        tmp = len(B.RE.loc[B.RE['node1'] == eidS])
        
        if tmp > 0: 
            terme = A.E.loc[A.E['eid'] == eidS]['name'].to_string(index=False)
            if " " not in terme and "<" not in terme and">" not in terme and":" not in terme and"\x9c" not in terme:
                termes.append(terme)

        c += tmp 

    termes = list(dict.fromkeys(termes))
    
    print(termes)
    print(c)

    relation_A_B = {}

    with alive_bar(len(termes) + 1, force_tty=True) as bar:
        for terme in termes:     
            #print(" ========================= " + terme + " ========================= ")
            AB = Terme(terme)
            #relationDistance1(A, AB)
            #relationDistance1(AB, B)
            #print("////////")

            relation_A_AB = A.RS.loc[A.RS['node2'] == AB.getID()]
            #relation_A_AB = relation_A_AB['type']
            bar()
            for i, row_i in relation_A_AB.iterrows(): 
                # Regarder la différence entre les deux lignes 
                #relation_AB_B = AB.RS.loc[AB.RS['node2'] == B.getID()] 
                relation_AB_B = B.RE.loc[B.RE['node1'] == AB.getID()]
                #relation_AB_B = relation_AB_B['type']

                for j, row_j in relation_AB_B.iterrows():
                    #print(A.getMot() + " " + A.RT.loc[A.RT['rtid'] == i]['trgpname'].to_string(index=False) + " " + AB.getMot() + " " + AB.RT.loc[AB.RT['rtid'] == j]['trgpname'].to_string(index=False) + " " + B.getMot())
                    relation_A_B[A.getMot() + " " + A.RT.loc[A.RT['rtid'] == row_i['type']]['trgpname'].to_string(index=False) + " " + AB.getMot() + " " + B.RT.loc[B.RT['rtid'] == row_j['type']]['trgpname'].to_string(index=False) + " " + B.getMot()] = row_i['w'] + row_j['w'] / 2
                    
        bar()

    #print(relation_A_B)
    relation_A_B_sorted = {}

    for i in sorted(relation_A_B, key=relation_A_B.get, reverse=True):
        relation_A_B_sorted[i] = relation_A_B[i]

    for r in relation_A_B_sorted: 
        print(str(r) + " : " + str(relation_A_B_sorted[r]))

           

            


In [13]:
#relationDistance1(A, F)
relationDistance2(C, F)

['punk', 'piscine', 'chaussure', 'basket', 'terrain', 'parc', 'rat', 'objet', 'voiture', 'art']
13
|████████████████████████████████████████| 11/11 [100%] in 15.1s (0.73/s)                                               
skate idée associée rat idée associée sale : 66.5
skate générique objet caractéristique sale : 53.5
skate magn voiture caractéristique sale : 36.0
skate idée associée basket caractéristique sale : 31.0
skate idée associée piscine caractéristique sale : 28.0
skate r_wiki art domaine-1 sale : 23.0
skate r_wiki art idée associée sale : 21.5
skate idée associée rat caractéristique sale : 16.5
skate idée associée objet caractéristique sale : 15.5
skate idée associée chaussure caractéristique sale : 12.0
skate idée associée terrain caractéristique sale : 11.5
skate idée associée parc caractéristique sale : 11.5
skate idée associée punk r_aki sale : 1.5


In [17]:
relationDistance2(A, F)

['adn', 'pied', 'oeil', 'yeux', 'corps', 'ADN', 'maladie', 'cheveux', 'sale', 'patte', 'animal', 'politique', 'tête', 'pou', 'eye', 'squelette', 'neck', 'sexe', 'bras', 'coeur', 'cou', 'main', 'bouche', 'jambes', 'nez', 'visage', 'jambe', 'mort', 'vivant', 'petit', 'toto', 'regarder', 'dormir', 'parler', 'mourir', 'vivre', 'indifférence', 'calcul', 'gamin']
1889
toto idée associée adn tout sale
toto idée associée adn idée associée sale
toto partie adn tout sale
toto partie adn idée associée sale
toto idée associée pied caractéristique sale
toto idée associée pied tout sale
toto idée associée pied idée associée sale
toto partie pied caractéristique sale
toto partie pied tout sale
toto partie pied idée associée sale
toto idée associée oeil tout sale
toto idée associée oeil idée associée sale
toto partie oeil tout sale
toto partie oeil idée associée sale
toto idée associée yeux tout sale
toto idée associée yeux idée associée sale
toto partie yeux tout sale
toto partie yeux idée associée s

## Tests

In [69]:
relation = A.RS.loc[A.RS['node2'] == B.getID()]

relation = relation['type']


for i in relation: 
    print(A.getMot() + " " + A.RT.loc[A.RT['rtid'] == i]['trgpname'].to_string(index=False) + " " + B.getMot())

toto idée associée Suisse
toto glose/sens/signification Suisse


In [19]:
mylist = ["a", "b", "a", "c", "c"]
mylist = list(dict.fromkeys(mylist))
print(mylist)

['a', 'b', 'c']


# Alive progress bar

In [8]:
from alive_progress import alive_bar
import time

with alive_bar(1000, force_tty=True, bar='fish') as bar:
    for i in range(1000):
        time.sleep(0.01)
        bar()

|¸.·´¯`·.·´¯`·.¸¸.·´¯`·.¸.·´¯`·.·´¯`·.¸¸.| 1000/1000 [100%] in 10.2s (97.88/s)                                          in 4s (98.3/s, eta: 6s) 


In [10]:
for i in range(10):
    with alive_bar(100, force_tty=True, ctrl_c=False, title=f'Download {i}') as bar:
        for i in range(100):
            time.sleep(0.02)
            bar()

Download 0 |████████████████████████████████████████| 100/100 [100%] in 2.1s (48.50/s)                                  
Download 1 |████████████████████████████████████████| 100/100 [100%] in 2.0s (49.52/s)                                  
Download 2 |████████████████████████████████████████| 100/100 [100%] in 2.1s (48.28/s)                                  
Download 3 |████████████████████████████████████████| 100/100 [100%] in 2.1s (48.22/s)                                  
Download 4 |████████████████████████████████████████| 100/100 [100%] in 2.0s (48.93/s)                                  
Download 5 |████████████████████████████████████████| 100/100 [100%] in 2.0s (49.40/s)                                  
Download 6 |████████████████████████████████████████| 100/100 [100%] in 2.1s (48.20/s)                                  
Download 7 |████████████████████████████████████████| 100/100 [100%] in 2.0s (49.03/s)                                  
Download 8 |████████████████████

# Init

In [9]:
from os.path import exists
from os import mkdir, rmdir


if not exists(PATH): 
    mkdir(PATH)

if not exists(PATH_DEF): 
    mkdir(PATH_DEF)

if not exists(PATH_REQUEST): 
    mkdir(PATH_REQUEST)

if not exists(PATH_E): 
    mkdir(PATH_E)

if not exists(PATH_NT): 
    mkdir(PATH_NT)

if not exists(PATH_RE): 
    mkdir(PATH_RE)

if not exists(PATH_RS): 
    mkdir(PATH_RS)

if not exists(PATH_RT): 
    mkdir(PATH_RT)

    

# Delete

In [8]:
import shutil
from os.path import exists

if exists(PATH):
    shutil.rmtree(PATH)