# Mise en commun du code et du parser pour jeux de mots

In [18]:
import os
from os.path import exists

import pandas as pd
import requests

# File-system layout: one directory per kind of stored data.
PATH_REQUEST = "data/requests/"
PATH_DEF = "data/def/"
PATH_E = "data/e/"
PATH_NT = "data/nt/"
PATH_RE = "data/re/"
PATH_RS = "data/rs/"
PATH_RT = "data/rt/"

# File-name suffixes, appended to the term to build each file name.
EXT_REQUEST = ".txt"
EXT_DEF = "_def.csv"
EXT_E = "_e.csv"
EXT_NT = "_nt.csv"
EXT_RE = "_re.csv"
EXT_RS = "_rs.csv"
EXT_RT = "_rt.csv"

class Terme:
    """A JeuxDeMots term backed by an on-disk cache of its network dump.

    Building an instance loads the raw dump of ``mot`` from ``PATH_REQUEST``
    (downloading it first when it is not cached yet), splits it into one
    ``;``-separated CSV file per section and exposes the tabular sections
    as pandas DataFrames:

    * ``E``  -- nodes/terms (entries)
    * ``NT`` -- node types
    * ``RT`` -- relation types
    * ``RS`` -- outgoing relations
    * ``RE`` -- incoming relations

    The definition section is only written to disk (``PATH_DEF``).
    """

    # Every file is written with an explicit encoding so the result does not
    # depend on the platform locale and matches what pd.read_csv expects.
    _ENCODING = "utf-8"

    # Seconds to wait for the server before giving up.
    _REQUEST_TIMEOUT = 30

    # Exact marker lines of the dump, mapped to the section they open.
    _SECTION_MARKERS = {
        "<def>\n": "def",
        "// les types de noeuds (Nodes Types) : nt;ntid;'ntname'\n": "nt",
        "// les noeuds/termes (Entries) : e;eid;'name';type;w;'formated name' \n": "e",
        "// les types de relations (Relation Types) : rt;rtid;'trname';'trgpname';'rthelp' \n": "rt",
        "// les relations sortantes : r;rid;node1;node2;type;w \n": "rs",
        "// les relations entrantes : r;rid;node1;node2;type;w \n": "re",
        "// END\n": "end",
    }

    def __init__(self, mot: str) -> None:
        """Load the term ``mot`` from the cache, downloading it if needed."""
        self.mot = mot

        if self.isKnow():
            self.load()
        else:
            self.download()

    def getMot(self) -> str:
        """Return the term this instance was built for."""
        return self.mot

    def isKnow(self) -> bool:
        """Return True when the raw dump of the term is already cached."""
        return exists(self._request_path())

    def download(self) -> None:
        """Fetch the dump of the term, cache it on disk, then load it.

        Raises:
            requests.RequestException: on network failure, timeout or an
                HTTP error status.
            ValueError: when the response holds no ``<CODE>`` block; such a
                response is not cached, so a later attempt can retry.
        """
        url = 'https://www.jeuxdemots.org/rezo-dump.php'
        # Let requests build the query string so that terms containing
        # spaces, '&', '#', ... are escaped instead of corrupting the URL.
        params = {'gotermsubmit': 'Chercher', 'gotermrel': self.mot, 'rel': ''}
        headers = {'content-type': 'application/json', 'Accept-Charset': 'UTF-8'}
        r = requests.post(url, params=params, headers=headers,
                          timeout=self._REQUEST_TIMEOUT)
        r.raise_for_status()

        if '<CODE>' not in r.text:
            raise ValueError(
                "no <CODE> block in the response for term %r" % self.mot
            )

        # Save the raw response to a text file so it is only fetched once.
        path = self._request_path()
        self._ensure_parent_dir(path)
        with open(path, "w", encoding=self._ENCODING) as f:
            f.write(r.text)

        self.load()

    def load(self) -> None:
        """Split the cached dump into CSV files and read them as DataFrames.

        Raises:
            ValueError: when the dump has no ``<CODE>``/``</CODE>`` lines.
            KeyError: when a section marker is missing from the dump.
        """
        lines = self._read_request_lines()
        index = self._locate_sections(lines)

        def rows(section, following):
            # Data starts two lines below the marker: the marker line and
            # the line right after it are skipped.
            return lines[index[section] + 2:index[following]]

        # The definition block is copied verbatim, marker line included.
        self._write_section(PATH_DEF, EXT_DEF, None,
                            lines[index["def"]:index["nt"]])
        # Quotes are dropped so that pandas sees plain ';'-separated fields.
        self._write_section(PATH_NT, EXT_NT, "nt;ntid;ntname\n",
                            (row.replace("'", "") for row in rows("nt", "e")))
        self._write_section(PATH_E, EXT_E, "e;eid;name;type;w;help\n",
                            (row.replace("'", "") for row in rows("e", "rt")))
        # " ; " inside a field would be read as a separator when the file is
        # opened, so it is replaced by a space.
        self._write_section(PATH_RT, EXT_RT, "rt;rtid;trname;trgpname;rthelp\n",
                            (row.replace(" ; ", " ").replace("'", "")
                             for row in rows("rt", "rs")))
        self._write_section(PATH_RS, EXT_RS, "r;rid;node1;node2;type;w\n",
                            rows("rs", "re"))
        self._write_section(PATH_RE, EXT_RE, "r;rid;node1;node2;type;w\n",
                            rows("re", "end"))

        self.E = self._read_table(PATH_E, EXT_E)
        self.NT = self._read_table(PATH_NT, EXT_NT)
        self.RE = self._read_table(PATH_RE, EXT_RE)
        self.RS = self._read_table(PATH_RS, EXT_RS)
        self.RT = self._read_table(PATH_RT, EXT_RT)

    @classmethod
    def _locate_sections(cls, lines):
        """Return ``{section key: line index}`` for the markers inside <CODE>."""
        try:
            first = lines.index('<CODE>\n')
            last = lines.index('</CODE>\n') + 1
        except ValueError as err:
            raise ValueError(
                "the dump has no <CODE>...</CODE> block"
            ) from err

        index = {}
        for i in range(first, last):
            key = cls._SECTION_MARKERS.get(lines[i])
            if key is not None:
                # A repeated marker overrides the earlier one.
                index[key] = i

        missing = [key for key in cls._SECTION_MARKERS.values()
                   if key not in index]
        if missing:
            raise KeyError(
                "section marker(s) missing from the dump: " + ", ".join(missing)
            )
        return index

    def _request_path(self):
        """Return the path of the cached raw dump of the term."""
        return PATH_REQUEST + self.mot + EXT_REQUEST

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the directory that will hold ``path`` if it is missing."""
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    def _read_request_lines(self):
        """Return the lines of the cached dump, each keeping its newline."""
        path = self._request_path()
        try:
            with open(path, "r", encoding=self._ENCODING) as f:
                return f.readlines()
        except UnicodeDecodeError:
            # The cache may have been written earlier with the platform
            # default encoding; fall back to it rather than fail.
            with open(path, "r") as f:
                return f.readlines()

    def _write_section(self, directory, extension, header, rows):
        """Write ``header`` (if any) followed by ``rows`` to a section file."""
        path = directory + self.mot + extension
        self._ensure_parent_dir(path)
        with open(path, "w", encoding=self._ENCODING) as out:
            if header is not None:
                out.write(header)
            out.writelines(rows)

    def _read_table(self, directory, extension):
        """Read one ';'-separated section file of the term as a DataFrame."""
        return pd.read_csv(directory + self.mot + extension, sep=";",
                           encoding=self._ENCODING)


In [19]:
# Build one Terme per sample word; each call loads the word from the local
# cache or downloads it when it is not cached yet.
A = Terme(mot="toto")
B = Terme(mot="Suisse")
C = Terme(mot="skate")
D = Terme(mot="bite")
E = Terme(mot="drogue")