# Générateur des règles - (fr)

In [1]:
import json

from RulesTransduction.tokenizer import *
from RulesTransduction.rulesFST import *

# Token inventories loaded from the project's JSON token files.
letters = TokenizerTransducer.read_json("Tokens/letters.json")
# NOTE(review): "Tokens/tex_letters.json" is also the file written by the
# file-generation cell at the end of this notebook, so it presumably has to
# exist from a previous run before this line can succeed -- TODO confirm.
tex_tokens = TokenizerTransducer.read_json("Tokens/tex_letters.json")

# Lemmatizer transducer; its `predict` method is applied to every spoken form
# when the rule files are generated.
Lemm = RulesTransducer(
    letters,
    letters,
    complete=True,
    drop=False,
    maximize=True,
    text_file='Rules/fr/Lemmatizer/lemm.txt',
)

# Name of the text encoding (presumably intended for the generated files).
ENCODING = 'utf-8'

### 1. Noms de variables

In [2]:
# Latin alphabet: `chars` is lower case, `Chars` is upper case.
chars = list(map(chr, range(ord('a'), ord('z') + 1)))
Chars = list(map(chr, range(ord('A'), ord('Z') + 1)))

# lower-case letters: spoken as the letter itself
min_char = {letter: [letter] for letter in chars}

# upper-case letters: spoken as "grand <letter>"
maj_char = {letter: ["grand " + letter.lower()] for letter in Chars}

# calligraphic letters
cal_char = {
    "\\mathcal { " + letter + " }": [letter.lower() + " calligraphique"]
    for letter in Chars
}

# blackboard-bold letters (four alternative spoken forms each)
mbb_char = {
    "\\mathbb { " + letter + " }": [
        spoken + " double barre",
        spoken + " avec double barre",
        "double barre " + spoken,
        "black board " + spoken,
    ]
    for letter in Chars
    for spoken in (letter.lower(),)
}

# Names of the lower-case Greek letters handled by the grammar.
greek_min_var_names = [
    "alpha", "beta", "gamma", "delta", "epsilon", "zeta",
    "eta", "theta", "iota", "kappa", "lambda", "mu",
    "nu", "xi", "pi", "rho", "sigma", "tau",
    "upsilon", "phi", "chi", "psi", "omega",
]

# Names of the upper-case Greek letters handled by the grammar.
greek_maj_var_names = [
    "Gamma", "Delta", "Theta", "Lambda", "Xi",
    "Pi", "Sigma", "Phi", "Omega",
]

# lower-case Greek: the command is the backslash-prefixed name
greek_min_vars = {"\\" + name: [name] for name in greek_min_var_names}
# alternative spellings; updating existing keys keeps their position
greek_min_vars.update({
    "\\xi": ["xi", "ksi"],
    "\\chi": ["chi", "khi"],
})

# upper-case Greek: spoken as "grand <lower-case name>"
greek_maj_vars = {
    "\\" + name: ["grand " + name.lower()] for name in greek_maj_var_names
}
greek_maj_vars.update({"\\Xi": ["grand xi", "grand ksi"]})

# "var" variants of Greek letters
greek_var_vars = {
    "\\varepsilon": ["varepsilon", "var epsilon"],
    "\\varpi": ["varpi", "var pi"],
    "\\varrho": ["varrho", "var rho"],
    "\\varsigma": ["varsigma", "var sigma"],
}

# All variable names, merged in the order the tables are listed.
var = {
    symbol: spoken_forms
    for table in (
        min_char,
        maj_char,
        cal_char,
        mbb_char,
        greek_min_vars,
        greek_maj_vars,
        greek_var_vars,
    )
    for symbol, spoken_forms in table.items()
}

### 2. Chiffres

In [3]:
# digits: each digit character maps to its French name and to itself
digit = {
    str(value): [name, str(value)]
    for value, name in enumerate(
        ["zero", "un", "deux", "trois", "quatre",
         "cinq", "six", "sept", "huit", "neuf"]
    )
}

### 3. Infini

In [4]:
# infinity symbol and its single spoken form
infty = {"\\infty": ["infini"]}

### 4. Ensembles

In [5]:
# Number sets: LaTeX token sequence -> list of spoken forms.
sets = {
    # base sets
    "\\mathbb { N }": [
        "entiers naturels",
        "ensemble des entiers naturels",
        "ensemble grand n",
    ],
    "\\mathbb { Z }": [
        "entiers relatifs",
        "ensemble des entiers relatifs",
        "ensemble grand z",
    ],
    "\\mathbb { Q }": [
        "rationnels",
        "ensemble des rationnels",
        "ensemble rationnel",
        "ensemble grand q",
    ],
    "\\mathbb { R }": [
        "reel",
        "ensemble des reels",
        "ensemble reel",
        "ensemble grand r",
    ],
    "\\mathbb { C }": [
        "complexe",
        "ensemble des complexes",
        "ensemble complexe",
        "ensemble grand c",
    ],
    # sets decorated with a superscript / subscript
    "\\mathbb { N } ^ { * }": [
        "entier strictement positif",
        "entier naturel non nul",
    ],
    "\\mathbb { Z } ^ { * }": [
        "entier non nul",
        "relatif non nul",
    ],
    "\\mathbb { R } ^ { + }": [
        "ensemble des reels positifs",
        "reel positif",
    ],
    "\\mathbb { R } ^ { - }": [
        "ensemble des reels negatifs",
        "reel negatif",
    ],
    "\\mathbb { R } ^ { + } _ { * }": [
        "ensemble des reels strictement positifs",
        "ensemble des reels positifs non nuls",
        "r plus etoile",
        "reel strictement positif",
        "reel positif non nul",
    ],
    "\\mathbb { R } ^ { - } _ { * }": [
        "ensemble des reels strictement negatifs",
        "ensemble des reels negatifs non nuls",
        "r moins etoile",
        "reel strictement negatif",
        "reel negatif non nul",
    ],
}

### 5. Relations

In [6]:
# Relations: LaTeX token sequence -> list of spoken forms.
rel = {
    # ordering
    ">": ["plus grand", "superieur", "plus grand que", "superieur à"],
    "\\geq": [
        "plus grand ou egal",
        "superieur ou egal",
        "plus grand ou egal à",
        "superieur ou egal à",
    ],
    "<": ["plus petit", "inferieur", "plus petit que", "inferieur à"],
    "\\leq": [
        "plus petit ou egal",
        "inferieur ou egal",
        "plus petit ou egal à",
        "inferieur ou egal à",
    ],

    # equality
    "=": ["egal", "egal à", "allant de"],
    ": =": ["egal par definition", "defini par"],
    "\\neq": ["different", "different de"],
    "\\simeq": ["environ egal", "environ egal à", "isomorphe", "isomorphe à"],
    "\\propto": ["proportionnel", "proportionnel à"],
    "\\sim": ["suit"],

    # set membership and inclusion
    "\\in": ["appartient", "appartient à", "dans"],
    "\\notin": ["appartient pas", "appartient pas à"],
    "\\subset": ["inclu", "inclu dans"],
    "\\subseteq": ["inclu ou egal", "inclu ou egal à"],
    "\\not\\subset": ["pas inclu", "pas inclu dans", "non inclu", "non inclu dans"],
    "\\not\\subseteq": [
        "pas inclu ou egal",
        "non inclu ou egal",
        "pas inclu ou egal à",
        "non inclu ou egal à",
    ],

    # convergence and equivalences
    "\\Rightarrow": ["implique"],
    "\\Leftrightarrow": ["equivalent"],
    "\\rightarrow": ["fleche"],
    "\\longrightarrow": ["longue fleche", "tend vers"],
    "\\overset { L } { \\longrightarrow }": ["converge en loi"],
    "\\overset { \\mathbb { P } } { \\longrightarrow }": ["converge en probabilite"],

    # spacing between expressions
    "\\quad": ["espace"],
}

### 6. Quantificateurs logiques

In [7]:
# Logical quantifiers: LaTeX token sequence -> list of spoken forms.
quantif = {
    "\\forall": ["pour tout"],
    "\\exists": ["existe"],
    "\\exists !": ["existe un unique", "existe unique"],
}

### 7. Opérateurs unaires

In [8]:
# Unary operators (sign prefixes) and their spoken forms.
un_op = {
    "+": ["plus"],
    "-": ["moins"],
}

### 8. Opérateurs binaires

In [9]:
# Binary operators: LaTeX token -> list of spoken forms.
bin_op = {
    "+": ["plus"],
    "-": ["moins"],
    ".": ["point", "scalaire"],
    "\\circ": ["rond"],
    "\\cup": ["union"],
    "\\cap": ["inter"],
    "\\times": ["croix"],
    "\\otimes": ["tenseur", "produit tensoriel", "croix rond"],
    "\\oplus": ["somme directe", "plus rond"],
    "\\setminus": ["prive", "prive de"],
    "\\vee": ["ou"],
    "\\wedge": ["et"],
}

### 10. Fonctions

In [10]:
# Functions of one variable. Every key ends with the "#<apply>" grammar
# keyword token.
func = {
    "\\deg #<apply>": ["deg", "degre"],
    "\\dim #<apply>": ["dim", "dimension"],
    "\\det #<apply>": ["det", "determinant"],
    "\\exp #<apply>": ["exponentielle", "exp"],
    "\\log #<apply>": ["logarithme", "log"],
    "\\cos #<apply>": ["cosinus", "cos"],
    "\\sin #<apply>": ["sinus", "sin"],
    "\\arccos #<apply>": ["arccosinus", "arc cosinus", "arccos", "arc cos"],
    "\\arcsin #<apply>": ["arcsinus", "arc sinus", "arcsin", "arc sin"],
    "\\arctan #<apply>": ["arctangente", "arc tangente", "arctan", "arc tan"],
    "\\mathrm { Re } #<apply>": ["partie reelle"],
    "\\mathrm { Im } #<apply>": ["partie imaginaire"],
}

# Special functions (handled with a different sequence control).
spec_func = {
    "\\frac #<apply>": ["fraction", "rapport"],
    "\\sqrt #<apply>": ["racine"],
    "\\binom #<apply>": ["binomial", "coefficient binomial"],
    "\\lVert #<apply>": ["norme"],
    "\\vert #<apply>": ["valeur absolue"],
    "\\mathbb { E } #<apply>": ["esperance"],
    "\\mathbb { P } #<apply>": ["probabilite"],
    "\\mathbb { V } #<apply>": ["variance"],
    "\\neg #<apply>": ["non", "negation"],
    "\\partial #<apply>": ["d rond", "derivee partielle", "derivee"],
    "\\nabla #<apply>": ["gradient"],
}

# Set functions (min / max / arg-extrema / bounds).
set_func = {
    "\\min #<apply>": ["min", "minimum"],
    "\\max #<apply>": ["max", "maximum"],
    "\\mathrm { argmin } #<apply>": [
        "argmin", "arg min", "argument min", "argument minimum",
    ],
    "\\mathrm { argmax } #<apply>": [
        "argmax", "arg max", "argument max", "argument maximum",
    ],
    "\\sup #<apply>": ["sup", "borne superieure"],
    "\\inf #<apply>": ["inf", "borne inferieure"],
}

### 11. Itérateurs

In [11]:
# Iterated operators (limit, big operators, sum, product, integral).
# NOTE: this name shadows Python's builtin `iter` for the rest of the
# session; the complete-grammar cell refers to this dict by this name.
iter = {
    "\\lim": ["lim", "limite"],
    "\\bigotimes": ["le produit tensoriel"],
    "\\bigoplus": ["la somme directe"],
    "\\bigcup": ["le union"],
    "\\bigcap": ["le intersection"],
    "\\sum": ["somme"],
    "\\prod": ["produit"],
    "\\int": ["integrale"],
}

### 12. Caractères spéciaux

In [12]:
# Special characters: LaTeX token -> list of spoken forms.
symb = {
    ":": ["deux points", "tel que"],
    "\\dots": ["trois points", "trois petits points"],
    ",": ["virgule"],
    "\\%": ["pourcent", "pour cent"],
    "\\vert": [
        "conditionnellement",
        "conditionnellement à",
        "barre verticale",
        "sachant",
    ],
}

# Math sequence-control symbols: opening / closing delimiters.
math_cseq = {
    "(": ["parenthese"],
    ")": [
        "fermer la parenthese",
        "fermer parenthese",
        "parenthese fermante",
        "fin de parenthese",
        "fin parenthese",
    ],

    "\\lbrack": ["crochet"],
    "\\rbrack": [
        "fermer le crochet",
        "fermer crochet",
        "crochet fermant",
        "fin de crochet",
        "fin crochet",
    ],

    "\\{": ["ensemble"],
    "\\}": [
        "fermer le ensemble",
        "fermer ensemble",
        "fin de le ensemble",
        "fin ensemble",
    ],
}

# TeX sequence-control symbols: raw braces.
tex_cseq = {
    "{": ["accolade"],
    "}": [
        "fermer le accolade",
        "fermer accolade",
        "accolade fermante",
        "fin de le accolade",
        "fin accolade",
    ],
}

### 13. Raccourcis naturels

In [14]:
# Natural-language shortcuts that expand to a multi-token LaTeX sequence.
natural_expr = {
    # sign conditions (comparison with zero)
    "\\geq 0": ["positif"],
    "> 0": ["strictement positif"],
    "\\leq 0": ["negatif"],
    "< 0": ["strictement negatif"],

    # "respectively" connective, set between two \quad spaces
    "\\quad \\text { resp. } \\quad": ["respectivement"],
}

### 14. Mots-clés de grammaire

In [24]:
# Grammar keywords: temporary "#<...>" symbols and their spoken forms.
grammar_keywrds = {
    # variable decorators
    "#<hat>": ["chapeau"],
    "#<tilde>": ["tilde"],
    "#<star>": ["etoile", "star"],
    "#<vec>": ["vecteur"],

    # grammatical-structure keywords
    "#<for>": ["pour"],
    "#<to>": ["à"],
    "#<apply>": ["de"],
    "#<over>": ["sur", "par rapport"],
    "#<fact>": ["facteur", "facteur de"],

    # variable operators
    "#<'>": ["prime"],
    "#<ovline>": ["barre"],
    "#<t>": ["transpose"],
    "#<square>": ["carre"],
    "#<cube>": ["cube"],
    "#<!>": ["factoriel"],

    # subscript / superscript
    "#<sub>": ["indice", "underscore"],
    "#<sup>": ["puissance", "exposant"],
}

# Grammar keys: symbols with no spoken form. Each key gets its own fresh
# empty list.
grammar_keys = {
    token: []
    for token in (
        # final symbols
        "\\limits",
        "\\tilde",
        "\\overline",
        "\\hat",
        "\\vec",
        "\\left",
        "\\right",
        "\\rVert",
        "\\,",
        "'",
        "*",
        # type-preservation markers
        "##<",
        ">##",
    )
}

### Grammaire complète :

In [25]:
# Complete grammar: category name -> rule table ({LaTeX tokens: spoken forms}).
# Keyword arguments keep the categories in the order they are listed here.
grammar = dict(
    # variable names
    var=var,

    # values
    dig=digit,
    infty=infty,
    set=sets,

    # relations
    rel=rel,

    # operators
    qtf=quantif,
    uop=un_op,
    bop=bin_op,

    # functions and applications
    fun=func,
    specfun=spec_func,
    setfun=set_func,
    iter=iter,

    # special characters
    symb=symb,

    # sequence-control symbols
    mseq=math_cseq,
    tseq=tex_cseq,

    # natural-language expressions
    nlex=natural_expr,

    # entries not used by the lexical part
    gkey=grammar_keys,

    # grammar keywords (to be removed from the output)
    gwrd=grammar_keywrds,
)

### Création des fichiers

In [26]:
# Accumulators filled while walking the grammar:
#   tex_wrds - every whitespace-separated LaTeX token seen in a rule key
#   vocab    - every whitespace-separated word of the lemmatized spoken forms
#   inputs / outputs - parallel lists: lemmatized spoken form -> LaTeX key
tex_wrds = set()
vocab = set()
inputs = []
outputs = []

for key, rules in grammar.items():
    for sub_symbol, spoken_forms in rules.items():
        # LaTeX tokens are collected for every category, "gkey" included.
        tex_wrds.update(sub_symbol.split())
        if key == "gkey":
            # "gkey" entries contribute tokens only, no lexical rules.
            continue
        for wrd in spoken_forms:
            wrd_lemm = Lemm.predict(wrd)
            vocab.update(wrd_lemm.split())
            inputs.append(wrd_lemm)
            outputs.append(sub_symbol)

# Natural-language vocabulary.
# Sorted: set iteration order for strings changes from one kernel run to the
# next, which made the generated file differ on every run.
with open("Vocabulary/vocab_fr.json", 'w', encoding=ENCODING) as writer1:
    json.dump({"vocab": sorted(vocab)}, writer1, indent=4)

# Lexical transduction rules, one "<input>;<output>;" line per rule, in
# grammar order.
with open("Rules/fr/SeqToTex/math_rules.txt", 'w', encoding=ENCODING) as writer2:
    for inp, out in zip(inputs, outputs):
        writer2.write(inp + ';' + out + ';\n')

# LaTeX tokens (sorted for the same reproducibility reason as the vocabulary).
with open("Tokens/tex_letters.json", 'w', encoding=ENCODING) as writer3:
    json.dump({"add_space": False, "tokens": sorted(tex_wrds)}, writer3, indent=4)

# LaTeX grammar: category name -> list of its rule keys.
with open("Grammar/tex_grammar.json", 'w', encoding=ENCODING) as writer4:
    json.dump({name: list(table) for name, table in grammar.items()}, writer4, indent=4)
