In [1]:
from elg import Service
import requests

In [3]:
with open('paragraphe.txt', encoding='utf-8') as f:
    text = f.read()

In [4]:
service = Service.from_id(8122)

Please go to this URL in your browser: https://live.european-language-grid.eu/auth/realms/ELG/protocol/openid-connect/auth?client_id=elg-oob&redirect_uri=urn:ietf:wg:oauth:2.0:oob&response_type=code&scope=openid



In [5]:
# La longueur du texte est tres grande, donc on va le diviser en phrases
from nltk.tokenize import sent_tokenize

sentences = sent_tokenize(text)

len(sentences)

9

In [6]:
results = []
for i,sen in enumerate(sentences):
    try:
        results.append(service(request_input=sen, request_type="text"))
    except Exception:
        print('*'*50 + str(i))

Calling:
	[8122] Text to Terminological Concept System
with request:
	type: text - content: Une lombalgie sur cinq entraîne un arrêt de travail. - mimeType: text/plain

Progress: 33.3%
Progress: 33.3%
Progress: 100.0%
Calling:
	[8122] Text to Terminological Concept System
with request:
	type: text - content: Elle représente 30 % des arrêts de travail de plus de 6 mois et 20 % des accidents du travail, avec des arrêts de travail d’une durée de 2 mois en moyenne. - mimeType: text/plain

Progress: 1.0%
Progress: 33.3%
Progress: 33.3%
Progress: 100.0%
Calling:
	[8122] Text to Terminological Concept System
with request:
	type: text - content: Elle est ainsi devenue la 1re cause d’exclusion du travail avant 45 ans et le 3e motif d’admission en invalidité (4). - mimeType: text/plain

Progress: 1.0%
Progress: 33.3%
Progress: 33.3%
Progress: 100.0%
Calling:
	[8122] Text to Terminological Concept System
with request:
	type: text - content: Un autre enjeu est de prévenir la chronicisation de la l

In [7]:
len(results)

9

In [8]:
results[0]



In [9]:
dictRest = []
for result in results:
    for item in result['annotations'].values():
        for annot in item:
            dictRest.append((annot['start'], annot['end'], annot['features']))

In [2]:
import json

In [11]:
with open("Text2TCS_parg.json", 'w', encoding="UTF-8") as f:
    f.write(json.dumps(dictRest))

In [3]:
with open("Text2TCS.json") as f:
    dictRest = json.loads(f.read())

In [4]:
dictRest[0]

[0,
 10,
 {'id': 'c01',
  'term': 'FICHE MÉMO',
  'relations': [{'type': 'genericRelation',
    'related concept': 'c06',
    'related terms': 'document'}]}]

In [5]:
dictRest

[[0,
  10,
  {'id': 'c01',
   'term': 'FICHE MÉMO',
   'relations': [{'type': 'genericRelation',
     'related concept': 'c06',
     'related terms': 'document'}]}],
 [270,
  282,
  {'id': 'c01',
   'term': 'fiches mémo',
   'relations': [{'type': 'genericRelation',
     'related concept': 'c06',
     'related terms': 'document'}]}],
 [12,
  27,
  {'id': 'c02',
   'term': 'Prise en charge',
   'relations': [{'type': 'genericRelation',
     'related concept': 'c08',
     'related terms': 'Prise'},
    {'type': 'associativeRelation',
     'related concept': 'c03',
     'related terms': 'patient présentant'}]}],
 [31, 49, {'id': 'c03', 'term': 'patient présentant', 'relations': []}],
 [55,
  72,
  {'id': 'c04',
   'term': 'lombalgie commune',
   'relations': [{'type': 'genericRelation',
     'related concept': 'c10',
     'related terms': 'méthode d’élaboration'}]}],
 [78,
  99,
  {'id': 'c05',
   'term': 'RAPPORT D’ÉLABORATION',
   'relations': [{'type': 'genericRelation',
     'related 

In [19]:
len(dictRest)

1765

## Extracting relations

In [15]:
def makeTriple(term, relations):
    triples = []
    for relation in relations:
        triples.append((term, relation['type'], relation['related terms']))
    return triples

In [20]:
relations = []
for entity in dictRest:
    if entity[2]['relations']:
        relations += makeTriple(entity[2]['term'], entity[2]['relations'])

In [25]:
relations[:]

[('FICHE MÉMO', 'genericRelation', 'document'),
 ('fiches mémo', 'genericRelation', 'document'),
 ('Prise en charge', 'genericRelation', 'Prise'),
 ('Prise en charge', 'associativeRelation', 'patient présentant'),
 ('lombalgie commune', 'genericRelation', 'méthode d’élaboration'),
 ('RAPPORT D’ÉLABORATION', 'genericRelation', 'méthode d’élaboration'),
 ('texte de recommandations', 'genericRelation', 'document'),
 ('Prise', 'activityRelation', 'lombalgie commune'),
 ('patient', 'activityRelation', 'Prise'),
 ('patient', 'associativeRelation', 'lombalgie commune'),
 ('messages-clés', 'genericRelation', 'méthode d’élaboration'),
 ('messages-clés', 'partitiveRelation', 'recommandations'),
 ('messages-clés', 'propertyRelation', 'format'),
 ('fiches mémo', 'propertyRelation', 'qualité'),
 ('fiches mémo', 'instrumentalRelation', 'sécurité'),
 ('qualité', 'associativeRelation', 'sécurité'),
 ('professionnel de santé', 'activityRelation', 'fiches mémo'),
 ('discernement', 'activityRelation', 'p

In [24]:
len(relations)

1590

## Extracting terms

In [15]:
terms = []
for entity in dictRest:
    terms.append(entity[2]['term'])

In [16]:
terms

['lombalgie',
 'arrêt de travail',
 'arrêts de travail',
 'accidents du travail',
 'cause',
 'motif',
 'chronicisation',
 'lombalgie',
 'douleur lombaire',
 'douleur radiculaire',
 'guide',
 'douleur radiculaire',
 'douleur',
 'extrémités inférieures',
 'dermatome(s)',
 'patients',
 'lombalgie',
 'isolée',
 'douleurs radiculaires',
 'douleur provoquée',
 'composante',
 'neuropathique',
 'lésion du système nerveux)',
 'douleurs lombaires',
 'lombalgie)',
 'douleurs lombaires et radiculaires',
 'sciatique)',
 'version française']

In [17]:
len(terms)

28

In [18]:
len(set(terms))

25

In [26]:
import pandas as pd

df = pd.DataFrame(relations, columns = ["term 1", "relation", "term 2"])

df.to_excel("text2TCS_triples.xlsx")

In [28]:
with open("Text2TCS_triples.json", 'w', encoding="UTF-8") as f:
    f.write(json.dumps(relations))

In [7]:
with open("Text2TCS_terms.json") as f:
    terms = json.loads(f.read())