# Notebook pour mieux comprendre comment lier deux entités d'intérêt avec un parser de dépendance

In [1]:
from pprint import pprint
from glob import glob
import pkg_resources
import re
import os
import time
import csv
import yaml
import sys
import shutil


#from pymedext_eds.viz import display_annotations
from pymedextcore.document import Document
from pymedextcore.brattransform import brat
from pymedextcore.annotators import Annotation, Annotator, Relation

with open("config.yml", "r") as ymlfile:
    cfg = yaml.safe_load(ymlfile)
    
    

sys.path.append(cfg["pymedext_dir"])



from annotators import Endlines, SentenceTokenizer, Hypothesis, \
                                    ATCDFamille, SyntagmeTokenizer, Negation, RegexMatcher, \
                                    Pipeline, SectionSplitter

from annotatorsTox import QuickUMLSAnnotator, StanzaAnnotator

from utils import rawtext_loader

# à décommenter pour faire la deuxième partie du notebook, qui porte sur le passage d'annotations pymedext à la génération d'une ontolgie :
# from owlready2 import *

To use RuSHSentenceTokenizer, install PyRuSH using "pip install PyRuSH"


In [2]:
# from cleansAdminParts import *
# from creatsBagsofAdminWords import *
# from utils_functions import *

In [3]:
# à décommenter pour faire la deuxième partie du notebook, qui porte sur le passage d'annotations pymedext à la génération d'une ontolgie :

# from owlready2 import *
# default_world.set_backend(filename ="/rapids/notebooks/host/alice_work/2021_chimioToxDict/src/pymedterminoMDRWHOFRE.sqlite3", exclusive=False)
# PYM = get_ontology("http://PYM/").load()

# MDR = PYM["MDR"]
# WHO = PYM["WHO"]
# CUI = PYM["CUI"]
# MDRFRE = PYM["MDRFRE"]
# WHOFRE = PYM["WHOFRE"]

In [4]:
# CUI["C0004093"].synonyms

# Charger les données (sans enlever admin parts, qui est en cours de changement)

In [5]:
data_FT = cfg["data"]["NR"]
file_list=os.listdir(data_FT)
#list_text=[removesAdminParts2("ech_ccolon_nidocs/" + x,starting_list_ngram, ending_list_ngram,"ech_clean_ccolon_nidocs/")for x in file_list]
#print(file_list)

In [6]:
data_path=data_FT
docs=[]
for x in file_list :
    docs.append(rawtext_loader(data_path + x))

In [7]:
docs[0].source_ID

'2222222222'

# 1) Détection d'entités d'intérêt : Quickumls pour les toxicités et Regex pour les grades

Préalablement, on a construit un fichier "MRCONSO.RRF" pour utiliser quickumls. ("https://github.com/Georgetown-IR-Lab/QuickUMLS")
Ce fichier a été construits en filtrant les concepts de l'UMLS sur ceux qui concernent des toxicités, et en ajoutant des synonymes trouvés dans différentes sources de terminologies qui concernent les toxicités.

In [8]:
quickumls_fp_2=cfg["quickumls"]

In [9]:
endlines = Endlines(['raw_text'], 'endlines', 'endlines:v1')
sentences = SentenceTokenizer(['endlines'], 'sentence', 'sentenceTokenizer:v1')
hypothesis = Hypothesis(['sentence'], 'hypothesis', 'hypothesis:v1')
family = ATCDFamille(['sentence'], 'context', 'ATCDfamily:v1')
syntagmes = SyntagmeTokenizer(['sentence'], 'syntagme', 'SyntagmeTokenizer:v1')
negation = Negation(['syntagme', 'sentence'], 'negation', 'Negation:v1')
quickumls = QuickUMLSAnnotator(['sentence'],'QuickUMLS', 'QuickUMLS:v1',quickumls_fp=quickumls_fp_2)
regex_grade = RegexMatcher(['endlines', 'syntagme'], 'regex_grade', 'RegexMatcher:v1', '/rapids/notebooks/host/alice_work/clone_de_pymedext_eds/pymedext_eds/regex_grade.json')
regex_triggers = RegexMatcher(['endlines', 'syntagme'], 'regex_triggers', 'RegexMatcher:v1', '/rapids/notebooks/host/alice_work/clone_de_pymedext_eds/pymedext_eds/regex_triggers.json')

In [10]:
pipeline1 = Pipeline(pipeline = [endlines, sentences, hypothesis, family, syntagmes, negation, quickumls, regex_grade, regex_triggers])

In [11]:
annotated_docs = pipeline1.annotate(docs)

  for m in re.finditer(rex['regexp'], syntagme.value, flags=reflags):


In [12]:
annotated_pymedext_documents = [Document.from_dict(annotated_doc) for annotated_doc in annotated_docs]

## Modifier l'annotation regex_grade : mettre le numéro du grade dans les attributs

In [13]:
for thisDoc in annotated_pymedext_documents :
    anns=thisDoc.get_annotations(_type="regex_grade")
    for ann in anns :
        if re.search("([0-5]|III|IV|II|I|V)", ann.value):
            num = re.search("([0-5]|III|IV|II|I|V)", ann.value).group(0)
            ann.attributes["num"] = num
            if num == "0" :
                ann.attributes["normalized_num"] = "0"
            elif num in ["1", "I"]:
                ann.attributes["normalized_num"] = "1"
            elif num in ["2", "II"]:
                ann.attributes["normalized_num"] = "2"
            elif num in ["3", "III"]:
                ann.attributes["normalized_num"] = "3"
            elif num in ["4", "IV"]:
                ann.attributes["normalized_num"] = "4"
            elif num in ["5", "V"]:
                ann.attributes["normalized_num"] = "5"
            else :
                ann.attributes["normalized_num"] = "null"
        else :
            ann.attributes["num"] = "null"
            ann.attributes["normalized_num"] = "null"

## Sélectionner les phrases avec au moins une tox

In [14]:
for thisDoc in annotated_pymedext_documents :
    list_id_sent_tox = []
    for annQ in thisDoc.get_annotations(_type="QuickUMLS"):
        if annQ.source_ID not in list_id_sent_tox :
            list_id_sent_tox.append(annQ.source_ID)

    set_sent_tox = []        
    for id_sent_tox in list_id_sent_tox :
        annots = thisDoc.get_annotation_by_id(id_sent_tox)
        annots.type = "sentence_tox"

## Sélectionner les phrases avec au moins un trigger

In [15]:
for thisDoc in annotated_pymedext_documents :
    list_id_sent_trigs = []
    for annQ in thisDoc.get_annotations(_type="regex_triggers"):
        if annQ.source_ID not in list_id_sent_trigs :
            list_id_sent_trigs.append(annQ.source_ID)

    set_sent_tox = []        
    for id_sent_trig in list_id_sent_trigs :
        annots = thisDoc.get_annotation_by_id(id_sent_trig)
        annots.type = "sentence_triggers"

## 2) Dependency parser dans les phrases avec au moins une tox avec Stanza

In [16]:
stanzaparser_tox = StanzaAnnotator(['sentence_tox'], 'Stanza_tox', 'Stanza:v1')
stanzaparser_triggers = StanzaAnnotator(['sentence_triggers'], 'Stanza_triggers', 'Stanza:v1')

2022-04-07 12:39:59 INFO: Loading these models for language: fr (French):
| Processor | Package |
-----------------------
| tokenize  | gsd     |
| mwt       | gsd     |
| pos       | gsd     |
| lemma     | gsd     |
| depparse  | gsd     |

2022-04-07 12:39:59 INFO: Use device: gpu
2022-04-07 12:39:59 INFO: Loading: tokenize
2022-04-07 12:40:01 INFO: Loading: mwt
2022-04-07 12:40:01 INFO: Loading: pos
2022-04-07 12:40:02 INFO: Loading: lemma
2022-04-07 12:40:02 INFO: Loading: depparse
2022-04-07 12:40:02 INFO: Done loading processors!
2022-04-07 12:40:02 INFO: Loading these models for language: fr (French):
| Processor | Package |
-----------------------
| tokenize  | gsd     |
| mwt       | gsd     |
| pos       | gsd     |
| lemma     | gsd     |
| depparse  | gsd     |

2022-04-07 12:40:02 INFO: Use device: gpu
2022-04-07 12:40:02 INFO: Loading: tokenize
2022-04-07 12:40:02 INFO: Loading: mwt
2022-04-07 12:40:02 INFO: Loading: pos
2022-04-07 12:40:02 INFO: Loading: lemma
2022-04-0

In [17]:
pipeline2 = Pipeline(pipeline = [stanzaparser_tox, stanzaparser_triggers])

In [18]:
annotated_docs2 = pipeline2.annotate(annotated_pymedext_documents)

In [19]:
annotated_pymedext_documents2 = [Document.from_dict(annotated_doc) for annotated_doc in annotated_docs2]

In [20]:
type(annotated_docs2[0])
type(annotated_pymedext_documents2[0].annotations[0])
type(annotated_pymedext_documents2[0].relations[0])

pymedextcore.annotators.Relation

In [21]:
# annotated_pymedext_documents2[0].annotations[7].to_dict()
# annotated_pymedext_documents2[0].annotations[7].get_children_span()
# for doc in annotated_pymedext_documents2 :
#     for ann in doc.annotations :
#         if ann.get_entities_children() :
#             print("parent :", ann.to_dict()["type"])
#             for child_ann in ann.get_entities_children() :
#                 print("\t child", child_ann.to_dict()["type"])
#                 print('\n',child_ann.to_dict() == ann.to_dict() )
                
            #print("\n")
    #print("\n\n")

## Restreindre les relations Stanza autour de la tox (quickumls pour le moment, TODO : triggers terms)

### Définitions de fonctions pour trouver tous les enfants des tox quickumls

In [22]:
def select_stanza_ann_with_span(list_spans, tox_span) :
    selected_spans = []
    in_the_tox_expression = False # pour les tox de plusieurs mots
    for span in list_spans :
        if span[0] == tox_span[0]:
            selected_spans.append(span)
            if span[1] == tox_span[1]: # 1 seul mot dans la tox
                break
            else :
                in_the_tox_expression = True
        elif span[1] < tox_span[1] and in_the_tox_expression:
            selected_spans.append(span)
        elif span[1] == tox_span[1] and in_the_tox_expression:
            selected_spans.append(span)
            break
    return selected_spans

In [23]:
def mine_relations_recursivly(ID_of_ann, document, relations_list) :
    if  len(document.get_relations(head_id = ID_of_ann)) == 0 :
        #print("no", document.get_annotation_by_id(ID_of_ann).value)
        #print("relations_list if", relations_list)
        return(relations_list)
    else :
        #print("yes", document.get_annotation_by_id(ID_of_ann).value)
        thisRelations = [rel for rel in document.get_relations(head_id = ID_of_ann)]
        for rel in thisRelations :
            relations_list.append(rel)
            ID_of_ann = rel.target
            #print("relations_list else", relations_list)
            mine_relations_recursivly(ID_of_ann, document, relations_list)

#### Visualisation des relations 

In [24]:
annotated_pymedext_documents_toward_tox = [Document.from_dict(annotated_doc) for annotated_doc in annotated_docs2]
for doc in annotated_pymedext_documents_toward_tox  :
    list_span_annQ = []
    quickumls_anns = doc.get_annotations(_type="QuickUMLS")
    for annQ in quickumls_anns:
        list_span_annQ.append(annQ.span)
    stanza_annots = doc.get_annotations(_type="Stanza_tox")
    dict_spans={}
    for annS in stanza_annots :
        dict_spans[annS.span] = annS.ID
    list_of_lists_span_co_SQ = []
    for Qspan in list_span_annQ :
        list_of_lists_span_co_SQ.append(select_stanza_ann_with_span(list_spans=[key for key in dict_spans], tox_span=Qspan))
    list_of_relations_list =[]
    for list_span in list_of_lists_span_co_SQ :
        for spanSQ in list_span :
            relations_list = []
            mine_relations_recursivly(ID_of_ann=dict_spans[spanSQ],document=doc, relations_list = relations_list)
            list_of_relations_list.append(relations_list)
    for rel_list in list_of_relations_list :
        for rel in rel_list :
            rel.type="toward_tox"
            ann_head = doc.get_annotation_by_id(rel.head)
            ann_head.type = "toward_tox"
            ann_target = doc.get_annotation_by_id(rel.target)
            ann_target.type = "toward_tox"
            if rel not in doc.relations :
                doc.relations.append(rel)

In [25]:
for ann in annotated_pymedext_documents_toward_tox[0].get_relations(_type="toward_tox") :
    print(ann.to_dict())
    print(annotated_pymedext_documents_toward_tox[0].get_annotation_by_id(ann.head).to_dict()["value"])
    print(annotated_pymedext_documents_toward_tox[0].get_annotation_by_id(ann.target).to_dict()["value"])
    print("\n\n")

{'type': 'toward_tox', 'head': 'd8ba1156-b66f-11ec-be0a-0242ac110003', 'target': 'd8ba10de-b66f-11ec-be0a-0242ac110003', 'source': 'Stanza:v1', 'source_ID': 'd66a22e2-b66f-11ec-be0a-0242ac110003', 'attributes': {'deprel': 'det'}, 'ID': 'd8ba152a-b66f-11ec-be0a-0242ac110003'}
oesophagite
une



{'type': 'toward_tox', 'head': 'd8ba1156-b66f-11ec-be0a-0242ac110003', 'target': 'd8ba11d8-b66f-11ec-be0a-0242ac110003', 'source': 'Stanza:v1', 'source_ID': 'd66a22e2-b66f-11ec-be0a-0242ac110003', 'attributes': {'deprel': 'amod'}, 'ID': 'd8ba1642-b66f-11ec-be0a-0242ac110003'}
oesophagite
peptique



{'type': 'toward_tox', 'head': 'd8ba12c8-b66f-11ec-be0a-0242ac110003', 'target': 'd8ba1250-b66f-11ec-be0a-0242ac110003', 'source': 'Stanza:v1', 'source_ID': 'd66a22e2-b66f-11ec-be0a-0242ac110003', 'attributes': {'deprel': 'case'}, 'ID': 'd8ba16c4-b66f-11ec-be0a-0242ac110003'}
grade
de



{'type': 'toward_tox', 'head': 'd8ba1156-b66f-11ec-be0a-0242ac110003', 'target': 'd8ba12c8-b66f-11ec-be0a-0242ac110

##### To BRAT

In [28]:
brat.save_to_brat(list_of_documents = annotated_pymedext_documents_toward_tox,
                  folder_path = cfg["brat_dirs"]["toward_tox"],
                  pym_ann_types=["QuickUMLS", "regex_grade", "regex_triggers"],
                  brat_entities_in_pym_types = ["QuickUMLS", "regex_grade", "regex_triggers"],
                  brat_attributes ={"QuickUMLS" : ["negation", "context", "hypothesis"]},
                  pym_rel_types = ["toward_tox", "Stanza_triggers"],
                  brat_ents_of_rel_in_pym_att_values = {"toward_tox" : "upos", "Stanza_triggers": "upos" },
                  brat_type_of_rel_in_pym_rel_att_values = {'toward_tox' : 'deprel', "Stanza_triggers": "deprel" },
                  level_annot = {'toward_tox' : 1, 'Stanza_triggers': 2}
                 )

1

![DEP](../images/22-04_MEDKIT.png)

### Définitions de fonctions pour confronter les spans stanza à ceux de quickumls et regex_grade

Ces fonctions nous permettent de relier directement les entités toxicité et grade

In [27]:
def creats_dict_span0(doc,ann_type = "regex_grade") :
    dict_span = {}
    list_int_ann = doc.get_annotations(_type = ann_type)
    for ann in list_int_ann :
        dict_span[ann.span[0]] = ann.ID
    return dict_span

In [28]:
def returns_tox_ann(doc, dict_span0, rel_list):
    """
    """
    ann_ID = False
    for rel in rel_list :
        ann_span0 = doc.get_annotation_by_id(rel.head).span[0]
        if ann_span0 in dict_span0 :
            ann_ID =  dict_span0[ann_span0]
            break
    return ann_ID

### Parcours des documents et ajouts de relations

In [29]:
for doc in annotated_pymedext_documents2:
    list_span_annQ = []
    quickumls_anns = doc.get_annotations(_type="QuickUMLS")
    for annQ in quickumls_anns:
        list_span_annQ.append(annQ.span)
    stanza_annots = doc.get_annotations(_type="Stanza_tox")
    dict_spans={}
    for annS in stanza_annots :
        dict_spans[annS.span] = annS.ID
    list_of_lists_span_co_SQ = []
    for Qspan in list_span_annQ :
        list_of_lists_span_co_SQ.append(select_stanza_ann_with_span(list_spans=[key for key in dict_spans], tox_span=Qspan))
    list_of_relations_list =[]
    for list_span in list_of_lists_span_co_SQ :
        for spanSQ in list_span :
            relations_list = []
            mine_relations_recursivly(ID_of_ann=dict_spans[spanSQ],document=doc, relations_list = relations_list)
            list_of_relations_list.append(relations_list)
    ## to onto 
    dict_quickumls_span0 = creats_dict_span0(doc, ann_type = "QuickUMLS") # to onto
    dict_grade_span0 = creats_dict_span0(doc, ann_type = "regex_grade") # to onto pour le grade
    for rel_list in list_of_relations_list :
        if rel_list :
            quickumls_ann_ID = returns_tox_ann(doc, dict_quickumls_span0, rel_list) # to onto
            if not quickumls_ann_ID :
                continue
                #print(rel_list)
                #for rel in rel_list :
                    #print("\t stanza : ", doc.get_annotation_by_id(rel.head).span[0])
                    #print("\t stanza : ", doc.get_annotation_by_id(rel.head).attributes["text"])
                    #print("\t quickumls : ",dict_quickumls_span0.keys())
                    #for key in dict_quickumls_span0 :
                        #print("\t\t Quickumls :")
                        #print("\t\t",key, doc.get_annotation_by_id(dict_quickumls_span0[key]).value)
                        #print("\t\t", key, doc.get_annotation_by_id(dict_quickumls_span0[key]).attributes["snippet"])
                    #print("\n")
                #print("STOP \n\n")
            else :
                #quickumls_ann = doc.get_annotation_by_id(quickumls_ann_ID)
                for rel in rel_list :
                    target_ann_span0 = doc.get_annotation_by_id(rel.target).to_dict()["span"][0] 
                    if target_ann_span0 in dict_grade_span0 :
                        grade_ann_ID = dict_grade_span0[target_ann_span0]
                        ## On transforme le type des annotations en to_onto 
                        #grade_ann = doc.get_annotation_by_id(grade_ann_ID)
                        #grade_ann.type = "to_onto"
                        #quickumls_ann.type = "to_onto"
                        attributes = {"objProp" : "hasGrade"}
                        rel = Relation(head = quickumls_ann_ID,
                                       target = grade_ann_ID,
                                       type = "to_onto",
                                       attributes = attributes,
                                       source = 'Stanza_tox',
                                       source_ID = rel.ID
                                      )
                        doc.relations.append(rel)
            

##### Visualisation des relations :

In [30]:
for rel in annotated_pymedext_documents2[0].get_relations('to_onto'):
    print(rel.to_dict())
    print('\n\n')
    print(annotated_pymedext_documents2[0].get_annotation_by_id(rel.head).to_dict())
    print('\n')
    print(annotated_pymedext_documents2[0].get_annotation_by_id(rel.target).to_dict())

{'type': 'to_onto', 'head': 'cc1114fa-b5c9-11ec-8058-0242ac110003', 'target': 'cc151924-b5c9-11ec-8058-0242ac110003', 'source': 'Stanza_tox', 'source_ID': 'ce5b1f30-b5c9-11ec-8058-0242ac110003', 'attributes': {'objProp': 'hasGrade'}, 'ID': 'fd321a8e-b5c9-11ec-8058-0242ac110003'}



{'type': 'QuickUMLS', 'value': 'oesophagite', 'ngram': None, 'span': (24, 35), 'source': 'QuickUMLS:v1', 'source_ID': 'cc0d5c20-b5c9-11ec-8058-0242ac110003', 'isEntity': False, 'attributes': {'hypothesis': 'certain', 'context': 'patient', 'negation': 'aff', 'cui': 'C0014868', 'label': 'oesophagite', 'semtypes': ['T047'], 'score': 1.0, 'snippet': 'Le patient présente une oesophagite peptique de grade 2', 'snippet_span': (0, 55)}, 'ID': 'cc1114fa-b5c9-11ec-8058-0242ac110003'}


{'type': 'regex_grade', 'value': 'grade 2', 'ngram': None, 'span': (48, 55), 'source': 'RegexMatcher:v1', 'source_ID': 'cc0e1c64-b5c9-11ec-8058-0242ac110003', 'isEntity': True, 'attributes': {'version': 'v1', 'label': 'Grade', 'id_regex

Maintenant, les entités tox et grade sont directement reliées (Je n'avais pas fait de visualisation Brat spécifique pour ça)

Pour la cellule ci-dessous, mettre l'ID d'une head de relations ci-dessus

In [29]:
for rel in annotated_pymedext_documents2[0].get_relations(head_id="41b12044-b5c3-11ec-acb9-0242ac110003"):
    print(rel.to_dict())
    print(annotated_pymedext_documents2[0].get_annotation_by_id(rel.head).to_dict())
    print(annotated_pymedext_documents2[0].get_annotation_by_id(rel.target).to_dict())

{'type': 'to_onto', 'head': '41b12044-b5c3-11ec-acb9-0242ac110003', 'target': '4211e79e-b5c3-11ec-acb9-0242ac110003', 'source': 'Stanza_tox', 'source_ID': '5116c14c-b5c3-11ec-acb9-0242ac110003', 'attributes': {'objProp': 'hasGrade'}, 'ID': '5554ebf8-b5c3-11ec-acb9-0242ac110003'}
{'type': 'QuickUMLS', 'value': 'oesophagite', 'ngram': None, 'span': (546, 557), 'source': 'QuickUMLS:v1', 'source_ID': '419f57e2-b5c3-11ec-acb9-0242ac110003', 'isEntity': False, 'attributes': {'hypothesis': 'certain', 'context': 'patient', 'negation': 'aff', 'cui': 'C0014868', 'label': 'oesophagite', 'semtypes': ['T047'], 'score': 1.0, 'snippet': ' La fibroscopie oeso-gastro-duodénale avait révélé  une oesophagite peptique de grade II et a permis l’exérèse d’un petit papillome du tiers supérieur de l’œsophage', 'snippet_span': (490, 654)}, 'ID': '41b12044-b5c3-11ec-acb9-0242ac110003'}
{'type': 'regex_grade', 'value': 'grade II', 'ngram': None, 'span': (570, 578), 'source': 'RegexMatcher:v1', 'source_ID': '41a1

# Pour exécuter les cellules ci-dessous, il est nécessaire d'avoir Owlready.

## Owlready 

In [29]:
CUI = PYM["CUI"]
onto = get_ontology("http://toto.org/" + "#")
with onto :
    class X(PYM.Concept): pass
    cui_class = CUI["C0014868"]
    ind1 = X()
    ind1.is_a.append(cui_class)   

### Créer onto

In [30]:
timestr = time.strftime("%Y%m%d-%H%M%S")
UMLS_CONCEPT = PYM["CUI"]


onto = get_ontology("http://OntoTox_vtest.org/" + timestr + "#")

with onto:
    
    
    
    # 1 Définition des classes :
    class ChemotherapyToxicity(Thing): pass
    
    class Grade(Thing): pass
    class Grade0(Grade): pass
    class Grade1(Grade): pass
    class Grade2(Grade): pass
    class Grade3(Grade): pass
    class Grade4(Grade): pass
    class Grade5(Grade): pass
    class GradeNull(Grade): pass
    
    #class MedTermPhenotype(Thing): pass
    #class CTCAEPhenotype(Thing)
    #class CUI(Thing): pass
    
    class Qualifier(Thing): pass
    
    class StartDate(Thing): pass
    class RelativeStartDate(StartDate): pass
    class AbsoluteStartDate(StartDate): pass
    
    AllDisjoint([RelativeStartDate, AbsoluteStartDate])
    
    class Drug(Thing): pass
    
    AllDisjoint([ChemotherapyToxicity, Qualifier, StartDate, Drug])
    
    
    # 2 Définition des propriétés
    class hasGrade(ChemotherapyToxicity >> Grade, FunctionalProperty): pass
    class isHypothetical(ChemotherapyToxicity >> bool, FunctionalProperty): pass
    class isNegative(ChemotherapyToxicity >> bool, FunctionalProperty): pass
    class concernsPatient(ChemotherapyToxicity >> bool, FunctionalProperty): pass
    class hasValueInText(ChemotherapyToxicity >> str, FunctionalProperty): pass
    #class hasCUI(ChemotherapyToxicity >> CUI, FunctionalProperty): pass
    class associatedWithTreatment(ChemotherapyToxicity >> Drug): pass
    class hasRelativeStartDate(ChemotherapyToxicity >> RelativeStartDate): pass
    class hasAbsoluteStartDate(ChemotherapyToxicity >> AbsoluteStartDate): pass
    class isHypothetical(Grade >> bool, FunctionalProperty): pass
    class isNegative(Grade >> bool, FunctionalProperty): pass
    class concernsPatient(Grade >> bool, FunctionalProperty): pass
    class hasValueInText(Grade >> str, FunctionalProperty): pass

timestr = time.strftime("%Y%m%d-%H%M%S")    
path_owl = "ontologies/"
name_owl = timestr + "_empty.owl"    
onto.save(path_owl + name_owl)


### Remplir onto avec collection de docs 

In [31]:
onto = get_ontology(path_owl + name_owl).load()
with onto :
    # 1er parcours : création des individus tox
    for doc in annotated_pymedext_documents2:
        anns_tox_quickumls = doc.get_annotations(_type="QuickUMLS")
        for ann_tox in anns_tox_quickumls :
            # classe concept UMLS
            cui_code = ann_tox.attributes["cui"]
            cui_label = ann_tox.attributes["label"]
            cui_class = UMLS_CONCEPT[cui_code]
            # classe toxicité
            tox_hyp = ann_tox.attributes["hypothesis"] == "hypothesis"
            tox_neg = ann_tox.attributes["negation"] == "neg"
            tox_pat = ann_tox.attributes["context"] == "patient"
            tox_value = ann_tox.value
            name_ind_tox = ann_tox.ID
            # L'individu tox instancie les deux classes
            name_ind_tox = ChemotherapyToxicity(name_ind_tox,
                                               isHypothetical = tox_hyp,
                                               isNegative = tox_neg,
                                               concernsPatient = tox_pat,
                                               hasValueInText = tox_value)
            name_ind_tox.is_a.append(cui_class)
            #name_ind_tox = cui_class()
            name_ind_tox.label.append(tox_value)
             
            
timestr = time.strftime("%Y%m%d-%H%M%S")    
path_owl = "ontologies/"
name_owl = timestr + "_cui.owl"    
onto.save(path_owl + name_owl)

In [32]:
dict_class_grade = {'0': 'Grade0',
                  '1': 'Grade1',
                  '2': 'Grade2',
                  '3': 'Grade3',
                  '4': 'Grade4',
                  '5': 'Grade5',
                  'null':'GradeNull'}

onto = get_ontology(path_owl + name_owl).load()
with onto :
    # 2ème parcours pour les relations
    for doc in annotated_pymedext_documents2:
        relations_tox = doc.get_relations(_type="to_onto")
        for rel in relations_tox :
            if rel.attributes["objProp"] == "hasGrade":
                # créer les individus grades
                ann_grade = doc.get_annotation_by_id(rel.target)
                ClassGrade = onto[dict_class_grade[ann_grade.attributes["normalized_num"]]]
                grade_hyp = ann_grade.attributes["hypothesis"] == "hypothesis"
                grade_neg = ann_grade.attributes["negation"] == "neg"
                grade_pat = ann_grade.attributes["context"] == "patient"
                grade_value = ann_grade.value
                name_ind_grade = ann_grade.ID
                name_ind_grade = ClassGrade(name_ind_grade,
                                          isHypothetical = grade_hyp,
                                          isNegative = grade_neg,
                                          concernsPatient = grade_pat,
                                          hasValueInText = grade_value)
                ann_tox_ID = doc.get_annotation_by_id(rel.head).to_dict()["ID"]
                name_ind_tox = getattr(onto, ann_tox_ID)
                #print(getattr(onto, ann_tox_ID))
                #ind = getattr(onto, ann_tox_ID)
                #print(ind)
                #ind.hasGrad(name_ind_grad)
                hasGrade[name_ind_tox].append(name_ind_grade)
                name_ind_grade.label.append(grade_value)
            
timestr = time.strftime("%Y%m%d-%H%M%S")    
path_owl = "ontologies/"
name_owl = timestr + "_grade.owl"    
onto.save(path_owl + name_owl)

20210503-162309_empty.c429913e-ac2b-11eb-acdd-0242ac110002
20210503-162309_empty.c4cc32a4-ac2b-11eb-acdd-0242ac110002
20210503-162309_empty.c5cd1452-ac2b-11eb-acdd-0242ac110002
20210503-162309_empty.c5cd14c0-ac2b-11eb-acdd-0242ac110002
20210503-162309_empty.c5cd151a-ac2b-11eb-acdd-0242ac110002
20210503-162309_empty.c5d518fa-ac2b-11eb-acdd-0242ac110002
20210503-162309_empty.c5d51a62-ac2b-11eb-acdd-0242ac110002
20210503-162309_empty.c5d51ae4-ac2b-11eb-acdd-0242ac110002
20210503-162309_empty.c5d51ae4-ac2b-11eb-acdd-0242ac110002
20210503-162309_empty.c5d51bb6-ac2b-11eb-acdd-0242ac110002
20210503-162309_empty.c6a29956-ac2b-11eb-acdd-0242ac110002
20210503-162309_empty.c6c2099e-ac2b-11eb-acdd-0242ac110002
20210503-162309_empty.c6c56e54-ac2b-11eb-acdd-0242ac110002
20210503-162309_empty.c6c56e54-ac2b-11eb-acdd-0242ac110002
20210503-162309_empty.c6c56f8a-ac2b-11eb-acdd-0242ac110002
20210503-162309_empty.c6c94bfa-ac2b-11eb-acdd-0242ac110002
20210503-162309_empty.c9bcbd38-ac2b-11eb-acdd-0242ac1100

## Brouillon

In [27]:
v1 = "a"
v2 = "b"
v3 = "a"

var_bool = v1 == v3

In [28]:
var_bool

True

In [43]:
a="grade1"
b="g1"
c="G1"
d="grade 1"
e="gradeI"
f="gI "
g="GI "
h="grade I"
i="grade2"
j="g2"
k="G2"
l="grade 2"
m="gradeII "
n="gII"
o="GII"
p="grade IV"
q="gradeIV"
r="gIV"
s="GIV"
t="grade V"
u="grade V"
v="gradeV"
w="gV"
x="GV"
y="grade V"
z="gI"
z1="GI"
z2 = "G3-4"
z3 = "grade1-2"
z4="grade un"
z5="g un"
z6= "g DEUX"
liste=[a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z1,z2,z3,z4,z5,z6]

In [45]:
for i in liste:
    print(i)
    print(re.search("([0-5]|III|IV|II|I|V)", i).group(0))

grade1
1
g1
1
G1
1
grade 1
1
gradeI
I
gI 
I
GI 
I
grade I
I
grade2
2
g2
2
G2
2
grade 2
2
gradeII 
II
gII
II
GII
II
grade IV
IV
gradeIV
IV
gIV
IV
GIV
IV
grade V
V
grade V
V
gradeV
V
gV
V
GV
V
grade V
V
GI
I
G3-4
3
grade1-2
1
grade un


AttributeError: 'NoneType' object has no attribute 'group'

In [31]:
re.search("[Gg][Rr][Aa][Dd][Ee]\\s*[0-5]*[I|V]*|[Gg]\\s*([[0-5]|(III|IV|II|I|V))", "grade1-2")

<re.Match object; span=(0, 6), match='grade1'>

In [47]:
j=1
for i in liste:
    print(i)
    if (re.search("[Gg][Rr][Aa][Dd][Ee]\\s*[0-5]*[I|V]*|[Gg]\\s*([[0-5]|(III|IV|II|I|V)|(un|deux|trois|quatre|cinq))", i)) is None :
        print('pb',i)
    else :
        print(re.search("[Gg][Rr][Aa][Dd][Ee]\\s*[0-5]*[I|V]*|[Gg]\\s*([[0-5]|(III|IV|II|I|V)|(un|deux|trois|quatre|cinq))", i))
        tata=re.search("[Gg][Rr][Aa][Dd][Ee]\\s*[0-5]*[I|V]*|[Gg]\\s*([[0-5]|(III|IV|II|I|V)|\\s+(un|deux|trois|quatre|cinq))", i).group(0)
        print(re.search("([0-5]|III|IV|II|I|V|(un|deux|trois|quatre|cinq))", tata).group(0))


grade1
<re.Match object; span=(0, 6), match='grade1'>
1
g1
<re.Match object; span=(0, 2), match='g1'>
1
G1
<re.Match object; span=(0, 2), match='G1'>
1
grade 1
<re.Match object; span=(0, 7), match='grade 1'>
1
gradeI
<re.Match object; span=(0, 6), match='gradeI'>
I
gI 
<re.Match object; span=(0, 2), match='gI'>
I
GI 
<re.Match object; span=(0, 2), match='GI'>
I
grade I
<re.Match object; span=(0, 7), match='grade I'>
I
grade2
<re.Match object; span=(0, 6), match='grade2'>
2
g2
<re.Match object; span=(0, 2), match='g2'>
2
G2
<re.Match object; span=(0, 2), match='G2'>
2
grade 2
<re.Match object; span=(0, 7), match='grade 2'>
2
gradeII 
<re.Match object; span=(0, 7), match='gradeII'>
II
gII
<re.Match object; span=(0, 3), match='gII'>
II
GII
<re.Match object; span=(0, 3), match='GII'>
II
grade IV
<re.Match object; span=(0, 8), match='grade IV'>
IV
gradeIV
<re.Match object; span=(0, 7), match='gradeIV'>
IV
gIV
<re.Match object; span=(0, 3), match='gIV'>
IV
GIV
<re.Match object; span=(0, 3), 

  


AttributeError: 'NoneType' object has no attribute 'group'

In [88]:
len(liste)

25