In [2]:
import pandas as pd
from ephesus.sentence import load_model, return_label
from ephesus.nlp import TrainerNGAP
from ephesus.data import get_data_json, get_data_targets_json

# try out the model

In [3]:
# Relative path of the saved model that the project helper loads.
path = "../models/model_v2/model-best"

# `model` is reused by the labelling cells further down.
model = load_model(path)

In [4]:
# Sample French sentence used to try the model on a single input.
sentence = "prise de sang faite ce jour à 18h"

# return_label gives back (text, label) pairs, as the displayed result shows.
words = return_label(sentence, model)

In [5]:
# Show the (text, label) pairs extracted from the sample sentence.
words

[('prise de sang', 'Treatment'), ('ce jour', 'Date'), ('18h', 'Time')]

# apply the model to get the treatments

In [6]:
# Load the transcriptions through the project helper; the preview shows a
# `fichier` (file name) column and a `translation` (text) column.
df = get_data_json()
df.head()

Unnamed: 0,fichier,translation
0,3e7ffb67-8872-4172-bf18-4fe338ecae3f_64e118fc-...,"Nouvelle ordonnance pour madame Vilain, valabl..."
1,2d035c4b-cdfa-4982-87dc-916fe07a0824_50e8d80a-...,"Prise de sang réalisée au domicile, ce jour, l..."
2,f0caa21b-c11d-40a3-98ec-e2df3d2b9cc4_3ab44a1b-...,Prise de sang réalisée le 12 mai puis le 9 jui...
3,a5050600-95ef-43d8-a5fa-a57c791bf843_2af70417-...,"Injection intramusculaire cet après-midi, pas ..."
4,9f980dcf-b431-4e67-876f-2b8e288b7900_777f3b22-...,Test PCR remboursable fait le 8 février à 11h1...


In [7]:
# Run the model on every transcription; each cell of `entities` holds the
# list of (text, label) pairs found in that row's text.
df["entities"] = df["translation"].map(lambda text: return_label(text, model))
df.head()

Unnamed: 0,fichier,translation,entities
0,3e7ffb67-8872-4172-bf18-4fe338ecae3f_64e118fc-...,"Nouvelle ordonnance pour madame Vilain, valabl...","[(valable 6 mois, Duration), (domicile, Locati..."
1,2d035c4b-cdfa-4982-87dc-916fe07a0824_50e8d80a-...,"Prise de sang réalisée au domicile, ce jour, l...","[(Prise de sang, Treatment), (domicile, Locati..."
2,f0caa21b-c11d-40a3-98ec-e2df3d2b9cc4_3ab44a1b-...,Prise de sang réalisée le 12 mai puis le 9 jui...,"[(Prise de sang, Treatment), (12 mai, Date), (..."
3,a5050600-95ef-43d8-a5fa-a57c791bf843_2af70417-...,"Injection intramusculaire cet après-midi, pas ...","[(Injection intramusculaire, Treatment), (cet ..."
4,9f980dcf-b431-4e67-876f-2b8e288b7900_777f3b22-...,Test PCR remboursable fait le 8 février à 11h1...,"[(Test PCR, Treatment), (8 février, Date), (11..."


In [8]:
# Inspect the full entity list of the row labelled 0.
df.loc[0, "entities"]

[('valable 6 mois', 'Duration'),
 ('domicile', 'Location'),
 ('une fois par mois', 'Frequency'),
 ('prise de sang', 'Treatment'),
 ('prise de sang', 'Treatment'),
 ('27 juillet', 'Date'),
 ('26 octobre', 'Date'),
 ('30 novembre', 'Date'),
 ('28 décembre', 'Date'),
 ('11h', 'Time')]

In [9]:
def keep_treatment_only(entities):
    """Return the text of every entity whose label is ``"Treatment"``.

    ``entities`` is an iterable of indexable items in which position 0 holds
    the text and position 1 holds the label. Input order is preserved and
    duplicates are kept.
    """
    treatments = []
    for item in entities:
        if item[1] == "Treatment":
            treatments.append(item[0])
    return treatments

In [10]:
# Reduce each entity list to the texts labelled "Treatment".
df["treatments"] = df["entities"].map(keep_treatment_only)
df.head()

Unnamed: 0,fichier,translation,entities,treatments
0,3e7ffb67-8872-4172-bf18-4fe338ecae3f_64e118fc-...,"Nouvelle ordonnance pour madame Vilain, valabl...","[(valable 6 mois, Duration), (domicile, Locati...","[prise de sang, prise de sang]"
1,2d035c4b-cdfa-4982-87dc-916fe07a0824_50e8d80a-...,"Prise de sang réalisée au domicile, ce jour, l...","[(Prise de sang, Treatment), (domicile, Locati...",[Prise de sang]
2,f0caa21b-c11d-40a3-98ec-e2df3d2b9cc4_3ab44a1b-...,Prise de sang réalisée le 12 mai puis le 9 jui...,"[(Prise de sang, Treatment), (12 mai, Date), (...",[Prise de sang]
3,a5050600-95ef-43d8-a5fa-a57c791bf843_2af70417-...,"Injection intramusculaire cet après-midi, pas ...","[(Injection intramusculaire, Treatment), (cet ...",[Injection intramusculaire]
4,9f980dcf-b431-4e67-876f-2b8e288b7900_777f3b22-...,Test PCR remboursable fait le 8 février à 11h1...,"[(Test PCR, Treatment), (8 février, Date), (11...",[Test PCR]


In [11]:
# Spread each treatment list over columns: one row per document, one column
# per list position (shorter lists leave the trailing columns empty).
df_split = pd.DataFrame(data=df["treatments"].tolist())
df_split.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,prise de sang,prise de sang,,,,,,,
1,Prise de sang,,,,,,,,
2,Prise de sang,,,,,,,,
3,Injection intramusculaire,,,,,,,,
4,Test PCR,,,,,,,,


In [12]:
# Attach the source file name to every row (aligned on the row index).
df_split = df_split.assign(filename=df["fichier"])
df_split.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,filename
0,prise de sang,prise de sang,,,,,,,,3e7ffb67-8872-4172-bf18-4fe338ecae3f_64e118fc-...
1,Prise de sang,,,,,,,,,2d035c4b-cdfa-4982-87dc-916fe07a0824_50e8d80a-...
2,Prise de sang,,,,,,,,,f0caa21b-c11d-40a3-98ec-e2df3d2b9cc4_3ab44a1b-...
3,Injection intramusculaire,,,,,,,,,a5050600-95ef-43d8-a5fa-a57c791bf843_2af70417-...
4,Test PCR,,,,,,,,,9f980dcf-b431-4e67-876f-2b8e288b7900_777f3b22-...


In [13]:
# Go from wide to long: one row per (file, treatment). `stack` drops the
# empty cells, and `level_1` keeps the position of the treatment in its list.
df_stack = (
    df_split
    .set_index("filename")
    .stack()
    .rename("treatment")
    .reset_index()
)
df_stack.head()

Unnamed: 0,filename,level_1,treatment
0,3e7ffb67-8872-4172-bf18-4fe338ecae3f_64e118fc-...,0,prise de sang
1,3e7ffb67-8872-4172-bf18-4fe338ecae3f_64e118fc-...,1,prise de sang
2,2d035c4b-cdfa-4982-87dc-916fe07a0824_50e8d80a-...,0,Prise de sang
3,f0caa21b-c11d-40a3-98ec-e2df3d2b9cc4_3ab44a1b-...,0,Prise de sang
4,a5050600-95ef-43d8-a5fa-a57c791bf843_2af70417-...,0,Injection intramusculaire


# now it's time to get the targets

In [14]:
# Keep only the file name and the first NGAP code of each target record.
df_targets = get_data_targets_json().loc[:, ["fichier", "NGAP_1"]]
df_targets.head()

Unnamed: 0,fichier,NGAP_1
0,2d035c4b-cdfa-4982-87dc-916fe07a0824_249f1d30-...,PSG
1,2d035c4b-cdfa-4982-87dc-916fe07a0824_019a0add-...,PC19
2,55d674cc-3389-4cf6-ab7c-1f1b9fa1b6ed_02a55241-...,TAID19
3,9f980dcf-b431-4e67-876f-2b8e288b7900_1a26bbf2-...,TAIC19
4,2d035c4b-cdfa-4982-87dc-916fe07a0824_545d42f0-...,PSG


In [15]:
# clean filename for merge
def clean_filename(filename):
    """Strip the trailing suffix from ``filename`` to build a merge key.

    Names containing ``"translation.json"`` lose their last 17 characters;
    every other name loses its last 16 characters.
    """
    # NOTE(review): the check is a substring test, not an end-of-string test,
    # and the 16-character suffix of the other file kind is not visible here.
    if "translation.json" in filename:
        return filename[:-17]
    return filename[:-16]
# Build the same join key on both frames.
df_stack["filename_cleaned"] = df_stack["filename"].map(clean_filename)
df_targets["filename_cleaned"] = df_targets["fichier"].map(clean_filename)
# Left join: every extracted treatment row is kept and receives the NGAP_1
# value of the matching target file when there is one.
df = pd.merge(df_stack, df_targets, how="left", on="filename_cleaned")
df.head()

Unnamed: 0,filename,level_1,treatment,filename_cleaned,fichier,NGAP_1
0,3e7ffb67-8872-4172-bf18-4fe338ecae3f_64e118fc-...,0,prise de sang,3e7ffb67-8872-4172-bf18-4fe338ecae3f_64e118fc-...,3e7ffb67-8872-4172-bf18-4fe338ecae3f_64e118fc-...,PSG
1,3e7ffb67-8872-4172-bf18-4fe338ecae3f_64e118fc-...,1,prise de sang,3e7ffb67-8872-4172-bf18-4fe338ecae3f_64e118fc-...,3e7ffb67-8872-4172-bf18-4fe338ecae3f_64e118fc-...,PSG
2,2d035c4b-cdfa-4982-87dc-916fe07a0824_50e8d80a-...,0,Prise de sang,2d035c4b-cdfa-4982-87dc-916fe07a0824_50e8d80a-...,2d035c4b-cdfa-4982-87dc-916fe07a0824_50e8d80a-...,PSG
3,f0caa21b-c11d-40a3-98ec-e2df3d2b9cc4_3ab44a1b-...,0,Prise de sang,f0caa21b-c11d-40a3-98ec-e2df3d2b9cc4_3ab44a1b-...,f0caa21b-c11d-40a3-98ec-e2df3d2b9cc4_3ab44a1b-...,PSG
4,a5050600-95ef-43d8-a5fa-a57c791bf843_2af70417-...,0,Injection intramusculaire,a5050600-95ef-43d8-a5fa-a57c791bf843_2af70417-...,a5050600-95ef-43d8-a5fa-a57c791bf843_2af70417-...,IM


In [16]:
# Keep the treatment text and its NGAP code, renamed to X / y.
df = df.loc[:, ["treatment", "NGAP_1"]].rename(
    columns={"treatment": "X", "NGAP_1": "y"}
)
df.head()

Unnamed: 0,X,y
0,prise de sang,PSG
1,prise de sang,PSG
2,Prise de sang,PSG
3,Prise de sang,PSG
4,Injection intramusculaire,IM


# Now that I have added this to the package, I can call TrainerNGAP() instead

In [17]:
# Instantiate the packaged trainer with its default arguments.
trainer = TrainerNGAP()

In [18]:
# Same model directory as the one loaded earlier in this notebook.
path = "../models/model_v2/model-best"
# Presumably rebuilds the X / y frame prepared by hand above — TODO confirm in TrainerNGAP.
trainer.get_training_data(path)

In [19]:
# Fit the NGAP model. The log shows no CUDA driver was found, and the run
# ends at epoch 10 of 50 (presumably early stopping — TODO confirm).
trainer.train_ngap()

2022-06-03 16:42:52.506678: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-06-03 16:42:52.506702: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-06-03 16:42:52.506718: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (think): /proc/driver/nvidia/version does not exist
2022-06-03 16:42:52.506907: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50


In [20]:
# Evaluation score returned by the trainer (about 0.69 in this run); which
# metric it is cannot be told from this notebook — TODO confirm.
score = trainer.eval_ngap()
score



0.6920391321182251

In [21]:
# Predictions frame: the preview shows the tokenised input (X), the predicted
# NGAP code and a softmax score for each row.
df_pred = trainer.predict_ngap()
df_pred.head()



Unnamed: 0,X,NGAP,softmax
0,"[prise, sang]",PSG,0.811144
1,"[test, antigénique]",TAID19,0.488484
2,"[prise, sang]",PSG,0.811144
3,"[pprise, sang]",PSG,0.600769
4,"[pansement, simple]",PSTA,0.82505
