In [91]:
import pandas as pd
import os

import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime

from spacy import displacy
import edsnlp, edsnlp.pipes as eds

# Directory holding the cleaned, pickled tables: the cells below read their
# DataFrames from it and write the updated risk-factor table back into it.
input_dir = "../data_clean/"
# NOTE(review): presumably the output directory for figures — it is not
# referenced by any of the cells visible here; confirm it is still needed.
visuals_dir = "../visuals/" 

In [92]:
def _read_clean_table(filename):
    """Unpickle and return one table stored under `input_dir`.

    NOTE: unpickling can execute arbitrary code, so only files produced by
    this project's own cleaning step should be placed in `input_dir`.
    """
    return pd.read_pickle(os.path.join(input_dir, filename))


# One DataFrame per cleaned table; each pickle is named after its variable.
df_person = _read_clean_table('df_person.pkl')
df_bio = _read_clean_table('df_bio.pkl')
df_note = _read_clean_table('df_note.pkl')
df_visit = _read_clean_table('df_visit.pkl')
df_condition = _read_clean_table('df_condition.pkl')
df_facteur_risque = _read_clean_table('df_facteur_risque.pkl')

In [93]:
def _build_risk_factor_pipeline(entity_pipe):
    """Build an EDS-NLP pipeline around a single entity-extraction component.

    The pipeline runs, in order: sentence splitting, text normalisation, the
    given entity component, then the negation and family qualifiers. The
    qualifiers populate `ent._.negation` and `ent._.family`, which the cells
    below use to discard mentions that are negated or about a relative.

    Parameters
    ----------
    entity_pipe
        An EDS-NLP component (e.g. `eds.tobacco()`, `eds.matcher(...)`) that
        produces the entities to qualify.

    Returns
    -------
    The assembled pipeline, callable on a note's text.
    """
    nlp = edsnlp.blank("eds")
    nlp.add_pipe(eds.sentences())
    nlp.add_pipe(eds.normalizer())
    nlp.add_pipe(entity_pipe)
    nlp.add_pipe(eds.negation())
    nlp.add_pipe(eds.family())
    return nlp


# Tobacco and alcohol use the dedicated EDS-NLP components.
nlp_smoker = _build_risk_factor_pipeline(eds.tobacco())
nlp_alcool = _build_risk_factor_pipeline(eds.alcohol())

# Contraception has no dedicated component, so a term matcher is used.
# The dict key becomes the label of the matched entities; it was previously
# "cancer_sein", which mislabelled contraception mentions as breast cancer.
# Terms are written lowercase and without accents because matching is done on
# the NORM attribute produced by `eds.normalizer`.
terms = dict(
    contraception=[
        "pilule",
        "sterilet",
        "preservatif",
        "implant",
        "patch",
        "anneau",
        "diaphragme",
        "spermicide",
        "ligature",
    ],
)
# Ending the cell on an assignment (not a bare `add_pipe(...)` expression)
# avoids echoing a component repr as cell output.
nlp_contraception = _build_risk_factor_pipeline(
    eds.matcher(terms=terms, attr="NORM")
)




<edsnlp.pipes.qualifiers.negation.negation.NegationQualifier at 0x14001e310>

In [94]:
# Flag every person who has at least one note containing a tobacco mention
# that is neither negated nor attributed to a family member.
df_facteur_risque['fumeur'] = False

# visit_occurrence_id -> person_id, built once instead of re-filtering
# df_visit for every note. drop_duplicates keeps the first row per visit,
# i.e. the same row a per-note `.iloc[0]` lookup would select.
first_visit_rows = df_visit.drop_duplicates(subset='visit_occurrence_id')
person_by_visit = dict(
    zip(first_visit_rows['visit_occurrence_id'], first_visit_rows['person_id'])
)

# `smokers` counts NOTES with a retained tobacco mention, not distinct persons.
smokers, total = 0, 0
smoker_person_ids = set()
unmatched_notes = 0  # positive notes whose visit is absent from df_visit

for text, visit_id in zip(df_note['note_text'], df_note['visit_occurrence_id']):
    total += 1
    doc = nlp_smoker(text)

    # Keep only mentions about the patient (not family) that are not negated.
    has_patient_mention = any(
        not ent._.family and not ent._.negation for ent in doc.ents
    )
    if not has_patient_mention:
        continue

    smokers += 1
    if visit_id in person_by_visit:
        smoker_person_ids.add(person_by_visit[visit_id])
    else:
        # Cannot attribute the note to a person; report it instead of
        # aborting the whole run with an IndexError.
        unmatched_notes += 1

# Single vectorised update instead of one `.loc` assignment per positive note.
df_facteur_risque.loc[
    df_facteur_risque['person_id'].isin(list(smoker_person_ids)), 'fumeur'
] = True

print(f"Total notes: {total}")
print(f"Total fumeurs: {smokers}")
if unmatched_notes:
    print(f"Notes with a tobacco mention but no matching visit: {unmatched_notes}")

# Overwrites the risk-factor pickle that was loaded from `input_dir`.
pd.to_pickle(df_facteur_risque, os.path.join(input_dir, 'df_facteur_risque.pkl'))

Total notes: 996
Total fumeurs: 90


In [95]:
# The 'alcool' column is initialised to False and intentionally left untouched
# by this cell (see the rationale below).
df_facteur_risque['alcool'] = False

# `alcohol` counts NOTES with a retained alcohol mention, not distinct persons.
alcohol, total = 0, 0

for text in df_note['note_text']:
    total += 1
    doc = nlp_alcool(text)

    # Keep only mentions about the patient (not family) that are not negated.
    if any(not ent._.family and not ent._.negation for ent in doc.ents):
        alcohol += 1

# Design decision: df_facteur_risque is NOT updated from the NLP result for
# alcohol, because it yields too many false positives caused by a line present
# in the treatment advice given to the patient. For that reason no
# visit/person lookup is performed here; the count is informational only.

print(f"Total notes: {total}")
print(f"Total consomateurs d'alcool: {alcohol}")



Total notes: 996
Total consomateurs d'alcool: 22


In [96]:
# Flag every person who has at least one note containing a contraception term
# that is neither negated nor attributed to a family member.
# NOTE(review): the recorded run of this cell reported 0 matching notes out of
# 996 — confirm that the term list / matcher configuration is adequate.
# NOTE(review): this cell does not write df_facteur_risque back to disk, so the
# 'contraception' column only exists in memory unless it is saved elsewhere.
df_facteur_risque['contraception'] = False

# visit_occurrence_id -> person_id, built once instead of re-filtering
# df_visit for every note. drop_duplicates keeps the first row per visit,
# i.e. the same row a per-note `.iloc[0]` lookup would select.
first_visit_rows = df_visit.drop_duplicates(subset='visit_occurrence_id')
person_by_visit = dict(
    zip(first_visit_rows['visit_occurrence_id'], first_visit_rows['person_id'])
)

# `contraception` counts NOTES with a retained mention, not distinct persons.
contraception, total = 0, 0
contraception_person_ids = set()
unmatched_notes = 0  # positive notes whose visit is absent from df_visit

for text, visit_id in zip(df_note['note_text'], df_note['visit_occurrence_id']):
    total += 1
    doc = nlp_contraception(text)

    # Keep only mentions about the patient (not family) that are not negated.
    has_patient_mention = any(
        not ent._.family and not ent._.negation for ent in doc.ents
    )
    if not has_patient_mention:
        continue

    contraception += 1
    if visit_id in person_by_visit:
        contraception_person_ids.add(person_by_visit[visit_id])
    else:
        # Cannot attribute the note to a person; report it instead of
        # aborting the whole run with an IndexError.
        unmatched_notes += 1

# Single vectorised update instead of one `.loc` assignment per positive note.
df_facteur_risque.loc[
    df_facteur_risque['person_id'].isin(list(contraception_person_ids)),
    'contraception',
] = True

print(f"Total notes: {total}")
print(f"Total utilisatrices de contraception: {contraception}")
if unmatched_notes:
    print(f"Notes with a contraception mention but no matching visit: {unmatched_notes}")

Total notes: 996
Total utilisatrices de contraception: 0
