# Transformation of tabular dataset into SPHN RDF data
The model is slightly modified to be more patient-centric.

In [2]:
!pip install pandas rdflib scipy
import pandas as pd
import numpy as np
import joblib
import re
import uuid
from rdflib import ConjunctiveGraph
from string import Template
from itertools import accumulate
from scipy.stats import norm, genextreme, exponweib
from datetime import datetime, date, timedelta
from IPython.display import display



In [3]:
# Load the synthetic cohort (first CSV column is the row index), expose the
# 'output' column under the name 'outcome', then shuffle every row with a
# fixed seed and renumber the rows from 0.
df = (
    pd.read_csv('./syn_data.csv', index_col=0)
    .rename(columns={'output': 'outcome'})
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
df

Unnamed: 0,hospital_stay_length,gcs,nb_acte,gender,entry,outcome,entry_code,ica,ttt,ica_therapy,...,ivh,age,nimodipine,paracetamol,nad,corotrop,morphine,dve,atl,iot
0,11.525542,18.050848,3.561477,0,1,0.0,2,1,1,0,...,0,46.653842,22,-1,-1,-1,-1,-1,-1,-1
1,4.096719,17.164788,20.830227,0,5,0.0,3,8,2,0,...,0,62.536000,25,-1,101,-1,-1,73,-1,49
2,92.015036,18.158804,29.897650,1,1,1.0,7,10,1,0,...,0,49.631746,-1,-1,-1,-1,-1,24,-1,54
3,66.217942,17.936781,45.870606,0,2,1.0,6,6,2,0,...,1,68.491810,-1,-1,-1,-1,-1,18,-1,44
4,25.694681,18.088936,4.813020,0,1,1.0,1,6,2,0,...,0,73.454985,23,-1,-1,-1,57,-1,-1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9.744466,19.372323,106.752080,1,2,1.0,4,6,1,0,...,0,50.624381,13,-1,74,-1,-1,57,-1,35
9996,10.676763,15.452208,45.423368,0,1,0.0,0,0,1,0,...,0,39.705397,-1,-1,-1,-1,-1,18,-1,41
9997,33.567485,16.731739,18.703820,0,6,2.0,2,3,1,0,...,0,51.617016,22,86,-1,-1,-1,67,-1,49
9998,9.812358,18.074602,7.796590,0,4,0.0,1,2,2,0,...,0,72.462350,22,-1,102,-1,-1,83,-1,44


In [22]:
#size_train = int(len(df) * 0.8)

In [23]:
# Column groups of the tabular dataset. gen_patient_rdf dispatches on these
# lists to decide which RDF template a column is rendered with.
numerical = ['hospital_stay_length', 'gcs', 'nb_acte', 'age']
categorical = [
    'gender', 'entry', 'entry_code', 'ica', 'ttt', 'ica_therapy', 'fever',
    'o2_clinic', 'o2', 'hta', 'hct', 'tabagisme', 'etOH', 'diabete',
    'headache', 'instable', 'vasospasme', 'ivh', 'outcome',
]

# phenotypes = ['gender', 'ica', 'fever', 'hta', 'hct', 'tabagisme', 'etOH', 'diabete', '']

# Timed care events, split into drug administrations and procedures.
drug_events = ["nimodipine", "paracetamol", "nad", "corotrop", "morphine"]
proc_events = ["dve", "atl", "iot"]

# All care events: drugs first, then procedures.
events = drug_events + proc_events

# Code attached to each care event when it is written to RDF.
# NOTE(review): the original notes labelled the drug codes "ACT"; they look
# like ATC codes — confirm.
events_codes = {
    # drug administration events
    "nimodipine": "C08CA06",
    "paracetamol": "N02BE01",
    "nad": "C01CA03",
    "corotrop": "C01CE02",
    "morphine": "N02AA01",
    # procedures
    # Removal of Drainage Device from Cerebral Ventricle, External Approach (ICD-10)
    "dve": "00P6X0Z",
    # NOTE(review): the original note for this code repeated the 'dve'
    # description word for word (labelled ICD-10) — confirm what Z98.6 denotes.
    "atl": "Z98.6",
    # ICD-10 / procedure; original note read "thacheotomie" (presumably
    # tracheotomy) — confirm.
    "iot": "0BH17EZ",
}

In [24]:
# Turtle prefix header. It is prepended to every snippet before parsing,
# because each snippet is parsed as a standalone Turtle document.
prefix = """   
@prefix sphn: <http://sphn.org/> .
@prefix nvasc: <http://nvasc.org/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
"""

# Age node (value in years plus determination date-time) linked to the patient.
# Placeholders: $age_id, $age_determination_date, $age_value, $patient_id.
sphn_age_template = Template(
    """
    nvasc:age_$age_id a sphn:Age ;
        sphn:hasDeterminationDateTime "$age_determination_date"^^xsd:dateTime ;
        sphn:hasQuantity [ rdf:type sphn:Quantity ;
                            sphn:hasValue "$age_value" ;
                            sphn:hasUnit "years" ] .
                            
    nvasc:synth_patient_$patient_id nvasc:hasAge nvasc:age_$age_id .
    """
)

# Administrative gender node linked to the patient.
# Placeholders: $gender_id, $gender_code, $patient_id.
sphn_gender_template = Template(
    """
    nvasc:gender_$gender_id a sphn:AdministrativeGender ;
        sphn:hasCode nvasc:code_$gender_code .
        
    nvasc:synth_patient_$patient_id nvasc:hasGender nvasc:gender_$gender_id .
    """
)

# Procedure node (label, code, start date-time) linked to the patient.
# Placeholders: $proc_id, $proc_label, $proc_code, $proc_start_date, $patient_id.
# The class name was previously misspelled "sphn:Pocedure".
sphn_procedure_template = Template(
    """
    nvasc:$proc_id a sphn:Procedure ;
        rdfs:label "$proc_label"^^xsd:string ;
        sphn:hasCode nvasc:code_$proc_code ;
        sphn:hasStartDateTime "$proc_start_date"^^xsd:dateTime .
        
    nvasc:synth_patient_$patient_id nvasc:hasProcedure nvasc:$proc_id .
    """
)

# Drug administration event (label, drug, start date-time) linked to the patient.
# Placeholders: $drug_adm_id, $drug_adm_label, $drug_code, $drug_start_date, $patient_id.
sphn_drug_administration_template = Template(
    """
    nvasc:$drug_adm_id a sphn:DrugAdministrationEvent ;
        rdfs:label "$drug_adm_label"^^xsd:string ;
        sphn:hasDrug nvasc:drug_$drug_code ;
        sphn:hasStartDateTime "$drug_start_date"^^xsd:dateTime .
    
    nvasc:synth_patient_$patient_id nvasc:hasDrugAdministrationEvent nvasc:$drug_adm_id .
    """
)

# Coded diagnosis carrying a record date-time, linked to the patient.
# Placeholders: $diag_id, $diag_label, $diag_code, $diag_date, $patient_id.
sphn_timed_diagnosis_code_template = Template(
    """
    nvasc:$diag_id a sphn:Diagnosis ;
        rdfs:label "$diag_label"^^xsd:string ;
        sphn:hasCode nvasc:code_$diag_code ;
        sphn:hasRecordDateTime "$diag_date"^^xsd:dateTime .
        
    nvasc:synth_patient_$patient_id nvasc:hasDiagnosis nvasc:$diag_id .
    """
)

# Coded diagnosis without a date, linked to the patient.
# Placeholders: $diag_id, $diag_label, $diag_code, $patient_id.
sphn_diagnosis_code_template = Template(
    """
    nvasc:$diag_id a sphn:Diagnosis ;
        rdfs:label "$diag_label"^^xsd:string ;
        sphn:hasCode nvasc:code_$diag_code .
        
    nvasc:synth_patient_$patient_id nvasc:hasDiagnosis nvasc:$diag_id .
    """
)

# Diagnosis carrying a numeric quantity (value + unit), linked to the patient.
# Placeholders: $diag_id, $diag_label, $diag_value, $diag_unit, $patient_id.
sphn_diagnosis_quantity_template = Template(
    """
    nvasc:$diag_id a sphn:Diagnosis ;
        rdfs:label "$diag_label" ;
        sphn:hasQuantity [ rdf:type sphn:Quantity ;
                            sphn:hasValue "$diag_value" ;
                            sphn:hasUnit "$diag_unit" ] .
    
    nvasc:synth_patient_$patient_id nvasc:hasDiagnosis nvasc:$diag_id .
    """
)

# Single triple attaching the outcome resource to the patient.
# Placeholders: $patient_id, $outcome.
nvasc_outcome = Template(
    """
    nvasc:synth_patient_$patient_id nvasc:hasOutcome nvasc:outcome_$outcome .
    """
)


def gen_start_event(y_min=2020, y_max=2023):
    """Draw a random start date-time for a patient's timeline.

    The result is January 1st of ``y_min`` plus a random offset made of:

    * a whole number of days drawn uniformly from ``[0, (y_max - y_min) * 365]``
      (every year is counted as 365 days, leap days are ignored),
    * a number of hours drawn from a normal distribution (mean 12, std 5) and
      rounded, which may occasionally be negative or exceed 23,
    * a number of minutes drawn uniformly from ``[0, 60]`` and rounded.

    Args:
        y_min: First calendar year of the sampling window.
        y_max: Year bounding the window from above.

    Returns:
        A ``datetime`` object.
    """
    n_days = (y_max - y_min) * 365
    d0 = datetime.fromisoformat(f"{y_min}-01-01")
    # Give both bounds explicitly: np.random.uniform(x) binds x to `low` and
    # keeps high=1.0, so the single-argument form sampled with low > high.
    day_rand = round(np.random.uniform(0, n_days))
    delta = timedelta(
        days=day_rand,
        hours=round(norm.rvs(12, 5)),
        minutes=round(np.random.uniform(0, 60)),
    )
    return d0 + delta


def gen_patient_rdf(row, kg):
    """Convert one patient row into RDF and add the triples to ``kg``.

    Meant to be used with ``DataFrame.apply(..., axis=1, kg=kg)``; ``row.name``
    is used as the patient identifier.

    Every column of the row is dispatched on the module-level feature lists:

    * ``drug_events`` / ``proc_events``: a value of -1 means the event did not
      happen; any other value is used as an offset in hours from a randomly
      generated start date-time (one start per patient).
    * ``numerical``: ``age`` becomes an age node; the other columns become
      diagnosis nodes carrying a rounded quantity and a unit.
    * ``categorical``: ``gender`` and ``outcome`` get dedicated triples; the
      other columns become coded diagnosis nodes whose code is
      ``"<column>_<value>"``.

    Columns found in none of these lists are ignored.

    Args:
        row: pandas Series holding the features of one patient.
        kg: rdflib graph receiving the parsed triples (mutated in place).

    Returns:
        None.
    """
    patient_id = row.name
    d_start = gen_start_event()

    def add_turtle(body):
        # Each snippet is parsed as its own document, so it needs the prefixes.
        kg.parse(data=prefix + body, format="turtle")

    # column -> (rounding applied to the raw value, unit written to the graph)
    quantity_specs = {
        "hospital_stay_length": (round, "days"),
        "gcs": (lambda v: round(v, 2), "gcs"),
        "nb_acte": (round, "received medical treatments"),
    }

    for f in row.index:
        if f in drug_events:
            if row[f] != -1:
                d_event = d_start + timedelta(hours=row[f])
                add_turtle(
                    sphn_drug_administration_template.substitute(
                        drug_adm_id=uuid.uuid4(),
                        drug_adm_label=f,
                        drug_code=events_codes[f],
                        drug_start_date=d_event.isoformat(),
                        patient_id=patient_id,
                    )
                )
        elif f in proc_events:
            if row[f] != -1:
                d_event = d_start + timedelta(hours=row[f])
                add_turtle(
                    sphn_procedure_template.substitute(
                        proc_id=uuid.uuid4(),
                        proc_label=f,
                        proc_code=events_codes[f],
                        proc_start_date=d_event.isoformat(),
                        patient_id=patient_id,
                    )
                )
        elif f in numerical:
            if f == "age":
                add_turtle(
                    sphn_age_template.substitute(
                        patient_id=patient_id,
                        age_id=patient_id,
                        age_value=round(row[f]),
                        age_determination_date=d_start.isoformat(),
                    )
                )
            else:
                # A numerical column without a spec keeps the former fallback:
                # both value and unit are rendered as "None".
                rounder, unit = quantity_specs.get(f, (None, None))
                value = rounder(row[f]) if rounder is not None else None
                add_turtle(
                    sphn_diagnosis_quantity_template.substitute(
                        diag_id=uuid.uuid4(),
                        diag_label=f,
                        diag_value=value,
                        diag_unit=unit,
                        patient_id=patient_id,
                    )
                )
        elif f in categorical:
            if f == "gender":
                add_turtle(
                    sphn_gender_template.substitute(
                        patient_id=patient_id,
                        gender_id=patient_id,
                        gender_code=row[f],
                    )
                )
            elif f == "outcome":
                add_turtle(
                    nvasc_outcome.substitute(outcome=row[f], patient_id=patient_id)
                )
            else:
                add_turtle(
                    sphn_diagnosis_code_template.substitute(
                        diag_id=uuid.uuid4(),
                        diag_label=f,
                        diag_code=str(f) + "_" + str(row[f]),
                        patient_id=patient_id,
                    )
                )

### 10 with outcomes ; 0 without outcomes

In [18]:
# Tiny smoke-test split: all N patients keep their outcome in the graph.
N = 10
N_train = int(N)
print(f"size training set: {N_train}")

N_test = N - N_train
print(f"size test set: {N_test}")
assert N_train + N_test == N

# train_df holds the first N shuffled patients; the first N_train of them
# keep their outcome, the remaining ones have the outcome column removed.
train_df = df.iloc[:N]
with_outcome = df.iloc[:N_train]
display(with_outcome)

no_outcome = df.iloc[N_train:N].drop(columns=["outcome"])
display(no_outcome)

## Serialize data
# Both groups are written into the same graph.
kg = ConjunctiveGraph()
with_outcome.apply(gen_patient_rdf, axis=1, kg=kg)
no_outcome.apply(gen_patient_rdf, axis=1, kg=kg)
print(f"Generated {len(kg)} RDF triples")

kg.serialize("sphn_transductive_10_0.ttl", format="turtle")
kg.serialize("sphn_transductive_10_0.nt", format="nt")
# The dumped label list covers all N patients, including those whose outcome
# was left out of the graph.
joblib.dump(train_df["outcome"].astype(int).to_list(), "outcomes_10_0.joblib")

size training set: 10
size test set: 0


Unnamed: 0,hospital_stay_length,gcs,nb_acte,gender,entry,outcome,entry_code,ica,ttt,ica_therapy,...,ivh,age,nimodipine,paracetamol,nad,corotrop,morphine,dve,atl,iot
0,11.525542,18.050848,3.561477,0,1,0.0,2,1,1,0,...,0,46.653842,22,-1,-1,-1,-1,-1,-1,-1
1,4.096719,17.164788,20.830227,0,5,0.0,3,8,2,0,...,0,62.536,25,-1,101,-1,-1,73,-1,49
2,92.015036,18.158804,29.89765,1,1,1.0,7,10,1,0,...,0,49.631746,-1,-1,-1,-1,-1,24,-1,54
3,66.217942,17.936781,45.870606,0,2,1.0,6,6,2,0,...,1,68.49181,-1,-1,-1,-1,-1,18,-1,44
4,25.694681,18.088936,4.81302,0,1,1.0,1,6,2,0,...,0,73.454985,23,-1,-1,-1,57,-1,-1,-1
5,79.326486,19.861231,57.740076,1,2,0.0,6,2,2,0,...,0,63.528635,-1,-1,-1,-1,-1,18,-1,-1
6,58.099214,16.413057,12.872591,1,2,1.0,1,6,2,0,...,0,75.440254,29,60,-1,-1,-1,-1,-1,-1
7,32.369743,18.923624,47.861638,0,5,1.0,4,2,2,0,...,0,69.484445,-1,-1,-1,-1,-1,-1,-1,29
8,21.754013,17.931466,75.72145,0,4,1.0,4,6,1,0,...,0,55.587556,-1,-1,-1,-1,-1,50,-1,26
9,35.153225,18.375651,13.895873,1,4,2.0,1,7,2,0,...,0,70.47708,21,-1,-1,-1,-1,-1,-1,38


Unnamed: 0,hospital_stay_length,gcs,nb_acte,gender,entry,entry_code,ica,ttt,ica_therapy,fever,...,ivh,age,nimodipine,paracetamol,nad,corotrop,morphine,dve,atl,iot


Generated 1095 RDF triples




['outcomes_10_0.joblib']

### 950 with outcomes ; 50 without outcomes

In [25]:
# 95 % of the first N shuffled patients keep their outcome; the rest lose it.
N = 1000
N_train = int(N * 0.95)
print(f"size training set: {N_train}")

N_test = N - N_train
print(f"size test set: {N_test}")
assert N_train + N_test == N

train_df = df.iloc[:N]
with_outcome = df.iloc[:N_train]
display(with_outcome)

# Same columns minus 'outcome' for the patients that follow the first N_train.
no_outcome = df.iloc[N_train:N].drop(columns=["outcome"])
display(no_outcome)

size training set: 950
size test set: 50


Unnamed: 0,hospital_stay_length,gcs,nb_acte,gender,entry,outcome,entry_code,ica,ttt,ica_therapy,...,ivh,age,nimodipine,paracetamol,nad,corotrop,morphine,dve,atl,iot
0,11.525542,18.050848,3.561477,0,1,0.0,2,1,1,0,...,0,46.653842,22,-1,-1,-1,-1,-1,-1,-1
1,4.096719,17.164788,20.830227,0,5,0.0,3,8,2,0,...,0,62.536000,25,-1,101,-1,-1,73,-1,49
2,92.015036,18.158804,29.897650,1,1,1.0,7,10,1,0,...,0,49.631746,-1,-1,-1,-1,-1,24,-1,54
3,66.217942,17.936781,45.870606,0,2,1.0,6,6,2,0,...,1,68.491810,-1,-1,-1,-1,-1,18,-1,44
4,25.694681,18.088936,4.813020,0,1,1.0,1,6,2,0,...,0,73.454985,23,-1,-1,-1,57,-1,-1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
945,19.033918,21.532312,106.642836,0,1,1.0,1,6,2,0,...,0,63.528635,25,-1,-1,-1,-1,-1,-1,-1
946,9.326400,15.725852,18.830134,0,3,0.0,11,6,1,0,...,0,41.690667,-1,-1,-1,-1,69,50,-1,21
947,53.213185,15.779373,59.566413,0,4,0.0,3,1,1,0,...,0,31.764318,22,-1,-1,-1,-1,-1,-1,-1
948,28.945446,18.499537,47.904219,1,1,2.0,1,7,2,0,...,1,89.337143,20,-1,-1,-1,-1,-1,-1,-1


Unnamed: 0,hospital_stay_length,gcs,nb_acte,gender,entry,entry_code,ica,ttt,ica_therapy,fever,...,ivh,age,nimodipine,paracetamol,nad,corotrop,morphine,dve,atl,iot
950,73.3737,20.472699,90.854523,1,1,12,3,1,0,0,...,0,53.602286,-1,75,-1,-1,-1,44,-1,21
951,93.660709,17.523618,37.493576,0,4,0,1,1,0,0,...,0,41.690667,-1,-1,-1,73,-1,47,-1,15
952,0.852227,18.124002,12.549743,0,5,0,4,1,0,0,...,0,42.683302,19,-1,-1,-1,-1,54,-1,35
953,36.210051,17.578128,14.950385,1,1,6,6,2,0,0,...,0,72.46235,-1,-1,-1,-1,-1,34,-1,15
954,283.061696,19.430898,12.990553,1,1,4,8,3,0,0,...,0,69.484445,22,46,-1,-1,-1,-1,-1,-1
955,41.406351,17.656733,20.811807,0,5,5,7,1,0,0,...,0,40.698032,25,49,-1,-1,-1,-1,-1,-1
956,9.240424,17.872943,46.730468,0,2,1,3,2,0,1,...,1,76.432889,-1,-1,-1,-1,-1,26,-1,-1
957,18.635335,18.984018,61.73604,1,3,1,2,2,0,0,...,0,68.49181,22,-1,-1,-1,-1,47,75,39
958,17.091327,18.317504,69.751823,1,4,1,3,2,0,0,...,0,73.454985,74,48,-1,-1,-1,22,-1,99
959,45.970422,16.441279,1.970774,1,5,6,6,2,0,0,...,1,76.432889,-1,-1,-1,-1,-1,47,-1,26


In [26]:
# Build one graph containing both the patients with an outcome and those
# without, then write it out as Turtle and N-Triples.
kg = ConjunctiveGraph()
with_outcome.apply(gen_patient_rdf, axis=1, kg=kg)
no_outcome.apply(gen_patient_rdf, axis=1, kg=kg)
print(f"Generated {len(kg)} RDF triples")

kg.serialize("sphn_transductive_950_50.ttl", format="turtle")
kg.serialize("sphn_transductive_950_50.nt", format="nt")
# The dumped label list covers every patient in train_df, including those
# whose outcome was left out of the graph.
joblib.dump(train_df["outcome"].astype(int).to_list(), "outcomes_950_50.joblib")

Generated 111885 RDF triples




['outcomes_950_50.joblib']

In [13]:
# Show the outcome column, followed by how often each outcome value occurs.
display(
    train_df["outcome"],
    train_df["outcome"].value_counts(),
)

0      0.0
1      0.0
2      1.0
3      1.0
4      1.0
      ... 
995    0.0
996    1.0
997    1.0
998    1.0
999    1.0
Name: outcome, Length: 1000, dtype: float64

outcome
0.0    456
1.0    429
2.0    115
Name: count, dtype: int64

### 800 patients with outcomes ; 200 patients without outcomes

In [14]:
# 80 % of the first N shuffled patients keep their outcome; the rest lose it.
N = 1000
N_train = int(N * 0.8)
print(f"size training set: {N_train}")

N_test = N - N_train
print(f"size test set: {N_test}")
assert N_train + N_test == N

train_df = df.iloc[:N]
with_outcome = df.iloc[:N_train]
display(with_outcome)

# Same columns minus 'outcome' for the patients that follow the first N_train.
no_outcome = df.iloc[N_train:N].drop(columns=["outcome"])
display(no_outcome)

## Serialize data
# Both groups are written into the same graph.
kg = ConjunctiveGraph()
with_outcome.apply(gen_patient_rdf, axis=1, kg=kg)
no_outcome.apply(gen_patient_rdf, axis=1, kg=kg)
print(f"Generated {len(kg)} RDF triples")

kg.serialize("sphn_transductive_800_200.ttl", format="turtle")
kg.serialize("sphn_transductive_800_200.nt", format="nt")
# The dumped label list covers all N patients, including those whose outcome
# was left out of the graph.
joblib.dump(train_df["outcome"].astype(int).to_list(), "outcomes_800_200.joblib")

size training set: 800
size test set: 200


Unnamed: 0,hospital_stay_length,gcs,nb_acte,gender,entry,outcome,entry_code,ica,ttt,ica_therapy,...,ivh,age,nimodipine,paracetamol,nad,corotrop,morphine,dve,atl,iot
0,11.525542,18.050848,3.561477,0,1,0.0,2,1,1,0,...,0,46.653842,22,-1,-1,-1,-1,-1,-1,-1
1,4.096719,17.164788,20.830227,0,5,0.0,3,8,2,0,...,0,62.536000,25,-1,101,-1,-1,73,-1,49
2,92.015036,18.158804,29.897650,1,1,1.0,7,10,1,0,...,0,49.631746,-1,-1,-1,-1,-1,24,-1,54
3,66.217942,17.936781,45.870606,0,2,1.0,6,6,2,0,...,1,68.491810,-1,-1,-1,-1,-1,18,-1,44
4,25.694681,18.088936,4.813020,0,1,1.0,1,6,2,0,...,0,73.454985,23,-1,-1,-1,57,-1,-1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,5.607700,18.018429,9.786872,0,1,1.0,4,2,2,0,...,0,66.506540,-1,73,47,-1,-1,26,-1,100
796,12.744652,16.951451,14.692523,0,2,0.0,1,2,2,0,...,0,67.499175,-1,-1,-1,-1,-1,-1,-1,26
797,11.104740,17.435429,9.744744,0,5,1.0,1,5,1,0,...,0,48.639111,-1,-1,-1,-1,-1,24,-1,-1
798,12.533918,17.410528,45.523159,0,2,0.0,3,1,1,0,...,0,30.771683,30,-1,-1,-1,-1,53,-1,76


Unnamed: 0,hospital_stay_length,gcs,nb_acte,gender,entry,entry_code,ica,ttt,ica_therapy,fever,...,ivh,age,nimodipine,paracetamol,nad,corotrop,morphine,dve,atl,iot
800,27.716994,22.271162,162.629840,0,3,6,5,1,0,0,...,0,47.646476,100,82,40,-1,58,19,-1,-1
801,28.068950,18.638495,56.453281,0,1,0,1,1,0,0,...,0,37.720127,31,103,-1,-1,81,-1,-1,64
802,76.586852,17.813591,16.484651,0,4,0,4,1,0,0,...,0,32.756953,25,-1,-1,-1,-1,-1,-1,58
803,86.835429,17.359500,2.702275,1,5,3,2,1,0,0,...,0,49.631746,74,51,-1,-1,-1,22,-1,-1
804,34.533290,17.970486,55.513590,0,6,0,3,1,0,0,...,0,45.661207,16,-1,-1,-1,-1,-1,-1,29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,22.292600,16.888011,1.548673,0,1,0,1,1,0,0,...,0,43.675937,79,-1,-1,-1,-1,47,-1,23
996,4.597317,17.981298,15.023840,1,1,10,2,2,0,0,...,1,82.388699,-1,-1,-1,-1,-1,49,-1,24
997,8.144656,17.542425,16.701046,0,1,3,5,1,0,1,...,0,46.653842,20,-1,-1,-1,-1,-1,-1,-1
998,13.022135,17.334170,64.537369,0,5,0,2,1,0,0,...,0,51.617016,-1,-1,54,-1,-1,30,-1,-1


Generated 112735 RDF triples




['outcomes_800_200.joblib']

# Computing the duration of care events

In [15]:
## Understanding this code ...
# Care events, in the column order used for the generated event tables.
events = ['nimodipine', 'paracetamol', 'nad', 'corotrop', 'morphine', 'dve', 'atl', 'iot']
# Same events plus the terminal 'finish' marker that ends a care path.
events_end = [*events, 'finish']

# Transition probabilities between care events; the first CSV column is the index.
transitions = pd.read_csv('./care_transitions_probs.csv', index_col=0)
transitions

# Probability of each entry of events_end being the first event of a care path.
start_probs = [
    0.47381546,  # nimodipine
    0.09476309,  # paracetamol
    0.00997506,  # nad
    0,           # corotrop
    0.00997506,  # morphine
    0.24189526,  # dve
    0.00249377,  # atl
    0.16708229,  # iot
    0,           # finish
]

# Generate a sequence of care events.
# The first event is drawn from start_probs; each following event is drawn from
# the transition probabilities of the event that precedes it.
def generate_care_path():
    """Return a random list of distinct care events.

    The walk starts with an event drawn from ``start_probs`` and continues by
    drawing from the ``transitions`` column of the current event. It ends when
    'finish' is drawn or when an event already on the path comes up again.
    The 'finish' marker itself is not part of the returned list.
    """
    current = np.random.choice(events_end, size=1, p=start_probs)[0]
    visited = [current]

    while current != 'finish':
        candidate = np.random.choice(events_end, size=1, p=transitions[current].values)[0]
        # ensure that there is no duplicate event: a repeat ends the walk
        current = 'finish' if candidate in visited else candidate
        visited.append(current)

    # the last entry is always the 'finish' marker; drop it
    return visited[:-1]
    

def generate_times_path(path):
    """Assign a cumulative time (in hours) to every event of ``path``.

    Each event gets a duration drawn from a normal distribution (mean=24,
    std=5) and rounded; the time of an event is the running sum of the
    durations up to and including it.

    Returns a list with one slot per entry of ``events``: the cumulative time
    for events on the path, -1 for events that are absent.
    """
    durations = [round(hours) for hours in norm.rvs(24, 5, len(path))]
    elapsed = list(accumulate(durations))

    # -1 marks an event that does not occur on this path
    timeline = [-1] * len(events)
    for when, name in zip(elapsed, path):
        timeline[events.index(name)] = when

    return timeline

In [22]:
# Draw a single random care path and show it as the cell output.
generate_care_path()

['dve', 'paracetamol', 'nimodipine', 'iot']

In [23]:
# Five simulated care paths, one per row; each column holds the cumulative
# time of that event, or -1 when the event is not on the path.
df_events = pd.DataFrame(
    [generate_times_path(generate_care_path()) for _ in range(5)],
    columns=events,
)
df_events

Unnamed: 0,nimodipine,paracetamol,nad,corotrop,morphine,dve,atl,iot
0,87,66,-1,-1,47,17,-1,104
1,86,117,58,-1,-1,32,-1,-1
2,26,-1,-1,-1,-1,-1,-1,-1
3,30,-1,-1,-1,-1,-1,-1,-1
4,23,-1,-1,-1,-1,-1,-1,53
