# Flux de patients selon les GHM et diagrammes de Sankey

In [1]:
!pip install prefixspan



In [2]:
from prefixspan import PrefixSpan
import pandas as pd
import numpy as np
from tqdm import tqdm

## Import des parcours de soins

In [3]:
# Read the care-pathway table; the first CSV column is used as the row index.
p_soins_IC = pd.read_csv(
    "./data/p_soins_IC.csv",
    index_col=0,
)

In [4]:
# Give the two leading columns meaningful names; the remaining columns keep
# their numeric labels ("2", "3", ...).
p_soins_IC = p_soins_IC.rename(
    {"0": "CODE_PATIENT", "1": "cluster"},
    axis="columns",
)

In [5]:
def truncateGHM(df):
    """Shorten the GHM codes of a care-pathway frame to five characters, in place.

    The first two columns are left untouched. Every later column is looked up
    by the string form of its position ("2", "3", ...), so the frame must use
    that naming scheme.

    Missing values stay missing. The previous version stringified them to
    'nan' and then tried to undo that with ``df = df.replace(...)``, which
    only rebound the local name and therefore never reached the caller.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns from position 2 onwards are named "2", "3", ...

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object (modified in place), returned for convenience.
    """
    for i in tqdm(range(2, df.shape[1])):
        col = str(i)
        # na_action="ignore" lets NaN pass through instead of becoming 'nan'.
        df[col] = df[col].map(lambda x: str(x)[:5], na_action="ignore")
    return df

In [6]:
# Shorten every GHM code in place, then make sure any literal 'nan' string
# left in the frame is turned into a real missing value.
truncateGHM(p_soins_IC)
p_soins_IC = p_soins_IC.replace(to_replace='nan', value=np.nan)

100%|██████████| 123/123 [00:00<00:00, 486.87it/s]


In [7]:
# Preview the first five pathways after truncation.
p_soins_IC.head(n=5)

Unnamed: 0,CODE_PATIENT,cluster,2,3,4,5,6,7,8,9,...,115,116,117,118,119,120,121,122,123,124
0,P0,4,05M09,05M09,05C19,05M20,05M20,05C19,,,...,,,,,,,,,,
1,P6,4,05M09,04M13,05C22,23M10,04M05,04M24,Décès,,...,,,,,,,,,,
2,P17,5,05M09,04M14,11M04,04M05,09M06,05K25,09M06,09M06,...,,,,,,,,,,
3,P24,1,05M09,05M17,05M13,05M13,05M13,05K06,05M13,05M13,...,,,,,,,,,,
4,P25,4,05M09,23K02,09C08,09C08,19M02,,,,...,,,,,,,,,,


## Flux de GHM pour l'ensemble de la population avec IC

In [8]:
# Columns at positions 2 and 3 hold the first and second step of each pathway.
p_soins_IC_12 = p_soins_IC.iloc[:, 2:4]
p_soins_IC_12.head(n=5)

Unnamed: 0,2,3
0,05M09,05M09
1,05M09,04M13
2,05M09,04M14
3,05M09,05M17
4,05M09,23K02


In [9]:
# One [step-1 GHM, step-2 GHM] pair per patient.
# The previous loop used `.iloc[row][0]` / `.iloc[row][1]`, i.e. integer keys
# on a Series labelled "2"/"3"; pandas has deprecated treating such keys as
# positions. Converting the two-column frame in one go avoids that lookup and
# the per-row Series construction.
corpus_IC = p_soins_IC_12.to_numpy().tolist()

In [10]:
# Mine the ten most frequent step-1 -> step-2 transitions.
# minlen = 2 keeps only patterns made of at least two items (per the
# prefixspan API); each result is a (count, pattern) pair.
ps = PrefixSpan(corpus_IC)
ps.minlen = 2
output = ps.topk(k=10)

In [11]:
# Print each transition as a "[source, target, share]" row, where share is the
# pattern count divided by the number of patients in the corpus.
for support, pattern in output:
    ghm1 = str(pattern[0])
    ghm2 = str(pattern[1])
    share = round(support / len(corpus_IC), 3)
    print("['" + ghm1 + '\'', ",\'" + ghm2 + " ',", share, "],")

['05M09' ,'05M09 ', 0.164 ],
['05M09' ,'Décès ', 0.16 ],
['05M09' ,'05K10 ', 0.042 ],
['05M09' ,'nan ', 0.032 ],
['05M09' ,'05M08 ', 0.024 ],
['05M09' ,'04M05 ', 0.022 ],
['05M09' ,'23M20 ', 0.018 ],
['05M09' ,'04M13 ', 0.016 ],
['05M09' ,'02C05 ', 0.016 ],
['05M09' ,'16M11 ', 0.014 ],


In [12]:
# Columns at positions 3 and 4 hold the second and third step of each pathway.
p_soins_IC_23 = p_soins_IC.iloc[:, 3:5]
p_soins_IC_23.head(n=5)

Unnamed: 0,3,4
0,05M09,05C19
1,04M13,05C22
2,04M14,11M04
3,05M17,05M13
4,23K02,09C08


In [13]:
# Display the step-2 / step-3 frame itself; the rendered output elides the
# middle rows and shows only the first and last ones.
p_soins_IC_23

Unnamed: 0,3,4
0,05M09,05C19
1,04M13,05C22
2,04M14,11M04
3,05M17,05M13
4,23K02,09C08
...,...,...
4632,23M20,04M22
4633,23K02,05M06
4634,10M18,04M13
4635,05M09,05K05


In [14]:
# One [step-2 GHM, step-3 GHM] pair per patient; a missing step-3 value is
# stored as the string 'nan' so that it forms a regular item of the sequence.
# The previous loop indexed each row with `.iloc[row][0]` / `.iloc[row][1]`
# (integer keys on a Series labelled "3"/"4", deprecated in pandas) and
# duplicated the append in both branches of an if/else.
corpus_IC_2 = [
    [ghm_from, ghm_to if str(ghm_to) != 'nan' else 'nan']
    for ghm_from, ghm_to in p_soins_IC_23.to_numpy().tolist()
]

In [15]:
# Mine the ten most frequent step-2 -> step-3 transitions.
# minlen = 2 keeps only patterns made of at least two items (per the
# prefixspan API); each result is a (count, pattern) pair.
ps = PrefixSpan(corpus_IC_2)
ps.minlen = 2
output = ps.topk(k=10)

In [16]:
# Print each step-2 -> step-3 transition as a "[source, target, share]" row.
# The share is normalised by the size of the corpus these patterns were mined
# from (corpus_IC_2). The previous version divided by len(corpus_IC), a
# variable from an earlier cell that only happens to have the same length.
for i in output:
    ghm1=str(i[1][0])
    ghm2=str(i[1][1])
    print("['"+ghm1+' \'',",\' "+ghm2+" ',",round(i[0]/len(corpus_IC_2),3),"],")

['Décès ' ,' nan ', 0.16 ],
['05M09 ' ,' 05M09 ', 0.045 ],
['nan ' ,' nan ', 0.032 ],
['05M09 ' ,' Décès ', 0.027 ],
['05M09 ' ,' nan ', 0.006 ],
['05K10 ' ,' 05M09 ', 0.006 ],
['23Z02 ' ,' Décès ', 0.006 ],
['02C05 ' ,' nan ', 0.004 ],
['04M05 ' ,' Décès ', 0.004 ],
['05M09 ' ,' 05K10 ', 0.004 ],


## Flux de patients par clusters

In [35]:
def _top_transitions(transitions, k=20):
    """Return the k most frequent patterns of length >= 2 as (count, pattern) pairs."""
    ps = PrefixSpan(transitions)
    ps.minlen = 2
    return ps.topk(k=k)


def generate3stepsSankey(cluster_n, GHM_len=5):
    """Print the most frequent GHM transitions of one cluster as Sankey rows.

    Reads the module-level ``p_soins_IC`` frame, keeps the patients whose
    ``cluster`` equals ``cluster_n`` and prints two lists separated by a
    blank block:

    1. the 20 most frequent step-1 -> step-2 transitions (columns at
       positions 2 and 3);
    2. the 20 most frequent step-2 -> step-3 transitions (columns at
       positions 3 and 4), with a trailing/leading space around the labels
       so that they differ from the labels of the first list.

    Each row is printed as ``['source' ,'target ', count ],`` with the raw
    patient count. Missing steps appear as the label ``nan``.

    Parameters
    ----------
    cluster_n
        Value of the ``cluster`` column to select.
    GHM_len : int, optional
        Currently unused; kept so existing calls keep working.

    Returns
    -------
    None
        The result is only printed.
    """
    # Filter and stringify once instead of once per column pair.
    cluster_rows = p_soins_IC[p_soins_IC["cluster"] == cluster_n].astype(str)

    # Convert each two-column slice to a list of pairs in one go. The previous
    # row-by-row `.iloc[row][0]` lookups used integer keys on a Series with
    # string labels, which pandas has deprecated. The explicit str() keeps
    # every item a string (missing values become 'nan') whatever dtype
    # astype(str) produced.
    corpus_IC = [
        [str(ghm_from), str(ghm_to)]
        for ghm_from, ghm_to in cluster_rows.iloc[:, 2:4].to_numpy().tolist()
    ]
    for count, pattern in _top_transitions(corpus_IC):
        ghm1 = str(pattern[0])
        ghm2 = str(pattern[1])
        print("['" + ghm1 + '\'', ",\'" + ghm2 + " ',", round(count, 3), "],")

    print("\n")

    corpus_IC_2 = [
        [str(ghm_from), str(ghm_to)]
        for ghm_from, ghm_to in cluster_rows.iloc[:, 3:5].to_numpy().tolist()
    ]
    for count, pattern in _top_transitions(corpus_IC_2):
        ghm1 = str(pattern[0])
        ghm2 = str(pattern[1])
        print("['" + ghm1 + ' \'', ",\' " + ghm2 + " ',", round(count, 3), "],")

In [36]:
# Print the step-1 -> 2 and step-2 -> 3 transition counts for cluster 1.
generate3stepsSankey(cluster_n=1)

['05M09' ,'05M09 ', 161 ],
['05M09' ,'05K10 ', 37 ],
['05M09' ,'Décès ', 33 ],
['05M09' ,'04M05 ', 26 ],
['05M09' ,'05M08 ', 25 ],
['05M09' ,'11M06 ', 15 ],
['05M09' ,'04M13 ', 14 ],
['05M09' ,'16M11 ', 14 ],
['05M09' ,'23M20 ', 14 ],
['05M09' ,'04M03 ', 11 ],
['05M09' ,'02C05 ', 10 ],
['05M09' ,'05M17 ', 10 ],
['05M09' ,'04M20 ', 9 ],
['05M09' ,'05M06 ', 9 ],
['05M09' ,'11K02 ', 8 ],
['05M09' ,'04M11 ', 7 ],
['05M09' ,'05C14 ', 7 ],
['05M09' ,'06M12 ', 7 ],
['05M09' ,'19M06 ', 7 ],
['05M09' ,'nan ', 7 ],


['05M09 ' ,' 05M09 ', 52 ],
['Décès ' ,' nan ', 33 ],
['04M05 ' ,' 05M09 ', 8 ],
['05M09 ' ,' Décès ', 7 ],
['nan ' ,' nan ', 7 ],
['05M09 ' ,' 05K10 ', 6 ],
['05M08 ' ,' 05M09 ', 5 ],
['05M09 ' ,' 05K06 ', 5 ],
['05M09 ' ,' 11M04 ', 5 ],
['01M20 ' ,' 05M09 ', 4 ],
['04M03 ' ,' 05M09 ', 4 ],
['05K10 ' ,' 05K10 ', 4 ],
['05K10 ' ,' 05M09 ', 4 ],
['05K10 ' ,' 23M10 ', 4 ],
['05M09 ' ,' 04M13 ', 4 ],
['09C03 ' ,' 05M09 ', 4 ],
['11M06 ' ,' 05M09 ', 4 ],
['05M09 ' ,' 05M15 ', 3 ],
['05M