# Hopitaux

Utilisation d'un seul dataframe, celui de Sos Médecin par départements (info sos médecin et hopitaux)

Création de 3 pickles, un par type de sexe. Index selon `date`, `region`, `age`
- `df_h.p` : les hommes
- `df_f.p` : les femmes
- `df_t.p` : total des 2
- `age_transformation.p` : dictionnaire qui contient 2 dictionnaires qui permettent de passer de label_2_value ou de value_2_label 

Attention, les données sont fournies ainsi, mais plus complètes pour l'ensemble des 2 que pour la répartion. Par conséquent la somme n'est pas toujours exacte.

# Imports

In [1]:
import pandas as pd
import sys
sys.path.append('../scripts/')
import utils_covid as f

pd.set_option('chained_assignment',None)
pd.set_option('display.max_columns', 500)

In [2]:
%load_ext autoreload
%autoreload 2

%aimport utils_covid

# Loading

__raw_data__

In [3]:
# Raw data
path = f.OPENDATA_PATH + 'coronavirus-tranche-age-urgences-sosmedecins-dep-france.csv'
df = pd.read_csv(path, sep=';')

# Display few lines
df.head()

Unnamed: 0,Code département,Date,Nb. pass. urgences pr suspicion,Total pass. urgences,Nb. hosp. urgences pr suspicion,H - Nb. pass. urgences pr suspicion,F - Nb. pass. urgences pr suspicion,H - Total pass. urgences,F - Total pass. urgences,H - Nb. hosp. pass. urgences pr suspicion,F - Nb. hosp. pass. urgences pr suspicion,Nb. actes méd. SOS Méd. pr suspicion,Total actes méd. SOS Méd.,H - Nb. actes méd. SOS Méd. pr suspicion,F - Nb. actes méd. SOS Méd. pr suspicion,H - Total actes méd. SOS Méd.,F - Total actes méd. SOS Méd.,Code de la région,Code ISO 3166 de la zone,Nom région,Nom Département,Libellé tranches d'âge,Geo Point
0,28,2020-03-22,2.0,40.0,1.0,,,,,,,,,,,,,24,FRA,Centre-Val de Loire,Eure-et-Loir,45-64 ans,"48.3904369966,1.36981669854"
1,2B,2020-03-22,3.0,12.0,3.0,,,,,,,,,,,,,94,FRA,Corse,Haute-Corse,> 74 ans,"42.3944507303,9.21065397337"
2,44,2020-03-22,12.0,39.0,12.0,,,,,,,4.0,64.0,,,,,52,FRA,Pays de la Loire,Loire-Atlantique,> 74 ans,"47.3612553393,-1.6750424562"
3,47,2020-03-22,5.0,30.0,0.0,,,,,,,,,,,,,75,FRA,Nouvelle-Aquitaine,Lot-et-Garonne,15-44 ans,"44.3650343519,0.459260227242"
4,56,2020-03-22,11.0,67.0,0.0,,,,,,,4.0,23.0,,,,,53,FRA,Bretagne,Morbihan,15-44 ans,"47.8441455862,-2.8088066223"


__renaming columns__

In [4]:
# To facilitate renaming function
print({i:'' for i in df.columns.tolist()})

{'Code département': '', 'Date': '', 'Nb. pass. urgences pr suspicion': '', 'Total pass. urgences': '', 'Nb. hosp. urgences pr suspicion': '', 'H - Nb. pass. urgences pr suspicion': '', 'F - Nb. pass. urgences pr suspicion': '', 'H - Total pass. urgences': '', 'F - Total pass. urgences': '', 'H - Nb. hosp. pass. urgences pr suspicion': '', 'F - Nb. hosp. pass. urgences pr suspicion': '', 'Nb. actes méd. SOS Méd. pr suspicion': '', 'Total actes méd. SOS Méd.': '', 'H - Nb. actes méd. SOS Méd. pr suspicion': '', 'F - Nb. actes méd. SOS Méd. pr suspicion': '', 'H - Total actes méd. SOS Méd.': '', 'F - Total actes méd. SOS Méd.': '', 'Code de la région': '', 'Code ISO 3166 de la zone': '', 'Nom région': '', 'Nom Département': '', "Libellé tranches d'âge": '', 'Geo Point': ''}


In [5]:
# Renaming all columns
df = df.rename(columns={
    'Code département': 'code_dept', 
    'Date': 'date', 
    'Nb. pass. urgences pr suspicion': 'urg_susp_t', 
    'Total pass. urgences': 'urg_tot_t', 
    'Nb. hosp. urgences pr suspicion': 'urg_hosp_t', 
    'H - Nb. pass. urgences pr suspicion': 'urg_susp_h', 
    'F - Nb. pass. urgences pr suspicion': 'urg_susp_f', 
    'H - Total pass. urgences': 'urg_tot_h', 
    'F - Total pass. urgences': 'urg_tot_f', 
    'H - Nb. hosp. pass. urgences pr suspicion': 'urg_hosp_h', 
    'F - Nb. hosp. pass. urgences pr suspicion': 'urg_hosp_f',
    'Nb. actes méd. SOS Méd. pr suspicion': 'sos_susp_t',
    'Total actes méd. SOS Méd.': 'sos_tot_t',
    'H - Nb. actes méd. SOS Méd. pr suspicion': 'sos_susp_h',
    'F - Nb. actes méd. SOS Méd. pr suspicion': 'sos_susp_f',
    'H - Total actes méd. SOS Méd.': 'sos_tot_h',
    'F - Total actes méd. SOS Méd.': 'sos_tot_f',
    'Code de la région': 'code_region',
    'Code ISO 3166 de la zone': 'iso_zone',
    'Nom région': 'region',
    'Nom Département': 'dept',
    "Libellé tranches d'âge": 'tranche_age',
    'Geo Point': 'geo_point'
    })

__Region mapping__

In [6]:
region = df[['code_region', 'region']].drop_duplicates().set_index('code_region')['region'].to_dict()
region

{24: 'Centre-Val de Loire',
 94: 'Corse',
 52: 'Pays de la Loire',
 75: 'Nouvelle-Aquitaine',
 53: 'Bretagne',
 44: 'Grand Est',
 27: 'Bourgogne-Franche-Comté',
 32: 'Hauts-de-France',
 76: 'Occitanie',
 84: 'Auvergne-Rhône-Alpes',
 11: 'Île-de-France',
 93: "Provence-Alpes-Côte d'Azur",
 3: 'Guyane',
 28: 'Normandie',
 1: 'Guadeloupe',
 2: 'Martinique',
 4: 'La Réunion',
 6: 'Mayotte'}

__Age mapping__

In [7]:
# age_convertissor
age = df['tranche_age'].unique().tolist()
{i:'' for i in age}

{'45-64 ans': '',
 '> 74 ans': '',
 '15-44 ans': '',
 'tous âges': '',
 '65-74 ans': '',
 '< 15ans': ''}

In [8]:
age_conv = {'tous âges': 'all',
 '65-74 ans': '65_74',
 '15-44 ans': '15_44',
 '> 74 ans': '74__',
 '45-64 ans': '45_64',
 '< 15ans': '__15'}

age_rev = {}
for k, v in age_conv.items():
    age_rev[v]=k
age_rev

# Saving results
age_transformation = {'label_2_value':age_conv, 'value_2_label':age_rev}
f.save_pickle(age_transformation, 'age_transformation.p')

__finish pre-processing__

In [9]:
df['age'] = df['tranche_age'].apply(lambda x: age_conv[x])
df = df.drop(['code_dept', 'dept', 'iso_zone', 'region', 'tranche_age', 'region'], axis = 1)
df.head()

Unnamed: 0,date,urg_susp_t,urg_tot_t,urg_hosp_t,urg_susp_h,urg_susp_f,urg_tot_h,urg_tot_f,urg_hosp_h,urg_hosp_f,sos_susp_t,sos_tot_t,sos_susp_h,sos_susp_f,sos_tot_h,sos_tot_f,code_region,geo_point,age
0,2020-03-22,2.0,40.0,1.0,,,,,,,,,,,,,24,"48.3904369966,1.36981669854",45_64
1,2020-03-22,3.0,12.0,3.0,,,,,,,,,,,,,94,"42.3944507303,9.21065397337",74__
2,2020-03-22,12.0,39.0,12.0,,,,,,,4.0,64.0,,,,,52,"47.3612553393,-1.6750424562",74__
3,2020-03-22,5.0,30.0,0.0,,,,,,,,,,,,,75,"44.3650343519,0.459260227242",15_44
4,2020-03-22,11.0,67.0,0.0,,,,,,,4.0,23.0,,,,,53,"47.8441455862,-2.8088066223",15_44


# Creating 3 data frames for H/F/All

In [10]:
def clean_df(df, to_keep, to_del_1, to_del_3):
    col_t = [col for col in df.columns.tolist() if ((col[-2:]!=to_del_1) and (col[-2:]!=to_del_3))]
    df_t = df[col_t]
    agg_dict = {col:'sum'  for col in col_t if to_keep in col}
    agg_dict['geo_point'] = 'last'
    df_t = df_t.groupby(['date','code_region', 'age']).agg(agg_dict)
    df_t.columns = [col[:-2] if col.endswith(to_keep) else col for col in df_t.columns ]
    df_t.columns = ['urg_tot', 'urg_susp', 'urg_hosp', 'sos_tot', 'sos_susp', 'geo_point']
    df_t['lat'] = df_t['geo_point'].apply(lambda x: x.split(',')[0])
    df_t['lon'] = df_t['geo_point'].apply(lambda x: x.split(',')[1])
    df_t = df_t.drop('geo_point', axis=1)
    return df_t

In [11]:
df_h = clean_df(df, '_h', '_f', '_t')
df_f = clean_df(df, '_f', '_h', '_t')
df_t = clean_df(df, '_t', '_f', '_h')

f.save_pickle(df_h, 'df_sos_h.p')
f.save_pickle(df_f, 'df_sos_f.p')
f.save_pickle(df_t, 'df_sos_t.p')

# Exploration

__Re-Loading__

In [12]:
df_h = f.load_pickle('df_sos_h.p')
df_f = f.load_pickle('df_sos_f.p')
df_t = f.load_pickle('df_sos_t.p')

__Comparaison de la somme H+F = total__

In [13]:
(df_h + df_f).head(6)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,urg_tot,urg_susp,urg_hosp,sos_tot,sos_susp,lat,lon
date,code_region,age,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-02-24,1,15_44,0.0,0.0,0.0,0.0,0.0,16.19764633716.197646337,-61.5397927924-61.5397927924
2020-02-24,1,45_64,0.0,0.0,0.0,0.0,0.0,16.19764633716.197646337,-61.5397927924-61.5397927924
2020-02-24,1,65_74,0.0,0.0,0.0,0.0,0.0,16.19764633716.197646337,-61.5397927924-61.5397927924
2020-02-24,1,74__,0.0,0.0,0.0,0.0,0.0,16.19764633716.197646337,-61.5397927924-61.5397927924
2020-02-24,1,__15,0.0,0.0,0.0,0.0,0.0,16.19764633716.197646337,-61.5397927924-61.5397927924
2020-02-24,1,all,0.0,300.0,0.0,0.0,0.0,16.19764633716.197646337,-61.5397927924-61.5397927924


In [14]:
df_t.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,urg_tot,urg_susp,urg_hosp,sos_tot,sos_susp,lat,lon
date,code_region,age,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-02-24,1,15_44,0.0,102.0,0.0,0.0,0.0,16.197646337,-61.5397927924
2020-02-24,1,45_64,0.0,68.0,0.0,0.0,0.0,16.197646337,-61.5397927924
2020-02-24,1,65_74,0.0,20.0,0.0,0.0,0.0,16.197646337,-61.5397927924
2020-02-24,1,74__,0.0,35.0,0.0,0.0,0.0,16.197646337,-61.5397927924
2020-02-24,1,__15,0.0,75.0,0.0,0.0,0.0,16.197646337,-61.5397927924
2020-02-24,1,all,0.0,300.0,0.0,0.0,0.0,16.197646337,-61.5397927924
2020-02-24,2,15_44,0.0,0.0,0.0,0.0,28.0,14.6548621716,-61.0193671014
2020-02-24,2,45_64,0.0,0.0,0.0,0.0,25.0,14.6548621716,-61.0193671014
2020-02-24,2,65_74,0.0,0.0,0.0,0.0,9.0,14.6548621716,-61.0193671014
2020-02-24,2,74__,0.0,0.0,0.0,0.0,15.0,14.6548621716,-61.0193671014


__Filtering__

In [80]:
# Cas d'une journée par région
df_t.xs('2020-03-21').xs('all', level=1)[['urg_tot', 'lat','lon']].reset_index()

Unnamed: 0,code_region,urg_tot,lat,lon
0,11,266.0,48.9083101579,2.48332622588
1,24,47.0,48.3904369966,1.36981669854
2,28,216.0,49.1173587925,0.994646492411
3,32,0.0,49.5565688819,3.5620366046
4,44,52.0,48.1122966595,5.22903858107
5,52,347.0,48.1463719594,-0.65589089497
6,75,26.0,45.1059247866,0.741353186486
7,76,948.0,43.1040405728,2.41617917074
8,84,0.0,44.7532685394,4.42389358891
9,93,230.0,43.93340912,7.11989042987


In [21]:
# Cas d'une journée par tranche d'age
df_t.xs('2020-03-21').xs(11)#.sum()

Unnamed: 0_level_0,urg_tot,urg_susp,urg_hosp,sos_tot,sos_susp,lat,lon
age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
15_44,106.0,1365.0,30.0,237.0,1051.0,48.8557954256,2.34411308321
45_64,75.0,833.0,35.0,91.0,480.0,48.8557954256,2.34411308321
65_74,28.0,354.0,13.0,29.0,191.0,48.7692936982,2.47462077077
74__,50.0,708.0,46.0,28.0,274.0,48.7692936982,2.47462077077
__15,7.0,692.0,1.0,57.0,623.0,48.7692936982,2.47462077077
all,266.0,3952.0,125.0,443.0,2621.0,48.9083101579,2.48332622588


In [64]:
# Repartition de l'age pour une journée eet une région donnée
df_t.xs(jour).xs(region)[col]

age
15_44    1365.0
45_64     833.0
65_74     354.0
74__      708.0
__15      692.0
all      3952.0
Name: urg_susp, dtype: float64

In [20]:
# Total en France pur un jour donné
df_h.xs('2020-03-21').xs('all', level=1).sum()

urg_tot                                                  1012
urg_susp                                                10094
urg_hosp                                                  428
sos_tot                                                   723
sos_susp                                                 4258
lat         48.908310157948.390436996649.117358792549.5565...
lon         2.483326225881.369816698540.9946464924113.5620...
dtype: object

In [25]:
# Times Series sur toute la France pour chaque colonne présente
tmp = df_t.xs('all', level=2).groupby('date').sum()
tmp.head()

Unnamed: 0_level_0,urg_tot,urg_susp,urg_hosp,sos_tot,sos_susp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-02-24,1.0,48553.0,0.0,0.0,12071.0
2020-02-25,1.0,42682.0,0.0,0.0,10913.0
2020-02-26,0.0,42098.0,0.0,0.0,10815.0
2020-02-27,18.0,41727.0,1.0,0.0,10764.0
2020-02-28,31.0,41844.0,6.0,0.0,10411.0


In [26]:
# Times Series sur UNE SEULE REGION pour chaque colonne présente
region = 11
tmp = df_t.xs(region, level=1).xs('all', level=1)
tmp.head()

Unnamed: 0_level_0,urg_tot,urg_susp,urg_hosp,sos_tot,sos_susp,lat,lon
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-02-24,0.0,9061.0,0.0,0.0,2816.0,48.7692936982,2.47462077077
2020-02-25,0.0,7876.0,0.0,0.0,2520.0,48.7692936982,2.47462077077
2020-02-26,0.0,7783.0,0.0,0.0,2480.0,48.9083101579,2.48332622588
2020-02-27,9.0,7736.0,1.0,0.0,2490.0,48.8557954256,2.34411308321
2020-02-28,9.0,7742.0,4.0,0.0,2534.0,48.7692936982,2.47462077077


In [None]:
#tmp = tmp.loc[['__15', '15_44', '45_64', '65_74', '74__', 'all']]

In [32]:
# Somme d'un indcateur sur un jour en France
jour = '2020-03-21'
col = 'urg_susp'
tmp = df_h.xs(jour).xs('all', level=1)[col].sum()
tmp

10094.0

In [33]:
# Indicateur sur un jour dans les régions de France
df_t.xs(jour).xs('all', level=1)[[col, 'lat','lon']]

Unnamed: 0_level_0,urg_susp,lat,lon
code_region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
11,3952.0,48.9083101579,2.48332622588
24,940.0,48.3904369966,1.36981669854
28,2428.0,49.1173587925,0.994646492411
32,0.0,49.5565688819,3.5620366046
44,878.0,48.1122966595,5.22903858107
52,2606.0,48.1463719594,-0.65589089497
75,693.0,45.1059247866,0.741353186486
76,5232.0,43.1040405728,2.41617917074
84,0.0,44.7532685394,4.42389358891
93,2699.0,43.93340912,7.11989042987


In [34]:
# Repartioiton un jour par région d'un indicateur
df_t.xs(jour).xs(region)[col]

age
15_44    1365.0
45_64     833.0
65_74     354.0
74__      708.0
__15      692.0
all      3952.0
Name: urg_susp, dtype: float64

In [42]:
# Répartition de l'age d'un indicateur en France
tmp = df_t.xs(jour).groupby('age')[col].sum()
tmp = tmp.loc[['__15', '15_44', '45_64', '65_74', '74__', 'all']]
tmp

age
__15      3306.0
15_44     7131.0
45_64     4330.0
65_74     1747.0
74__      2914.0
all      19428.0
Name: urg_susp, dtype: float64

In [43]:
ages = tmp.index.tolist()
ages

['__15', '15_44', '45_64', '65_74', '74__', 'all']

In [44]:
labels = [age_rev[age] for age in ages]
labels

['< 15ans', '15-44 ans', '45-64 ans', '65-74 ans', '> 74 ans', 'tous âges']

In [46]:
# Times Seires en France pour les indicateurs des urgences
tmp = df_f.xs('all', level=2).groupby('date').sum()
urg_col = ['urg_tot', 'urg_susp', 'urg_hosp']
urg = tmp[urg_col]
urg.head()

Unnamed: 0_level_0,urg_tot,urg_susp,urg_hosp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-02-24,1.0,23165.0,0.0
2020-02-25,1.0,20876.0,0.0
2020-02-26,0.0,20533.0,0.0
2020-02-27,7.0,20585.0,0.0
2020-02-28,12.0,20643.0,3.0
