In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

%matplotlib inline

# Gather Data

The data is collected in three blocks:
- 2021-04-19 _COVID_DSL_0X.CSV_
- 2020-07-20 _CDSL_0X.csv_
- 2020-04-24 _0X.csv_

Each block containing 6 different tables/.cdv-files. 

### Data Description
- The main table includes data on the admission and the patient (age and sex), data on the previous emergency if there has been one (2,226 records), data on their stay in the ICU if there has been one and records of the first and last set of emergency constants.
- The medication table shows all the medication administered to each patient during admission (more than 60,000 records), with the dates corresponding to the first and last administration of each drug, identified by their brand name and classification in the ATC5/ATC7.
- In the table of vital signs, there are all the basic records of constants (54,000 records so far) collected during admission with their date and time of registration.
- The laboratory table contains the results of the determinations (398,884 records) of all the requests made to each patient during admission and in the previous emergency, if any.
- And finally, the ICD10 coding tables show the records of diagnostic and procedural information coded according to the international ICD10 classification in its latest distributed version (does not include COVID), for the patients referred, both for episodes of hospital admission (more than 1,600) and for the emergency (more than 1,900) prior to those episodes, if any.

### Data Specification
1. **Records of Inpatients:** demographic, data on the episode of admission, data on the previous emergency episode, if any, data on the ICU, if there has been.
2. **Records of medication** prescribed and administered during admission (ICU medication is not collected).
3. **Records of constants** during admission (ICU records are not collected).
4. **Records of the results of laboratory requests** made during admission and in the previous emergency, if there have been.
5. **Records of the emergency coding**, if any, according to the ICD10.
6. **Records for inpatient coding** according to ICD10.

Translation of the headlines in each file from spanish to english is covered in the file _COVID_DATA_SAVE_LIVES_.

In [4]:
s = os.path.dirname(os.getcwd()).rstrip()
s = s[1:]

In [6]:
os.listdir()

['cleaning.ipynb',
 '.DS_Store',
 'utils',
 'README.md',
 '.ipynb_checkpoints',
 '.git',
 'data']

In [16]:
pd.read_csv('data/COVID_DSL_01.CSV', sep='|', encoding = 'ISO-8859-1')

Unnamed: 0,EDAD,SEX,IDINGRESO,F_INGRESO_ING,F_ALTA_ING,MOTIVO_ALTA_ING,DIAGNOSTICO_ING,F_INGRESO_URG,HORA_URG,DIAG_URG,...,GLU_ULTIMA_URG,DESTINO_URG,IDCDSL,F_ING_ANT,DIAG_ANT,RESPIRADOR,F_UCI_IN,F_UCI_OUT,UCI_DAYS,UCI_N_ING
0,55,MALE,508,2020-04-05 00:00:00,2020-04-15 00:00:00,Domicilio,COVID CONFIRMADO,05/04/2020,22:28,DIFICULTAD RESPIRATORIA,...,0,Ingreso,508,,,SI,,,,
1,54,MALE,509,2020-04-05 00:00:00,2020-04-14 00:00:00,Domicilio,COVID CONFIRMADO,05/04/2020,22:32,DIFICULTAD RESPIRATORIA,...,0,Ingreso,509,,,,,,,
2,73,MALE,510,2020-04-05 00:00:00,2020-04-20 00:00:00,Domicilio,COVID CONFIRMADO,05/04/2020,22:35,DIFICULTAD RESPIRATORIA,...,0,Ingreso,510,,,SI,,,,
3,90,FEMALE,511,2020-04-06 00:00:00,2020-04-07 00:00:00,Domicilio,COVID SOSPECHA,05/04/2020,23:46,DIFICULTAD RESPIRATORIA,...,0,Ingreso,511,,,,,,,
4,70,FEMALE,512,2020-04-06 00:00:00,2020-04-28 00:00:00,Traslado al Hospital,COVID CONFIRMADO,06/04/2020,00:57,DIFICULTAD RESPIRATORIA,...,0,Ingreso,512,,,SI,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4474,61,MALE,2742,2020-03-22 00:00:00,2020-03-24 00:00:00,Traslado al Hospital,COVID CONFIRMADO,22/03/2020,20:04,FIEBRE,...,0,Ingreso,2742,2020-03-24,COVID CONFIRMADO,SI,,,,
4475,54,MALE,2743,2020-03-22 00:00:00,2020-04-02 00:00:00,Domicilio,COVID CONFIRMADO,22/03/2020,20:17,TOS,...,0,Ingreso,2743,,,SI,,,,
4476,77,MALE,2744,2020-03-23 00:00:00,2020-04-07 00:00:00,Domicilio,COVID CONFIRMADO,22/03/2020,20:39,TOS,...,0,Ingreso,2744,2020-04-10,NEUTROPENIA FEBRIL,SI,,,,
4477,71,MALE,2745,2020-03-22 00:00:00,2020-05-22 00:00:00,Domicilio,COVID CONFIRMADO,22/03/2020,20:50,DIFICULTAD RESPIRATORIA,...,0,Ingreso,2745,2020-07-09,AIT,SI,2020-03-27 19:06:27.673000000,2020-04-27 13:32:57.683000000,31.0,4.0


In [17]:
pd.read_csv('data/COVID_DSL_02.CSV', sep='|', encoding = 'ISO-8859-1', low_memory = False)

Unnamed: 0,IDINGRESO,CONSTANTS_ING_DATE,CONSTANTS_ING_TIME,TA_MAX_ING,TA_MIN_ING,TEMP_ING,FC_HR_ING,SAT_02_ING,SAT_02_ING_OBS,GLU_GLY_ING
0,1843,2021-01-23,00:28,0,0,0,0,95,SATO2 BASAL,0
1,1764,2021-01-23,00:31,0,0,0,0,91,SATO2 RESER MAX,0
2,1761,2021-01-23,00:32,0,0,0,0,95,,0
3,1792,2021-01-23,00:32,0,0,0,0,95,SATO2 GN 2 L,0
4,1225,2021-01-23,00:32,0,0,0,0,99,,0
...,...,...,...,...,...,...,...,...,...,...
224141,542,2020-05-17,10:27,0,0,0,0,98,SATO2 GN 1L,0
224142,605,2020-05-17,11:55,0,0,0,0,96,,0
224143,580,2020-11-08,15:46,0,0,0,0,35,SATO2 GN 4L,0
224144,4298,2020-11-08,15:50,0,0,0,0,97,SATO2 GN 1L,0


In [18]:
pd.read_csv('data/COVID_DSL_03.CSV', sep='|', encoding = 'ISO-8859-1')

Unnamed: 0,IDINGRESO,DIA_PPAL,DIA_02,DIA_03,DIA_04,DIA_05,DIA_06,DIA_07,DIA_08,DIA_09,DIA_10,DIA_11,DIA_12,PROC_01,PROC_02,PROC_03,PROC_04,PROC_05
0,508,B97.29,R68.89,J12.89,J12.9,Z20.828,,,,,,,,BW03ZZZ,0BJ0XZZ,,,
1,509,R68.89,J06.9,E11.9,B97.29,,,,,,,,,BW03ZZZ,0BJ0XZZ,,,
2,510,B97.29,J12.9,J12.89,,,,,,,,,,BW03ZZZ,0BJ0XZZ,,,
3,511,J18.9,R68.89,,,,,,,,,,,BW24ZZZ,,,,
4,512,R06.00,B34.2,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4192,2758,E86.0,E87.1,J12.89,J84.9,R68.89,,,,,,,,BW03ZZZ,0BJ0XZZ,,,
4193,2759,J96.90,R68.89,,,,,,,,,,,BW03ZZZ,,,,
4194,2760,R06.00,J12.89,B97.29,,,,,,,,,,BW03ZZZ,,,,
4195,2761,R68.89,J96.90,,,,,,,,,,,BW03ZZZ,,,,


In [8]:
# Import data

for i in range(1,7):
    df_name = 'data1' + str(i)
    file_name = 'COVID_DSL_0' + str(i) + '.CSV'
    file_path = 'data/' + file_name
    print(file_path)
    df_name = pd.read_csv(file_path, sep='|')

data/COVID_DSL_01.CSV


UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd3 in position 24: invalid continuation byte