### Importar Bibliotecas utilizadas

In [None]:
import math
import pandas as pd
import glob

# Connect to Google Drive: mount it at /content/drive; force_remount=True is
# passed so the mount is redone even if the drive was already mounted.
from google.colab import drive
drive.mount('/content/drive', force_remount = True)

# Directory inside the mounted Drive that the CSV glob pattern is appended to
PATH = '/content/drive/MyDrive/MC936/P2/data/processed/'

Mounted at /content/drive


### Abrir os arquivos com os dados dos cenários e selecionar os pacientes que morreram por COVID-19

In [None]:
query = "scenario*_clustering.csv"
# glob.glob returns matches in arbitrary, filesystem-dependent order, while the
# later cells pick scenarios by position (DIED_DFS[0] ... DIED_DFS[3]).
# Sorting the paths makes that index -> file mapping reproducible across runs.
# NOTE(review): confirm that alphabetical file order matches the scenario and
# clustering method each index is expected to hold.
SCS_PATHS = sorted(glob.glob(PATH + query))
# One DataFrame per scenario CSV, in the same order as SCS_PATHS
SCS_DFS = [pd.read_csv(csv_path) for csv_path in SCS_PATHS]
# Select patients who died from COVID-19 within 30 days
# (rows whose 'DIED IN ONE MONTH' column equals 1); copy to detach from SCS_DFS
DIED_DFS = [df.loc[df['DIED IN ONE MONTH'] == 1].copy() for df in SCS_DFS]

### Definição das Características dos grupos de Risco 

In [None]:
# Column names describing risk-group characteristics, grouped by category.
risk_group = [
    # COVID related
    'DIED IN ONE MONTH',
    'COVID-19',
    'Face mask (physical object)',
    'SARS-COV-2 (COVID-19) vaccine  mRNA  spike protein  LNP  preservative free  30 mcg/0.3mL dose',
    'SARS-COV-2 (COVID-19) vaccine  mRNA  spike protein  LNP  preservative free  100 mcg/0.5mL dose',
    'SARS-COV-2 (COVID-19) vaccine  vector non-replicating  recombinant spike protein-Ad26  preservative free  0.5 mL',
    'Suspected COVID-19',

    # High blood pressure
    'Hypertension',

    # Heart problems
    'Cardiac Arrest',
    'Coronary Heart Disease',
    'Heart failure (disorder)',
    'Heart failure education (procedure)',
    'History of cardiac arrest (situation)',
    'History of myocardial infarction (situation)',
    'Injury of heart (disorder)',
    'Myocardial Infarction',

    # Lung problems
    'Acute bronchitis (disorder)',
    'Acute pulmonary embolism (disorder)',
    'Acute respiratory distress syndrome (disorder)',
    'Acute respiratory failure (disorder)',
    'Childhood asthma',
    'Chronic obstructive bronchitis (disorder)',
    'Lung volume reduction surgery (procedure)',
    'Oxygen administration by mask (procedure)',
    'Pulmonary emphysema (disorder)',
    'Pulmonary rehabilitation (regime/therapy)',
    'Respiratory distress (finding)',

    # Diabetes
    'Diabetes',
    'Diabetic renal disease (disorder)',
    'Diabetic retinopathy associated with type II diabetes mellitus (disorder)',
    'Hyperglycemia (disorder)',
    'Prediabetes',

    # Obesity
    'Body mass index 30+ - obesity (finding)',
    'Body mass index 40+ - severely obese (finding)',

    # Cancer
    'Acute myeloid leukemia  disease (disorder)',
    'Carcinoma in situ of prostate (disorder)',
    'Chemotherapy (procedure)',
    'Febrile neutropenia (disorder)',
    'Malignant neoplasm of breast (disorder)',
    'Malignant tumor of colon',
    'Neoplasm of prostate',
    'Neutropenia (disorder)',
    'Overlapping malignant neoplasm of colon',
]

# The 'COVID-19' column itself is dropped from the feature list.
risk_group.remove('COVID-19')

# Features excluded from the reduced list below.
# NOTE(review): presumably these columns are absent from the scenario 01+02
# datasets, which are the ones selected with new_risk_group - confirm.
not_present = [
    'Heart failure education (procedure)',
    'History of myocardial infarction (situation)',
    'Myocardial Infarction',
    'Diabetic retinopathy associated with type II diabetes mellitus (disorder)',
    'Carcinoma in situ of prostate (disorder)',
    'Malignant tumor of colon',
    'Neoplasm of prostate',
]

# risk_group minus the not_present entries, original order preserved.
new_risk_group = [feature for feature in risk_group if feature not in not_present]

# Non-risk columns kept alongside the risk-group features.
others = [
    'AGE', 'PATIENT', 'RACE', 'ETHNICITY',
    'GENDER', 'CITY', 'STATE', 'Cluster',
]

print(new_risk_group)

### Selecionar as colunas relacionadas às características dos grupos de risco

In [None]:
# First two frames: keep the reduced feature list plus the `others` columns.
scenario01_02_rfc = [
    frame[new_risk_group + others].copy()
    for frame in (DIED_DFS[0], DIED_DFS[1])
]
# Last two frames: keep the full risk_group list plus the `others` columns.
scenario03_rfc = [
    frame[risk_group + others].copy()
    for frame in (DIED_DFS[2], DIED_DFS[3])
]

### Criar, para cada paciente em cada DataFrame, uma lista das características do grupo de risco que ele apresenta, bem como outras características, como idade e sexo

In [None]:
def get_features(row, features: list):
  """Return the names in ``features`` whose value in ``row`` equals 1.

  Args:
    row: Any object supporting ``row[name]`` lookup for every name in
      ``features`` (e.g. a row yielded by ``DataFrame.iterrows`` or a dict).
    features: Names to look up, in the order they should be reported.

  Returns:
    A new list with the matching names, in the same order as ``features``.
  """
  return [name for name in features if row[name] == 1]

def get_patients(df:pd.DataFrame, features: list):
  """Map each patient id to the features it presents plus demographic data.

  Args:
    df: Frame with a 'PATIENT' column, one column per name in ``features``,
      and the 'AGE', 'Cluster', 'GENDER' and 'ETHNICITY' columns.
    features: Feature column names passed on to ``get_features``.

  Returns:
    A dict keyed by the 'PATIENT' value of each row. Each value is the list
    returned by ``get_features`` for that row followed by the row's AGE,
    Cluster, GENDER and ETHNICITY values, in that order. If a 'PATIENT'
    value appears in several rows, the last row wins.
  """
  # The former bare `df.reset_index()` call was dropped: it returns a new
  # frame instead of modifying `df`, and its result was discarded.
  p_dict = dict()
  for _, row in df.iterrows():  # the row index label is not needed
    p_dict[row['PATIENT']] = get_features(row, features) + [
        row['AGE'], row['Cluster'], row['GENDER'], row['ETHNICITY']]

  return p_dict

In [None]:
# Replace each selected frame with its {patient id: characteristics} dict.
scenario01_02_rfc = [
    get_patients(frame, new_risk_group) for frame in scenario01_02_rfc
]
scenario03_rfc = [
    get_patients(frame, risk_group) for frame in scenario03_rfc
]

### Imprimir para cada Dataframe a lista de pacientes com suas respectivas características

In [None]:
# Print the patients of scenario01_02_rfc[0]: a header line per patient, one
# "- value" line per stored entry, then a blank separator line.
print("### Clusterização Hierárquica cenário 01+02\n")
for patient, data in scenario01_02_rfc[0].items():
  report = [f'Informações do Paciente **{patient}** :']
  report.extend(f'- {value}' for value in data)
  # The trailing '' yields the blank line that separates patients.
  print(*report, '', sep='\n')

### Clusterização Hierárquica cenário 01+02

Informações do Paciente **9ec0c1c1-724e-135d-aa6d-bd0779e31c44** :
- DIED IN ONE MONTH
- Face mask (physical object)
- SARS-COV-2 (COVID-19) vaccine  mRNA  spike protein  LNP  preservative free  30 mcg/0.3mL dose
- Suspected COVID-19
- Acute bronchitis (disorder)
- Acute pulmonary embolism (disorder)
- Acute respiratory distress syndrome (disorder)
- Acute respiratory failure (disorder)
- Lung volume reduction surgery (procedure)
- Oxygen administration by mask (procedure)
- Pulmonary emphysema (disorder)
- Pulmonary rehabilitation (regime/therapy)
- Respiratory distress (finding)
- 72.0
- C7
- M
- nonhispanic

Informações do Paciente **d1e4b72c-3916-e04f-cfb7-8e97f3c7e4b7** :
- DIED IN ONE MONTH
- Face mask (physical object)
- Suspected COVID-19
- Hypertension
- Acute bronchitis (disorder)
- Acute pulmonary embolism (disorder)
- Acute respiratory failure (disorder)
- Oxygen administration by mask (procedure)
- Respiratory distress (finding)

In [None]:
# Print the patients of scenario01_02_rfc[1]: a header line per patient, one
# "- value" line per stored entry, then a blank separator line.
print("### Clusterização K-Means cenário 01+02\n")
for patient, data in scenario01_02_rfc[1].items():
  report = [f'Informações do Paciente **{patient}** :']
  report.extend(f'- {value}' for value in data)
  # The trailing '' yields the blank line that separates patients.
  print(*report, '', sep='\n')

### Clusterização K-Means cenário 01+02

Informações do Paciente **9ec0c1c1-724e-135d-aa6d-bd0779e31c44** :
- DIED IN ONE MONTH
- Face mask (physical object)
- SARS-COV-2 (COVID-19) vaccine  mRNA  spike protein  LNP  preservative free  30 mcg/0.3mL dose
- Suspected COVID-19
- Acute bronchitis (disorder)
- Acute pulmonary embolism (disorder)
- Acute respiratory distress syndrome (disorder)
- Acute respiratory failure (disorder)
- Lung volume reduction surgery (procedure)
- Oxygen administration by mask (procedure)
- Pulmonary emphysema (disorder)
- Pulmonary rehabilitation (regime/therapy)
- Respiratory distress (finding)
- 72.0
- C1
- M
- nonhispanic

Informações do Paciente **d1e4b72c-3916-e04f-cfb7-8e97f3c7e4b7** :
- DIED IN ONE MONTH
- Face mask (physical object)
- Suspected COVID-19
- Hypertension
- Acute bronchitis (disorder)
- Acute pulmonary embolism (disorder)
- Acute respiratory failure (disorder)
- Oxygen administration by mask (procedure)
- Respiratory distress (finding)
- B

In [None]:
# Print the patients of scenario03_rfc[0]: a header line per patient, one
# "- value" line per stored entry, then a blank separator line.
print("### Clusterização Hierárquica cenário 03\n")
for patient, data in scenario03_rfc[0].items():
  report = [f'Informações do Paciente **{patient}** :']
  report.extend(f'- {value}' for value in data)
  # The trailing '' yields the blank line that separates patients.
  print(*report, '', sep='\n')

### Clusterização Hierárquica cenário 03

Informações do Paciente **70d44a6a-1359-fd33-3447-f3cb0942a2ce** :
- DIED IN ONE MONTH
- Face mask (physical object)
- Suspected COVID-19
- Hypertension
- Coronary Heart Disease
- Acute respiratory failure (disorder)
- Oxygen administration by mask (procedure)
- Respiratory distress (finding)
- Prediabetes
- 104.0
- C2
- F
- hispanic

Informações do Paciente **de51c590-ed3f-c764-3001-57a3001b0f43** :
- DIED IN ONE MONTH
- Face mask (physical object)
- Suspected COVID-19
- Hypertension
- Coronary Heart Disease
- Heart failure (disorder)
- Injury of heart (disorder)
- Acute respiratory distress syndrome (disorder)
- Acute respiratory failure (disorder)
- Oxygen administration by mask (procedure)
- Respiratory distress (finding)
- Diabetes
- Diabetic retinopathy associated with type II diabetes mellitus (disorder)
- Hyperglycemia (disorder)
- Prediabetes
- 70.0
- C4
- M
- nonhispanic

Informações do Paciente **7a97a805-d262-e64a-bca9-648a0ad993d4*

In [None]:
# Print the patients of scenario03_rfc[1]: a header line per patient, one
# "- value" line per stored entry, then a blank separator line.
print("### Clusterização K-Means cenário 03\n")
for patient, data in scenario03_rfc[1].items():
  report = [f'Informações do Paciente **{patient}** :']
  report.extend(f'- {value}' for value in data)
  # The trailing '' yields the blank line that separates patients.
  print(*report, '', sep='\n')

### Clusterização K-Means cenário 03

Informações do Paciente **70d44a6a-1359-fd33-3447-f3cb0942a2ce** :
- DIED IN ONE MONTH
- Face mask (physical object)
- Suspected COVID-19
- Hypertension
- Coronary Heart Disease
- Acute respiratory failure (disorder)
- Oxygen administration by mask (procedure)
- Respiratory distress (finding)
- Prediabetes
- 104.0
- C2
- F
- hispanic

Informações do Paciente **de51c590-ed3f-c764-3001-57a3001b0f43** :
- DIED IN ONE MONTH
- Face mask (physical object)
- Suspected COVID-19
- Hypertension
- Coronary Heart Disease
- Heart failure (disorder)
- Injury of heart (disorder)
- Acute respiratory distress syndrome (disorder)
- Acute respiratory failure (disorder)
- Oxygen administration by mask (procedure)
- Respiratory distress (finding)
- Diabetes
- Diabetic retinopathy associated with type II diabetes mellitus (disorder)
- Hyperglycemia (disorder)
- Prediabetes
- 70.0
- C2
- M
- nonhispanic

Informações do Paciente **7a97a805-d262-e64a-bca9-648a0ad993d4** :
