# Omicron Preprocessing 2


In [9]:
import pandas as pd
import numpy as np
from datetime import datetime
from xlsxwriter import Workbook
from sklearn.impute import KNNImputer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Read the pre-processed Omicron workbook into a DataFrame.
# NOTE(review): the path is anchored at the filesystem root, so this cell only
# runs where the file sits at '/' — confirm that is the intended location.
file_path = '/omicron_pre_processed.xlsx'
dataset = pd.read_excel(io=file_path)

In [11]:
# Display the (rows, columns) dimensions of the loaded frame
dataset.shape

(218, 176)

In [12]:
# First renaming pass: old column label -> new column label.
# Each old label appears exactly once so the entry count reflects the number
# of distinct columns targeted.
rename_mapping = {
    'pregnant': 'Pregnant',
    'Cr': 'CR',
    'Na': 'NA',
    'Medication_VONCON': 'Medication_voncon',
    'Medication_TYGACIL': 'Medication_tygacil',
    'Medication_AMBISOME': 'Medication_ambisome',
    'Medication_VIBRAMYCIN': 'Medication_vibramycin',
    'Medication_BRESEC': 'Medication_bresec',
    'Medication_TARGOCID': 'Medication_targocid',
    'carotid_stenosis': 'Carotid_Stenosis',
    'Medication_FLAGYL': 'Medication_flagyl',
    'Medication_CIPROXIN': 'Medication_ciproxin',
    'Medication_TINZAPARIN': 'Medication_tinzaparin',
    'Medication_ZOVIRAX': 'Medication_zovirax',
    'Medication_BACTRIMEL': 'Medication_bactrimel',
    'Medication_MEDROL': 'Medication_medrol',
    'Medication_GARAMYCIN': 'Medication_garamycin',
    'Medication_ECALTA': 'Medication_ecalta',
    'Medication_CUBICIN': 'Medication_cubicin',
    'Medication_TAVANIC': 'Medication_tavanic',
    'Medication_MERONEM': 'Medication_meronem',
    'Medication_DALACIN': 'Medication_dalacin',
    'Medication_ZAVICEFTA': 'Medication_zavicefta',
    'Medication_FUNGUSTATIN': 'Medication_fungustatin',
    'Medication_BRIKLIN': 'Medication_briklin',
    'Medication_FONDAPARINUX': 'Medication_fondaparinux',
    'Medication_MEFOXIL': 'Medication_mefoxil',
    'Medication_ROCEPHIN': 'Medication_rocephin'
}

# Apply the mapping to the loaded `dataset` frame; the result is a new frame and
# `dataset` itself is left unchanged. With DataFrame.rename's default
# errors='ignore', mapping keys that are not column labels are skipped silently.
# NOTE: despite the `delta_` prefix, this frame is derived from `dataset`.
delta_renamed = dataset.rename(columns=rename_mapping)

# Show the resulting column labels for a visual check
renamed_columns = delta_renamed.columns

renamed_columns


Index(['Subject_ID', 'AGE', 'GENDER', 'LOS', 'DAYS OF SYMPTOMS', 'FEVER',
       'COUGH', 'FATIGUE', 'DIARRHEAS', 'DYSPNEA',
       ...
       'Medication_ZYVOXID', 'Medication_AMBISONE', 'Medication_ENOXAPARIN',
       'Medication_enoxaparin', 'Medication_PLAVIX', 'Medication_fondaparinux',
       'Medication_SINTROM', 'Medication_DAKTARIN', 'Medication_tavanic',
       'Medication_cubicin'],
      dtype='object', length=176)

In [13]:
# Second renaming pass (old label -> new label):
#   * five columns receive an 'x' suffix,
#   * two columns are pointed at their '_cleaned' labels,
#   * every listed medication column gets its suffix lower-cased.
correct_matches = {
    'COUGH': 'COUGHx',
    'DIARRHEAS': 'DIARRHEASx',
    'DYSPNEA': 'DYSPNEAx',
    'FATIGUE': 'FATIGUEx',
    'FEVER': 'FEVERx',
    'FIO2 eisagwgh': 'FIO2 eisagwgh_cleaned',
    'D-DIMERS': 'D-DIMERS_cleaned',
    # 'Medication_<NAME>' -> 'Medication_<name>' for each listed suffix, in this order
    **{
        f'Medication_{drug}': f'Medication_{drug.lower()}'
        for drug in (
            'AVELOX', 'AMBISOME', 'BACTRIMEL', 'BRESEC', 'BRIKLIN',
            'CIPROXIN', 'CUBICIN', 'DALACIN', 'ECALTA', 'ENOXAPARIN',
            'FLAGYL', 'FONDAPARINUX', 'FUNGUSTATIN', 'GARAMYCIN', 'MEDROL',
            'MEFOXIL', 'MERONEM', 'ROCEPHIN', 'TARGOCID', 'TAVANIC',
            'TAZOCIN', 'TINZAPARIN', 'TYGACIL', 'VIBRAMYCIN', 'VONCON',
            'ZAVICEFTA', 'ZINFORO', 'ZITHROMAX', 'ZOVIRAX', 'ZYVOXID',
        )
    },
}

# Apply the pass to the frame produced by the first renaming step.
# NOTE(review): the column listing before this step already contains both
# 'Medication_ENOXAPARIN' and 'Medication_enoxaparin', and the listing after it
# shows 'Medication_enoxaparin' twice — i.e. this rename yields duplicate column
# labels. Confirm whether those columns should be merged before further use.
delta_renamed2 = delta_renamed.rename(columns=correct_matches)

# Show the resulting column labels for a visual check
renamed_columns = delta_renamed2.columns


renamed_columns


Index(['Subject_ID', 'AGE', 'GENDER', 'LOS', 'DAYS OF SYMPTOMS', 'FEVERx',
       'COUGHx', 'FATIGUEx', 'DIARRHEASx', 'DYSPNEAx',
       ...
       'Medication_zyvoxid', 'Medication_AMBISONE', 'Medication_enoxaparin',
       'Medication_enoxaparin', 'Medication_PLAVIX', 'Medication_fondaparinux',
       'Medication_SINTROM', 'Medication_DAKTARIN', 'Medication_tavanic',
       'Medication_cubicin'],
      dtype='object', length=176)

In [None]:
# Correspondence table: each key is the label wanted after renaming, each value
# is the label currently carried by the frame. Entries whose value is None take
# no part in the rename below.
# NOTE(review): presumably None marks a target column with no counterpart in
# this dataset — confirm against the cells that consume `correspondence`.
correspondence = {
    'ANAKINRA_Processed': None,
    'Abdominal_Aortic_Aneurysm': None,
    'Acne': None,
    'Alcohol': None,
    'Anemia': None,
    'Ankylosing_Spondylitis': None,
    'Aortic_Aneurysm': None,
    'Aortic_Valve_Plasty': None,
    'BARITICINIB_Processed': None,
    'BMI': None,
    'Bedridden': None,
    'Beta_Thalassemia_Heterozygous': None,
    'Body_Mass_Index': None,
    'CM/AF_Presence': 'CM/AF',
    'CM/CAD_Presence': 'CM/CAD',
    'CM/CHF_Presence': 'CM/CHF',
    'CM/CNS_Presence': 'CM/CNS',
    'CM/COPD_Presence': 'CM/COPD',
    'CM/DM_Presence': 'CM/DM',
    'CM/GI_Presence': 'CM/GI',
    'CM/HBP_Presence': 'CM/HBP',
    'CM/asthma_Presence': 'CM/asthma',
    'CM/lipid_Presence': 'CM/lipid',
    'CM/metabolic/other_Presence': 'CM/metabolic/other',
    'CM/neoplasm_Presence': 'CM/neoplasm',
    'CM/renal_Presence': 'CM/renal',
    'Carotid_Atherosclerosis': None,
    'Chronic_Kidney_Disease': None,
    'Chronic_Lymphocytic_Leukemia': None,
    'DEXATON_Processed': None,
    'DM REGISTRY_Presence': None,
    'Hemolytic_Anemia': None,
    'Hepatitis_B_Virus': None,
    'Heterozygous_Factor_V_Leiden': None,
    'INFILTRATE_numerical': 'infiltrate_int',
    'Idiopathic_Tremor': None,
    'Intellectual_Disability': None,
    'KALETRA_Processed': None,
    'Leukemia': None,
    'Lymphoma': None,
    'Medication_abixaban': None,
    'Medication_augmentin': None,
    'Medication_begalin': None,
    'Medication_caspofungin': None,
    'Medication_colchicine': None,
    'Medication_colistin': 'Medication_COLLISTIN',
    'Medication_fosfomycin': None,
    'Medication_minocin': None,
    'Medication_rivaroxaban': None,
    'Medication_solumedrol': None,
    'Medication_solvetan': None,
    'Medication_tigacil': None,
    'Medication_xymbalta': None,
    'Medication_zebaxa': None,
    'Medication_zerbaxa': None,
    'Medication_zinforo_': 'Medication_zinforo.1',
    'Medication_zovirax_solucortef': None,
    'Metallic_Valve': None,
    'Multiple_Myeloma': None,
    'Myelodysplastic_Syndrome': None,
    'NON_HODGKIN_': None,
    'Nephrolithiasis': None,
    'PLAQUENIL_Processed': None,
    'Pacemaker': None,
    'Psychiatric_Disorder': None,
    'Pulmonary_Embolism': 'pe_years_ago',
    'REMDESIVIR_Processed': None,
    'Recurrent_Pregnancy': None,
    'Renal_Infarction': None,
    'Sarcoidosis': None,
    'TAMIFLU_Processed': None,
    'TOCILIZUMAB_Processed': None,
    'Tuberculosis': None,
    'URTIx': 'URTI',
    'Uricemia': None,
    'cm/autoimm_Presence': 'cm/autoimm'
}
# Flip the table into {current label: wanted label}, leaving out the None entries
rename_dict = dict(
    (current_label, wanted_label)
    for wanted_label, current_label in correspondence.items()
    if current_label is not None
)

# Third renaming pass, applied to the output of the second pass
delta_renamed3 = delta_renamed2.rename(columns=rename_dict)
