In [129]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

# Raise pandas' display limits so the very wide frames shown in this notebook
# (hundreds of columns) are rendered without truncation.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Hiperparametri eksperimenta

In [85]:
# PreviousAdmissionDays: value that fix_PreviousAdmissionDays() writes over the
# -8 placeholder. Named as a median; how it was derived is not shown here.
PREV_ADM_DAYS_MEDIAN = 1055

# Height_Discharge: replacement values per Gender ('M', 'Ž', gender missing) and the
# accepted range. fix_Heigth_Discharge() replaces values below LOWER_LIM or above
# UPPER_LIM. Units are presumably centimetres -- TODO confirm.
HEIGHT_MEDIAN_MALE = 176
HEIGHT_MEDIAN_FEMALE = 162
HEIGHT_MEDIAN_GENDER_MISSING = 172
HEIGHT_LOWER_LIM = 130
HEIGHT_UPPER_LIM = 204

# Weight_Discharge: replacement values per Gender and the accepted range, used by
# fix_Weigth_Discharge(). Units are presumably kilograms -- TODO confirm.
WEIGHT_MEDIAN_MALE = 90
WEIGHT_MEDIAN_FEMALE = 75
WEIGHT_MEDIAN_GENDER_MISSING = 85
WEIGHT_LOWER_LIM = 33
WEIGHT_UPPER_LIM = 199

# Medication groups: maps a therapeutic group name to the per-drug count columns
# that aggregate_mediactions() sums into "<group>_count" and then drops.
#
# Notes:
#   * "tramadol_and_paracetamol_count" and "dexamethasone_count" are each listed in
#     two groups, so they contribute to both group totals.
#   * "Antihipertenzivi" used to be written twice in this literal; Python silently
#     kept only the later list. The key now appears once, with that effective list,
#     so the resulting dictionary is unchanged.
#     NOTE(review): the shadowed first list also contained "diltiazem_count", which
#     is therefore NOT aggregated today -- confirm whether it should be added here.
GRUPE_LJEKOVA = {
    "Antibiotici": [
        "amoxicillin_count",
        "amoxicillin_and_betalactamase_inhibitor_count",
        "ampicillin_count",
        "ampicillin_and_betalactamase_inhibitor_count",
        "azithromycin_count",
        "cefalexin_count",
        "cefazolin_count",
        "cefepime_count",
        "ceftazidime_count",
        "ceftazidime_and_betalactamase_inhibitor_count",
        "ceftriaxone_count",
        "cefuroxime_count",
        "chloramphenicol_count",
        "ciprofloxacin_count",
        "clindamycin_count",
        "colistin_count",
        "erythromycin_count",
        "flucloxacillin_count",
        "gentamicin_count",
        "meropenem_count",
        "metronidazole_count",
        "moxifloxacin_count",
        "vancomycin_count",
    ],
    "Antikoagulanti": [
        "apixaban_count",
        "dabigatran_etexilate_count",
        "enoxaparin_count",
        "fondaparinux_count",
        "warfarin_count",
    ],
    "Antitrombotici": [
        "clopidogrel_count",
    ],
    "Antidijabetici": [
        "acarbose_count",
        "dapagliflozin_count",
        "empagliflozin_count",
        "gliclazide_count",
        "glimepiride_count",
        "insulin_aspart_count",
        "insulin_glargine_count",
        "insulin_human_count",
        "metformin_count",
        "metformin_and_alogliptin_count",
        "metformin_and_empagliflozin_count",
        "metformin_and_vildagliptin_count",
    ],
    "NSAID analgetici": [
        "diclofenac_count",
        "ibuprofen_count",
        "ketoprofen_count",
        "tramadol_and_paracetamol_count",
    ],
    "Opioidni analgetici": [
        "fentanyl_count",
        "morphine_count",
        "tramadol_count",
        "tramadol_and_paracetamol_count",
    ],
    "Antihipertenzivi": [
        "amlodipine_count",
        "doxazosin_count",
        "losartan_count",
        "losartan_and_diuretics_count",
    ],
    "Beta blokatori": [
        "atenolol_count",
        "bisoprolol_count",
        "carvedilol_count",
        "metoprolol_count",
        "nebivolol_count",
    ],
    "ACE inhibitori": [
        "lisinopril_count",
        "perindopril_count",
        "ramipril_count",
    ],
    "Diuretici": [
        "furosemide_count",
        "spironolactone_count",
        "torasemide_count",
    ],
    "Statini": [
        "atorvastatin_count",
        "rosuvastatin_count",
    ],
    "Antipsihotici": [
        "haloperidol_count",
        "quetiapine_count",
        "risperidone_count",
    ],
    "Antiepileptici": [
        "carbamazepine_count",
        "lamotrigine_count",
        "levetiracetam_count",
        "valproic_acid_count",
    ],
    "Sedativi i anksiolitici": [
        "alprazolam_count",
        "bromazepam_count",
        "diazepam_count",
        "lorazepam_count",
        "midazolam_count",
        "zolpidem_count",
    ],
    "Antigiht": [
        "allopurinol_count",
        "colchicine_count",
    ],
    "Proton pump inhibitors": [
        "esomeprazole_count",
        "pantoprazole_count",
    ],
    "Antihistaminici": [
        "loratadine_count",
    ],
    "Antiasmatici": [
        "aminophylline_count",
        "ipratropium_bromide_count",
        "salbutamol_count",
        "salbutamol_and_ipratropium_bromide_count",
    ],
    "Antiviralni": [
        "aciclovir_count",
        "oseltamivir_count",
    ],
    "Vitamini i Suplementi": [
        "colecalciferol_count",
    ],
    "Antiemetici": [
        "ondansetron_count",
    ],
    "Thyroid therapy": [
        "levothyroxine_sodium_count",
    ],
    "Antifungalni": [
        "fluconazole_count",
        "miconazole_count",
        "clotrimazole_count",
    ],
    "Imunosupresivi": [
        "dexamethasone_count",
    ],
    "Kortikosteroidi": [
        "dexamethasone_count",
    ],
}


In [89]:
# Laura's medications (the members of GRUPE_LJEKOVA) that do not appear in our dataset.
#
# The column names are read straight from the CSV headers (nrows=0 loads no data
# rows), so this cell does not rely on `all`: that frame is only created further
# down the notebook, and on a fresh kernel `all` is still the Python builtin, which
# has no `.columns` attribute.
train_columns = pd.read_csv('../data/train.csv', index_col=0, nrows=0).columns
test_columns = pd.read_csv('../data/test.csv', index_col=0, nrows=0).columns
dataset_columns = train_columns.union(test_columns, sort=False)

# Flat list of every grouped medication column (a column listed in two groups
# appears twice), and the subset that is absent from the dataset.
all_laura_ljek = [ljek for meds in GRUPE_LJEKOVA.values() for ljek in meds]
not_appearing = [ljek for ljek in all_laura_ljek if ljek not in dataset_columns]
not_appearing

[]

['doxycycline_count',
 'acetaminophen_count',
 'hydrochlorothiazide_count',
 'azathioprine_count'] -> ovi lijekovi iz Laurine .txt datoteke ne pojavljuju se u skupu podataka, pa sam ih uklonio iz gornjeg rječnika (GRUPE_LJEKOVA)

In [None]:
# Columns of our dataset that are not covered by Laura's medication list.
#
# The column names are read straight from the CSV headers (nrows=0 loads no data
# rows) instead of from `all`, which is only created further down the notebook; on
# a fresh kernel `all` is still the Python builtin and has no `.columns`.
train_columns = pd.read_csv('../data/train.csv', index_col=0, nrows=0).columns
test_columns = pd.read_csv('../data/test.csv', index_col=0, nrows=0).columns
dataset_columns = train_columns.union(test_columns, sort=False)

# The first 16 columns are skipped.
# NOTE(review): presumably these are the patient/admission fields (AdmissionDx ... Label)
# that precede the per-drug count columns -- confirm against the CSV header.
for column in dataset_columns[16:]:
    if column not in all_laura_ljek:
        print(column)

# Funkcije za ispravljanje dataset-a

In [91]:
# Load the train and test splits (the first CSV column becomes the index) and stack
# them so the cleaning functions below can be applied to both at once.
# NOTE: the name `all` shadows Python's builtin all() for the rest of the notebook.
df = pd.read_csv('../data/train.csv', index_col=0)
df_test = pd.read_csv('../data/test.csv', index_col=0)
all = pd.concat([df, df_test])

In [92]:
# Age bands ordered by their label text (e.g. '18-22' < '23-27'), each mapped to its
# 0-based rank. convert_age_groups_to_numeric() uses this mapping.
age_groups_sorted = sorted(all.Age_Group.unique())
age_group_numeric_mapping = dict(zip(age_groups_sorted, range(len(age_groups_sorted))))
age_group_numeric_mapping

{'18-22': 0,
 '23-27': 1,
 '28-32': 2,
 '33-37': 3,
 '38-42': 4,
 '43-47': 5,
 '48-52': 6,
 '53-57': 7,
 '58-62': 8,
 '63-67': 9,
 '68-72': 10,
 '73-77': 11,
 '78-82': 12,
 '83-87': 13,
 '88-92': 14,
 '93-97': 15}

In [93]:
def fix_PreviousAdmissionDays(df):
    """Flag and impute the PreviousAdmissionDays column.

    Adds a 'PrevAdmDaysAvail' column holding 1.0 where PreviousAdmissionDays is
    positive and 0.0 otherwise (kept because availability itself is informative for
    prediction), then overwrites every PreviousAdmissionDays equal to -8 with
    PREV_ADM_DAYS_MEDIAN. The flag is computed from the values as they were before
    the overwrite.

    NOTE(review): -8 is presumably the "no previous admission" placeholder -- confirm.

    Args:
        df: DataFrame with a 'PreviousAdmissionDays' column.

    Returns:
        The same DataFrame object, modified in place.
    """
    prev_days = df['PreviousAdmissionDays']
    is_placeholder = prev_days.eq(-8)
    df['PrevAdmDaysAvail'] = prev_days.gt(0) * 1.0
    df.loc[is_placeholder, 'PreviousAdmissionDays'] = PREV_ADM_DAYS_MEDIAN
    return df

def fix_Weigth_Discharge(df):
    """Replace implausible discharge weights with a gender-specific replacement value.

    A weight is treated as implausible when it is below WEIGHT_LOWER_LIM or above
    WEIGHT_UPPER_LIM. Missing weights (NaN) fail both comparisons and are left
    untouched. Implausible rows get WEIGHT_MEDIAN_FEMALE for Gender 'Ž',
    WEIGHT_MEDIAN_MALE for 'M', and WEIGHT_MEDIAN_GENDER_MISSING when Gender is
    missing or holds any other value.

    Args:
        df: DataFrame with 'Weight_Discharge' and 'Gender' columns.

    Returns:
        The same DataFrame object, modified in place.
    """
    gender_w_medians = {
        'Ž': WEIGHT_MEDIAN_FEMALE,
        'M': WEIGHT_MEDIAN_MALE,
    }
    weight = df['Weight_Discharge']
    row_indexer = (weight < WEIGHT_LOWER_LIM) | (weight > WEIGHT_UPPER_LIM)
    # Series.map yields NaN for every value that is not a key of the dict. A missing
    # Gender is stored as NaN, which a `None` dict key does not reliably match, so the
    # fallback is applied explicitly; otherwise NaN would be written over the weight.
    replacement = (
        df.loc[row_indexer, 'Gender']
        .map(gender_w_medians)
        .fillna(WEIGHT_MEDIAN_GENDER_MISSING)
    )
    df.loc[row_indexer, 'Weight_Discharge'] = replacement
    return df

def fix_Heigth_Discharge(df):
    """Replace implausible discharge heights with a gender-specific replacement value.

    A height is treated as implausible when it is below HEIGHT_LOWER_LIM or above
    HEIGHT_UPPER_LIM. Missing heights (NaN) fail both comparisons and are left
    untouched. Implausible rows get HEIGHT_MEDIAN_FEMALE for Gender 'Ž',
    HEIGHT_MEDIAN_MALE for 'M', and HEIGHT_MEDIAN_GENDER_MISSING when Gender is
    missing or holds any other value.

    Args:
        df: DataFrame with 'Height_Discharge' and 'Gender' columns.

    Returns:
        The same DataFrame object, modified in place.
    """
    gender_h_medians = {
        'Ž': HEIGHT_MEDIAN_FEMALE,
        'M': HEIGHT_MEDIAN_MALE,
    }
    height = df['Height_Discharge']
    row_indexer = (height < HEIGHT_LOWER_LIM) | (height > HEIGHT_UPPER_LIM)
    # Series.map yields NaN for every value that is not a key of the dict. A missing
    # Gender is stored as NaN, which a `None` dict key does not reliably match, so the
    # fallback is applied explicitly; otherwise NaN would be written over the height.
    replacement = (
        df.loc[row_indexer, 'Gender']
        .map(gender_h_medians)
        .fillna(HEIGHT_MEDIAN_GENDER_MISSING)
    )
    df.loc[row_indexer, 'Height_Discharge'] = replacement
    return df

def aggregate_mediactions(df):
    """Collapse per-drug count columns into one total per medication group.

    For every group in GRUPE_LJEKOVA a "<group>_count" column is added holding the
    row-wise sum of the group's member columns. All group totals are computed before
    anything is removed, so a column listed in several groups counts towards each of
    them. Afterwards every member column is dropped.

    Args:
        df: DataFrame containing every column named in GRUPE_LJEKOVA
            (a missing column raises KeyError).

    Returns:
        The same DataFrame object, modified in place.
    """
    member_columns = []
    for group_name in GRUPE_LJEKOVA:
        members = GRUPE_LJEKOVA[group_name]
        group_total = df[members].sum(axis="columns")
        df[group_name + "_count"] = group_total
        member_columns += members
    df.drop(columns=member_columns, inplace=True)
    return df

def convert_age_groups_to_numeric(df):
    """Replace the Age_Group labels with their numeric rank.

    Each label is looked up in the module-level `age_group_numeric_mapping`; labels
    that are not keys of that mapping become NaN.

    Args:
        df: DataFrame with an 'Age_Group' column.

    Returns:
        The same DataFrame object, modified in place.
    """
    numeric_rank = df['Age_Group'].map(age_group_numeric_mapping)
    df['Age_Group'] = numeric_rank
    return df

def fix_dataset(df):
    """Run every cleaning step on `df`, in a fixed order.

    Order: previous-admission flag/imputation, weight fix, height fix, medication
    aggregation, age-group conversion. Each step receives the result of the one
    before it.

    Args:
        df: Raw DataFrame as loaded from the CSV files.

    Returns:
        The DataFrame returned by the last step.
    """
    cleaning_steps = (
        fix_PreviousAdmissionDays,
        fix_Weigth_Discharge,
        fix_Heigth_Discharge,
        aggregate_mediactions,
        convert_age_groups_to_numeric,
    )
    for step in cleaning_steps:
        df = step(df)
    return df
    

In [94]:
# Clean the stacked train+test frame. fix_dataset() drops the per-drug columns it
# aggregates, so re-running this cell on the already cleaned frame raises KeyError;
# reload the CSVs first.
all = fix_dataset(all)

In [95]:
# Sanity check: (rows, columns) of the cleaned frame.
all.shape

(35923, 206)

In [96]:
# Preview the first rows of the cleaned frame (group totals are the last columns).
all.head()

Unnamed: 0,AdmissionDx,AdmissionType,PreviousAdmissionDays,LOS,Age_Group,Gender,Surgery_Count,LOS_ICU,Discharge_Specialty,Dx_Discharge,Discharge_Status,Weight_Discharge,Height_Discharge,Education,Current_Work_Status,Label,acetylsalicylic_acid_count,albumin_count,alprostadil_count,amiodarone_count,atropine_count,b03aa07_count,benzathine_benzylpenicillin_count,betamethasone_count,butylscopolamine_count,calcium_chloride_count,carbohydrates_count,chlorhexidine_count,chloropyramine_count,clonazepam_count,coagulation_factor_ix_ii_vii_and_x_in_combination_count,coagulation_factor_viia_count,combinations_count,dexamethasone_and_antiinfectives_count,dexmedetomidine_count,diltiazem_count,dobutamine_count,electrolytes_count,epinephrine_count,eplerenone_count,escitalopram_count,etomidate_count,exemestane_count,ferric_oxide_polymaltose_complexes_count,ferrous_fumarate_count,flumazenil_count,folic_acid_count,fosfomycin_count,gliquidone_count,glyceryl_trinitrate_count,heparin_count,hydrocortisone_count,indapamide_count,indometacin_count,iodixanol_count,iohexol_count,isosorbide_dinitrate_count,isosorbide_mononitrate_count,ivabradine_count,ketamine_count,lacidipine_count,lactulose_count,lercanidipine_count,levobupivacaine_count,levodopa_and_decarboxylase_inhibitor_count,levofloxacin_count,lidocaine_count,linagliptin_count,linezolid_count,loperamide_count,macrogol_combinations_count,mannitol_count,memantine_count,mesalazine_count,metamizole_sodium_count,methylphenobarbital_count,methylprednisolone_count,metildigoxin_count,metoclopramide_count,mirtazapine_count,mometasone_count,montelukast_count,moxonidine_count,mupirocin_count,n02bf01_count,n02bf02_count,nadroparin_count,naloxone_count,neostigmine_count,nitrazepam_count,nitrofurantoin_count,norepinephrine_count,norfloxacin_count,octreotide_count,omega3triglycerides_incl_other_esters_and_acids_count,organoheparinoid_count,oxazepam_count,paracetamol_count,paroxetine_count,perindopril_amlodipine_and_indapamide_count,perindopril_and_amlo
dipine_count,perindopril_and_diuretics_count,pethidine_count,phenobarbital_count,pholcodine_count,phytomenadione_count,piperacillin_and_betalactamase_inhibitor_count,potassium_chloride_count,potassium_different_salts_in_combination_count,prasugrel_count,prednisone_count,promazine_count,propafenone_count,propofol_count,protamine_count,rabeprazole_count,ramipril_and_amlodipine_count,ramipril_and_diuretics_count,ranitidine_count,ranolazine_count,repaglinide_count,rifampicin_count,rivaroxaban_count,rocuronium_bromide_count,rosuvastatin_and_ezetimibe_count,salmeterol_count,salmeterol_and_fluticasone_count,sertraline_count,sevoflurane_count,silver_sulfadiazine_count,sitagliptin_count,sotalol_count,sufentanil_count,sulfamethoxazole_and_trimethoprim_count,sulpiride_count,tamsulosin_count,tamsulosin_and_dutasteride_count,theophylline_count,thiamazole_count,thiethylperazine_count,thiopental_count,tianeptine_count,ticagrelor_count,tiotropium_bromide_count,tobramycin_count,trandolapril_count,tranexamic_acid_count,trimetazidine_count,trospium_count,urapidil_count,valsartan_count,valsartan_amlodipine_and_hydrochlorothiazide_count,valsartan_and_sacubitril_count,venlafaxine_count,verapamil_count,x99nijenavedenowho_count,zofenopril_count,A,B,C,D,E,F,G,H,I,J,K,L,M,N,Q,R,S,T,U,Y,Z,Probability_0,Probability_1,PrevAdmDaysAvail,Antibiotici_count,Antikoagulanti_count,Antitrombotici_count,Antidijabetici_count,NSAID analgetici_count,Opioidni analgetici_count,Antihipertenzivi_count,Beta blokatori_count,ACE inhibitori_count,Diuretici_count,Statini_count,Antipsihotici_count,Antiepileptici_count,Sedativi i anksiolitici_count,Antigiht_count,Proton pump inhibitors_count,Antihistaminici_count,Antiasmatici_count,Antiviralni_count,Vitamini i Suplementi_count,Antiemetici_count,Thyroid therapy_count,Antifungalni_count,Imunosupresivi_count,Kortikosteroidi_count
1,I,Elektivni,1055,1,13,Ž,0,0,3010100,I,KUĆI,53.0,156.0,ZAVRŠENA OSNOVNA ŠKOLA,DOMAĆICA,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,2,,,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,I,Elektivni,1055,16,10,M,1,0,3100400,I,KUĆI,96.0,167.0,NEPOZNAT OBRAZOVNI STATUS,UMIROVLJENIK,0.0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,2,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,,,0.0,5,2,0,0,3,3,0,0,1,0,1,0,0,3,0,1,0,0,0,0,0,0,0,0,0
3,I,Elektivni,279,3,0,Ž,1,0,3010100,I,KUĆI,52.0,163.0,ZAVRŠENO VISOKO OBRAZOVANJE,STUDENT,0.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,,,1.0,2,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
4,I,Elektivni,1905,2,13,M,0,0,3010100,I,KUĆI,95.0,175.0,ZAVRŠENA SREDNJA ŠKOLA,UMIROVLJENIK,0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,1,0,0,0,0,0,0,0,,,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,I,Elektivni,1055,2,11,M,0,0,3010100,I,KUĆI,82.0,174.0,ZAVRŠENO VISOKO OBRAZOVANJE,REDOVAN POSAO,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,,,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [98]:
# Number of missing values per column, listing only the columns that have any.
missing_per_column = all.isna().sum()
missing_per_column[missing_per_column > 0]

AdmissionDx              498
Gender                     2
Education                280
Current_Work_Status       50
Label                   7336
Probability_0          35923
Probability_1          35923
dtype: int64

Counts of missing values

# OneHot encoding and Scaling functions

In [125]:
def oneHot_encode_feature(df, feature):
    """Replace one categorical column with its one-hot indicator columns.

    The encoder is fitted only on rows where `feature` is not null and then applied
    to every row, relying on handle_unknown='ignore' for the rows it was not fitted
    on. The indicator columns are named by the encoder's get_feature_names_out().

    NOTE(review): with drop='if_binary', a two-category feature keeps a single
    indicator, so a row encoded as all zeros presumably cannot be told apart from
    the dropped category -- confirm this is acceptable for missing values.

    Args:
        df: DataFrame containing `feature`.
        feature: Name of the column to encode; it is removed from `df`.

    Returns:
        The same DataFrame object, modified in place.
    """
    encoder = OneHotEncoder(drop='if_binary', handle_unknown='ignore')
    observed = df.loc[df[feature].notnull(), [feature]]
    encoder.fit(observed)
    indicator_values = encoder.transform(df[[feature]]).toarray()
    df[encoder.get_feature_names_out()] = indicator_values
    df.drop(columns=[feature], inplace=True)
    return df

def scale_numeric_features(df, numeric_cols):
    """Standardise the given numeric columns with a freshly fitted StandardScaler.

    The scaler is fitted on, and applied to, all rows of `df`.

    NOTE(review): when `df` holds train and test rows together, the scaling
    statistics include the test rows -- confirm that this is intended.

    Args:
        df: DataFrame containing `numeric_cols`.
        numeric_cols: List of column names to standardise.

    Returns:
        The same DataFrame object with `numeric_cols` overwritten.
    """
    fitted_scaler = StandardScaler().fit(df[numeric_cols])
    standardized = fitted_scaler.transform(df[numeric_cols])
    df[numeric_cols] = standardized
    return df

def encode_and_scale_features(df, enc_features, scale_features):
    """One-hot encode the categorical columns, then standardise the numeric ones.

    Args:
        df: DataFrame to transform.
        enc_features: Column names passed one by one, in order, to
            oneHot_encode_feature().
        scale_features: Column names passed together to scale_numeric_features().

    Returns:
        The DataFrame returned by scale_numeric_features().
    """
    encoded = df
    for feature_name in enc_features:
        encoded = oneHot_encode_feature(encoded, feature_name)
    return scale_numeric_features(encoded, scale_features)

# Obtaining Preprocessed dataset

In [137]:
# Categorical columns that encode_and_scale_features() replaces with one-hot
# indicator columns.
cols_to_onehot_encode = [
    'AdmissionDx',
    'AdmissionType',
    'Gender',
    'Discharge_Specialty',
    'Dx_Discharge',
    'Discharge_Status',
    'Education',
    'Current_Work_Status'
]

# Numeric columns that are standardised. Age_Group is numeric at that point because
# fix_dataset() has already mapped its labels to integer ranks.
cols_to_scale= [
    'PreviousAdmissionDays',
    'LOS',
    'Age_Group',
    'Surgery_Count',
    'LOS_ICU',
    'Weight_Discharge',
    'Height_Discharge'
]

In [138]:
# Reload both splits from disk and tag each row with its origin (Train = 1 / 0), so
# the stacked frame can be split back into train and test after preprocessing.
# NOTE: the name `all` shadows Python's builtin all().
df_train = pd.read_csv('../data/train.csv', index_col=0).assign(Train=1)
df_test = pd.read_csv('../data/test.csv', index_col=0).assign(Train=0)
all = pd.concat([df_train, df_test])

In [139]:
# Full preprocessing of the stacked frame: clean first, then one-hot encode the
# categorical columns and standardise the numeric ones.
cleaned = fix_dataset(all)
all = encode_and_scale_features(cleaned, cols_to_onehot_encode, cols_to_scale)



In [140]:
# Preview the preprocessed frame (scaled numeric columns first, one-hot columns last).
all.head()

Unnamed: 0,PreviousAdmissionDays,LOS,Age_Group,Surgery_Count,LOS_ICU,Weight_Discharge,Height_Discharge,Label,acetylsalicylic_acid_count,albumin_count,alprostadil_count,amiodarone_count,atropine_count,b03aa07_count,benzathine_benzylpenicillin_count,betamethasone_count,butylscopolamine_count,calcium_chloride_count,carbohydrates_count,chlorhexidine_count,chloropyramine_count,clonazepam_count,coagulation_factor_ix_ii_vii_and_x_in_combination_count,coagulation_factor_viia_count,combinations_count,dexamethasone_and_antiinfectives_count,dexmedetomidine_count,diltiazem_count,dobutamine_count,electrolytes_count,epinephrine_count,eplerenone_count,escitalopram_count,etomidate_count,exemestane_count,ferric_oxide_polymaltose_complexes_count,ferrous_fumarate_count,flumazenil_count,folic_acid_count,fosfomycin_count,gliquidone_count,glyceryl_trinitrate_count,heparin_count,hydrocortisone_count,indapamide_count,indometacin_count,iodixanol_count,iohexol_count,isosorbide_dinitrate_count,isosorbide_mononitrate_count,ivabradine_count,ketamine_count,lacidipine_count,lactulose_count,lercanidipine_count,levobupivacaine_count,levodopa_and_decarboxylase_inhibitor_count,levofloxacin_count,lidocaine_count,linagliptin_count,linezolid_count,loperamide_count,macrogol_combinations_count,mannitol_count,memantine_count,mesalazine_count,metamizole_sodium_count,methylphenobarbital_count,methylprednisolone_count,metildigoxin_count,metoclopramide_count,mirtazapine_count,mometasone_count,montelukast_count,moxonidine_count,mupirocin_count,n02bf01_count,n02bf02_count,nadroparin_count,naloxone_count,neostigmine_count,nitrazepam_count,nitrofurantoin_count,norepinephrine_count,norfloxacin_count,octreotide_count,omega3triglycerides_incl_other_esters_and_acids_count,organoheparinoid_count,oxazepam_count,paracetamol_count,paroxetine_count,perindopril_amlodipine_and_indapamide_count,perindopril_and_amlodipine_count,perindopril_and_diuretics_count,pethidine_count,phenobarbital_count,pholcodine_count,phytomenadione_
count,piperacillin_and_betalactamase_inhibitor_count,potassium_chloride_count,potassium_different_salts_in_combination_count,prasugrel_count,prednisone_count,promazine_count,propafenone_count,propofol_count,protamine_count,rabeprazole_count,ramipril_and_amlodipine_count,ramipril_and_diuretics_count,ranitidine_count,ranolazine_count,repaglinide_count,rifampicin_count,rivaroxaban_count,rocuronium_bromide_count,rosuvastatin_and_ezetimibe_count,salmeterol_count,salmeterol_and_fluticasone_count,sertraline_count,sevoflurane_count,silver_sulfadiazine_count,sitagliptin_count,sotalol_count,sufentanil_count,sulfamethoxazole_and_trimethoprim_count,sulpiride_count,tamsulosin_count,tamsulosin_and_dutasteride_count,theophylline_count,thiamazole_count,thiethylperazine_count,thiopental_count,tianeptine_count,ticagrelor_count,tiotropium_bromide_count,tobramycin_count,trandolapril_count,tranexamic_acid_count,trimetazidine_count,trospium_count,urapidil_count,valsartan_count,valsartan_amlodipine_and_hydrochlorothiazide_count,valsartan_and_sacubitril_count,venlafaxine_count,verapamil_count,x99nijenavedenowho_count,zofenopril_count,A,B,C,D,E,F,G,H,I,J,K,L,M,N,Q,R,S,T,U,Y,Z,Train,Probability_0,Probability_1,PrevAdmDaysAvail,Antibiotici_count,Antikoagulanti_count,Antitrombotici_count,Antidijabetici_count,NSAID analgetici_count,Opioidni analgetici_count,Antihipertenzivi_count,Beta blokatori_count,ACE inhibitori_count,Diuretici_count,Statini_count,Antipsihotici_count,Antiepileptici_count,Sedativi i anksiolitici_count,Antigiht_count,Proton pump inhibitors_count,Antihistaminici_count,Antiasmatici_count,Antiviralni_count,Vitamini i Suplementi_count,Antiemetici_count,Thyroid 
therapy_count,Antifungalni_count,Imunosupresivi_count,Kortikosteroidi_count,AdmissionDx_B,AdmissionDx_C,AdmissionDx_D,AdmissionDx_E,AdmissionDx_F,AdmissionDx_G,AdmissionDx_H,AdmissionDx_I,AdmissionDx_J,AdmissionDx_K,AdmissionDx_L,AdmissionDx_M,AdmissionDx_N,AdmissionDx_Q,AdmissionDx_R,AdmissionDx_S,AdmissionDx_T,AdmissionDx_Y,AdmissionDx_Z,AdmissionType_Hitni,Gender_Ž,Discharge_Specialty_3010100,Discharge_Specialty_3100400,Discharge_Specialty_3100600,Discharge_Specialty_3190100,Discharge_Specialty_3190200,Dx_Discharge_C,Dx_Discharge_D,Dx_Discharge_E,Dx_Discharge_G,Dx_Discharge_H,Dx_Discharge_I,Dx_Discharge_J,Dx_Discharge_K,Dx_Discharge_L,Dx_Discharge_N,Dx_Discharge_Q,Dx_Discharge_R,Dx_Discharge_T,Dx_Discharge_Z,Discharge_Status_KUĆI,Discharge_Status_LIJEČENJE OKONČANO PROTIVNO SAVJETU DOKTORA,Discharge_Status_OSTALO,Discharge_Status_U DRUGU STACIONARNU ZDRAVSTVENU USTANOVU,Discharge_Status_UMRO (NIJE OBDUCIRAN),Education_NEPOZNAT OBRAZOVNI STATUS,Education_NIJE POHAĐALA OSNOVNU ŠKOLU,Education_NIJE ZAVRŠILA OSNOVNU ŠKOLU,Education_ZAVRŠENA OSNOVNA ŠKOLA,Education_ZAVRŠENA SREDNJA ŠKOLA,Education_ZAVRŠENA VIŠA ŠKOLA,Education_ZAVRŠENO VISOKO OBRAZOVANJE,Current_Work_Status_DOMAĆICA,Current_Work_Status_NEPOZNATO,Current_Work_Status_NEZAPOSLEN/POVREMENI POSAO,Current_Work_Status_POLJOPRIVREDNIK,Current_Work_Status_REDOVAN POSAO,Current_Work_Status_SAMOSTALNA DJELATNOST,Current_Work_Status_STUDENT,Current_Work_Status_UMIROVLJENIK,Current_Work_Status_UČENIK
1,-0.289586,-0.482977,1.69258,-0.458037,-0.050516,-2.007681,-1.644906,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,2,1,,,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-0.289586,1.883533,0.379549,1.910477,-0.050516,0.652788,-0.468054,0.0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,2,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,,,0.0,5,2,0,0,3,3,0,0,1,0,1,0,0,3,0,1,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,-0.800832,-0.167442,-3.997224,1.910477,-0.050516,-2.069552,-0.896,0.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,,,1.0,2,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,0.270413,-0.32521,1.69258,-0.458037,-0.050516,0.590916,0.387838,0.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,1,0,0,0,0,0,0,0,1,,,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
5,-0.289586,-0.32521,0.817226,-0.458037,-0.050516,-0.213411,0.280851,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,1,,,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [143]:
# Split the preprocessed frame back into its two origins and remove the columns
# that are not model inputs: the Train marker and the two Probability_* columns
# from both parts, plus Label from the test part.
helper_cols = ['Train', 'Probability_0', 'Probability_1']
df_train = all[all.Train == 1].drop(columns=helper_cols)
df_test = all[all.Train == 0].drop(columns=helper_cols + ['Label'])

In [144]:
# (rows, columns) of the training part; the columns still include Label.
df_train.shape

(28587, 257)

In [145]:
# (rows, columns) of the test part; Label has been dropped, so this should show one
# column fewer than df_train.
df_test.shape

(7336, 259)

In [148]:
# Every remaining training column except the target is a model input.
feature_cols = df_train.columns.tolist()
feature_cols.remove('Label')

In [149]:
# NumPy inputs for model training: the same feature columns, in the same order, are
# taken from both parts; the target is the training Label column.
X_train, y_train = df_train[feature_cols].values, df_train['Label'].values
X_test = df_test[feature_cols].values

X_train, y_train, i X_test su numpy matrice spremne za treniranje modela.