In [18]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
import warnings
# Silence every warning category for the rest of the session. This also hides
# any warnings raised by the pandas / scikit-learn calls in later cells.
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, RobustScaler, LabelEncoder
import csv
from sklearn.impute import KNNImputer

In [19]:

# Read the raw training table from 'train.csv' into a DataFrame
df_train = pd.read_csv(filepath_or_buffer='train.csv')

In [20]:
# Integer-encode every object-typed column of the training frame.
# One LabelEncoder is kept per column (keyed by column name) so the exact same
# mapping can be re-applied to other data later on.
categorical_columns = df_train.select_dtypes(include=['object']).columns
label_encoders = {name: LabelEncoder() for name in categorical_columns}
for name, encoder in label_encoders.items():
    df_train[name] = encoder.fit_transform(df_train[name])

In [21]:
# Remove every training column whose name contains the substring below
text_to_find = 'noninvasive'
columns_to_drop = list(filter(lambda name: text_to_find in name, df_train.columns))
df_train = df_train.drop(columns=columns_to_drop)


In [22]:

# Fill missing feature values with a k-nearest-neighbours imputer.
#
# The target column 'hospital_death' is deliberately kept out of the imputer:
# if it were part of the matrix, neighbour distances (and therefore the imputed
# feature values) would depend on the label, which is unavailable at prediction
# time and leaks target information into the training features.
# NOTE(review): any missing values in 'hospital_death' itself are left as-is;
# rows without a label should be dropped rather than imputed.
imr = KNNImputer(n_neighbors=10000, weights='uniform')
feature_columns = df_train.columns[df_train.columns != 'hospital_death']
df_train[feature_columns] = imr.fit_transform(df_train[feature_columns].values)

In [23]:
# Split the training frame into the feature matrix and the target.
# X_train is an explicit copy so that the column assignments performed on it
# later are unambiguous (no SettingWithCopy situation) and can never write
# through to df_train.
X_train = df_train.loc[:, df_train.columns != 'hospital_death'].copy()
# Target kept as a one-column DataFrame (double brackets), as before.
y_train = df_train[['hospital_death']]

In [24]:
# Columns to be scaled: every int64/float64 column of the training frame,
# minus the three ID-named columns and the target.
numeric_columns_train = (
    df_train
    .select_dtypes(include=['int64', 'float64'])
    .columns
    .drop(['RecordID', 'hospital_id', 'icu_id', 'hospital_death'])
)


In [25]:
# Learn the robust scaling statistics from the selected training columns, then
# overwrite those columns with their scaled values.
rbs = RobustScaler().fit(X_train[numeric_columns_train])
X_train[numeric_columns_train] = rbs.transform(X_train[numeric_columns_train])


In [26]:

# Train a Gaussian Naive Bayes classifier on the entire training dataset.
# y_train is a one-column DataFrame; flatten it to the 1-D (n_samples,) shape
# that fit() expects instead of relying on scikit-learn's implicit reshape
# (which would otherwise emit a DataConversionWarning).
nb_classifier = GaussianNB(var_smoothing=1e-3)
nb_classifier.fit(X_train, np.ravel(y_train))


In [27]:
# Read the raw test table from 'test.csv' into a DataFrame
df_test = pd.read_csv(filepath_or_buffer='test.csv')


In [28]:
# Re-apply the encoders fitted on the training data to the matching test
# columns, so both frames share the same integer codes.
# NOTE(review): transform is expected to fail on labels that were absent from
# the training data — confirm the test set contains none.
for name, encoder in label_encoders.items():
    df_test[name] = encoder.transform(df_test[name])

In [29]:
# Remove every test column whose name contains the substring below
text_to_find = 'noninvasive'
columns_to_drop = list(filter(lambda name: text_to_find in name, df_test.columns))
df_test = df_test.drop(columns=columns_to_drop)


In [30]:

# Fill missing values in the test frame with a KNN imputer configured like the
# training one.
# NOTE(review): this imputer is fitted on the test rows themselves, not on the
# training data — confirm that is intended.
imr_test = KNNImputer(n_neighbors=10000, weights='uniform')
df_test[:] = imr_test.fit_transform(df_test.values)


In [31]:
# Columns to be scaled in the test frame: every int64/float64 column minus the
# three ID-named columns.
numeric_columns_test = (
    df_test
    .select_dtypes(include=['int64', 'float64'])
    .columns
    .drop(['RecordID', 'hospital_id', 'icu_id'])
)


In [32]:
# Scale the test features with the scaler that was fitted on the training data.
# Fitting a second scaler on the test set would centre/scale the test features
# with different statistics than the ones the classifier was trained on, so the
# train-fitted `rbs` and its column list are reused here. `rbs_test` is kept as
# an alias so the name remains available.
rbs_test = rbs
df_test[numeric_columns_train] = rbs_test.transform(df_test[numeric_columns_train])


In [33]:

# Score the test rows with the trained Naive Bayes classifier and keep only the
# probability column at index 1 (presumably the hospital_death == 1 class —
# confirm against nb_classifier.classes_).
nb_probs = nb_classifier.predict_proba(df_test)[:, 1]


In [34]:
# Write the Naive Bayes probabilities to a CSV file, one row per prediction:
# a running id starting at 50001, followed by the probability.
filepath_nb = 'predictnb.csv'
with open(filepath_nb, mode='w', newline='') as out_file:
    csv.writer(out_file).writerows(
        [row_id, prob] for row_id, prob in enumerate(nb_probs, start=50001)
    )