In [139]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB  # Add Gaussian Naive Bayes
import warnings 
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split  # Import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
import csv
from sklearn.preprocessing import RobustScaler
from sklearn.impute import KNNImputer

In [140]:
# Read the training table and one-hot encode its non-numeric columns.
df_train = pd.get_dummies(pd.read_csv('train.csv'))

# Discard every column whose name contains the substring below.
text_to_find = 'noninvasive'
columns_to_drop = [name for name in df_train.columns if text_to_find in name]
df_train = df_train.drop(columns=columns_to_drop)

In [141]:
# Fill missing values in the training table with a KNN imputer that is fitted
# on, and then applied to, the full value matrix of df_train.
# NOTE(review): the matrix still contains 'hospital_death' and the id columns
# at this point, so they take part in the neighbour search - confirm intended.
train_values = df_train.values
imr = KNNImputer(n_neighbors=10000, weights='uniform')
imr.fit(train_values)
df_train[:] = imr.transform(train_values)

In [142]:
# Separate the feature matrix from the target column.
# drop() returns a new, independent DataFrame, so the column assignments made
# on X_train further down are not treated by pandas as chained assignment into
# a slice of df_train (the .loc boolean-mask form can trigger
# SettingWithCopyWarning, which the global warnings filter would hide).
X_train = df_train.drop(columns=['hospital_death'])
# Kept as a single-column DataFrame so any code expecting that shape still works.
y_train = df_train[['hospital_death']]


In [143]:
# Columns of df_train with int64/float64 dtype, minus the identifier columns
# and the target; these are the columns that get rescaled later.
non_scaled_train_columns = ['RecordID', 'hospital_id', 'icu_id', 'hospital_death']
numeric_columns_train = (
    df_train
    .select_dtypes(include=['int64', 'float64'])
    .columns
    .drop(non_scaled_train_columns)
)


In [144]:
# Fit a RobustScaler on the selected numeric training columns and overwrite
# those columns in X_train with their scaled values.
rbs = RobustScaler()
train_numeric_block = X_train[numeric_columns_train]
rbs.fit(train_numeric_block)
X_train[numeric_columns_train] = rbs.transform(train_numeric_block)


In [145]:
# Train a Gaussian Naive Bayes classifier on the entire training dataset.
# y_train is a single-column DataFrame; scikit-learn expects a 1-D target of
# shape (n_samples,) and otherwise emits a DataConversionWarning, so the
# column is flattened explicitly before fitting.
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train.values.ravel())

In [146]:
# Read the test table and one-hot encode its non-numeric columns.
df_test = pd.get_dummies(pd.read_csv('test.csv'))

# Discard every column whose name contains text_to_find (set in the
# training-data cell).
columns_to_drop = [name for name in df_test.columns if text_to_find in name]
df_test = df_test.drop(columns=columns_to_drop)

In [147]:
# Fill missing values in the test table with a separate KNN imputer that is
# fitted on, and then applied to, the value matrix of df_test.
# NOTE(review): this imputer learns from the test rows only, not from the
# training data - confirm that is intended.
test_values = df_test.values
imr_test = KNNImputer(n_neighbors=10000, weights='uniform')
imr_test.fit(test_values)
df_test[:] = imr_test.transform(test_values)

In [148]:
# Columns of df_test with int64/float64 dtype, minus the identifier columns.
non_scaled_test_columns = ['RecordID', 'hospital_id', 'icu_id']
numeric_columns_test = (
    df_test
    .select_dtypes(include=['int64', 'float64'])
    .columns
    .drop(non_scaled_test_columns)
)


In [149]:
# Scale the test features with the scaler that was fitted on the training
# data. Fitting a second RobustScaler on the test set would centre and scale
# the test features with different medians/IQRs than the ones the classifier
# was trained on, so the two feature spaces would not be comparable.
# The scaler must receive exactly the columns it was fitted on, in the same
# order, hence numeric_columns_train is used for the selection.
rbs_test = rbs  # alias kept so existing references to rbs_test keep working
df_test[numeric_columns_train] = rbs_test.transform(df_test[numeric_columns_train])


In [150]:
# Make predictions on the test dataset using the trained Naive Bayes classifier.
# train.csv and test.csv are one-hot encoded independently, so the resulting
# column order (and any extra test-only dummy columns) is not guaranteed to
# match the training matrix. Selecting by X_train.columns feeds the classifier
# the training feature layout; a KeyError here names any training feature that
# is missing from the test table.
test_features = df_test[X_train.columns]
nb_probs = nb_classifier.predict_proba(test_features)
# Keep only the second probability column (index 1 of the classifier's classes).
nb_probs = nb_probs[:, 1]


In [151]:
# Write the Naive Bayes probabilities to a CSV file, one row per prediction:
# a sequential id (50001 for the first row, then 50002, ...) followed by the
# probability. No header row is written.
filepath_nb = 'predictnb.csv'
with open(filepath_nb, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(
        [row_id, probability]
        for row_id, probability in enumerate(nb_probs, start=50001)
    )






