USING NB-3 to make predictions

In [2]:
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.experimental import enable_iterative_imputer  # This line enables the experimental features
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import StandardScaler
import pandas as pd
from imblearn.over_sampling import SVMSMOTE
from imblearn.under_sampling import RandomUnderSampler

train = pd.read_csv('training_data_imputed.csv')
test=pd.read_csv("test_data_no_missing_values.csv")

# Assuming 'target' is the column name for the target variable
y_train = train['diabetes_mellitus']
X_train = train.drop('diabetes_mellitus', axis=1)

y_test = test['diabetes_mellitus']
X_test = test.drop('diabetes_mellitus', axis=1)

# Scaling the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Training the Random Forest Classifier
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train_scaled, y_train)

# Predictions
y_pred = rf_classifier.predict(X_test_scaled)
y_prob = rf_classifier.predict_proba(X_test_scaled)[:, 1]  # probabilities for AUC

# Metrics
classification_rep = classification_report(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_prob)

print(classification_rep)
print('AUC ROC:', roc_auc)

              precision    recall  f1-score   support

           0       0.84      0.95      0.89      1613
           1       0.51      0.24      0.33       372

    accuracy                           0.82      1985
   macro avg       0.68      0.59      0.61      1985
weighted avg       0.78      0.82      0.79      1985

AUC ROC: 0.7806231626102433


KNN IMPUTER

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.impute import KNNImputer
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Load the data
data = pd.read_csv('1.csv')

# Binary encoding for 'gender'
data['gender'] = data['gender'].astype('category').cat.codes

# One-hot encoding for other categorical columns
categorical_columns = ['ethnicity', 'hospital_admit_source', 'icu_admit_source', 'icu_stay_type', 'icu_type']
data = pd.get_dummies(data, columns=categorical_columns)

# Splitting the data into train and test sets
X = data.drop(['diabetes_mellitus', 'encounter_id', 'hospital_id'], axis=1)
y = data['diabetes_mellitus'].astype(int)  # Ensure the target is integer
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Imputation on training and test sets separately using KNNImputer
knn_imputer = KNNImputer(n_neighbors=5)  # Adjust n_neighbors as needed
X_train_imputed = pd.DataFrame(knn_imputer.fit_transform(X_train), columns=X_train.columns)
X_test_imputed = pd.DataFrame(knn_imputer.transform(X_test), columns=X_test.columns)

# Scaling
scaler = StandardScaler()
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train_imputed), columns=X_train_imputed.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test_imputed), columns=X_test_imputed.columns)  

# Training the Random Forest Classifier
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train_scaled, y_train)

# Predictions
y_pred = rf_classifier.predict(X_test_scaled)
y_prob = rf_classifier.predict_proba(X_test_scaled)[:, 1]  # probabilities for AUC

# Metrics
classification_rep = classification_report(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_prob)

print(classification_rep)
print('AUC ROC:', roc_auc)


              precision    recall  f1-score   support

           0       0.84      0.95      0.89     30574
           1       0.66      0.32      0.43      8474

    accuracy                           0.82     39048
   macro avg       0.75      0.64      0.66     39048
weighted avg       0.80      0.82      0.79     39048

AUC ROC: 0.8158751929624575
