In [12]:
# Required Libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

# Step 1: Load the dataset
# Replace 'ckd_data.csv' with your actual dataset path
data = pd.read_csv('kidney_disease.csv')

# Step 2: Data Preprocessing
# Handle missing values (imputation), normalize numerical features, and encode categorical variables
# Convert columns to numeric if possible, otherwise fill with mode for categorical columns.

# Create a LabelEncoder object
encoder = LabelEncoder()

for col in data.columns:
    if col != 'classification': # Exclude target column from encoding and scaling
        try:# Required Libraries
            data[col] = pd.to_numeric(data[col])  # Attempt to convert to numeric
        except ValueError:
            data[col].fillna(data[col].mode()[0], inplace=True)  # Fill with mode for non-numeric
            # Use the encoder object to fit and transform the column
            data[col] = encoder.fit_transform(data[col])  # Then encode categorical
    # Encode the target column separately
    elif col == 'classification':
        data[col] = encoder.fit_transform(data[col])

# Fill NaN with mean for numeric columns only
numerical_cols = data.select_dtypes(include=['number']).columns
numerical_cols = numerical_cols.drop('classification') # Exclude target from mean imputation
# Impute NaN with mean of respective column for numerical columns
for col in numerical_cols:
    data[col].fillna(data[col].mean(), inplace=True)

# Normalize numerical data (exclude target column)
scaler = StandardScaler()
data[numerical_cols] = scaler.fit_transform(data[numerical_cols])

# Step 3: Feature Selection
# Use correlation or feature importance to select significant features
X = data.drop('classification', axis=1)  # Replace 'target' with the name of your target column
y = data['classification']
selected_features = X.columns  # Apply feature selection techniques here if required

# Step 4: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X[selected_features], y, test_size=0.2, random_state=42)

# Step 5: ANN Model
def build_ann(input_dim):
    model = Sequential()
    model.add(Dense(128, activation='relu', input_dim=input_dim))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))  # Binary classification
    model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Build and train ANN
ann = build_ann(X_train.shape[1])
ann.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# ANN Evaluation
y_pred_ann = (ann.predict(X_test) > 0.5).astype(int)
print("ANN Accuracy:", accuracy_score(y_test, y_pred_ann))
print(classification_report(y_test, y_pred_ann))

# Step 6: Traditional ML Models for Comparison
# Random Forest
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))

# XGBoost
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)
print("XGBoost Accuracy:", accuracy_score(y_test, y_pred_xgb))
print(classification_report(y_test, y_pred_xgb))

# Step 7: Results Comparison
print("ROC-AUC for ANN:", roc_auc_score(y_test, y_pred_ann))
print("ROC-AUC for Random Forest:", roc_auc_score(y_test, y_pred_rf))
print("ROC-AUC for XGBoost:", roc_auc_score(y_test, y_pred_xgb))


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[col].fillna(data[col].mean(), inplace=True)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 0.3937 - loss: 0.5602 - val_accuracy: 0.5000 - val_loss: 0.0230
Epoch 2/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.5391 - loss: -0.0625 - val_accuracy: 0.5000 - val_loss: -0.5587
Epoch 3/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4717 - loss: -0.7260 - val_accuracy: 0.5000 - val_loss: -1.1865
Epoch 4/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4917 - loss: -1.2491 - val_accuracy: 0.5000 - val_loss: -1.9575
Epoch 5/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5060 - loss: -2.0583 - val_accuracy: 0.4688 - val_loss: -3.0382
Epoch 6/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5271 - loss: -3.3045 - val_accuracy: 0.4531 - val_loss: -4.5713
Epoch 7/50
[1m8/8[0m [32m━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


XGBoost Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        52
           2       1.00      1.00      1.00        28

    accuracy                           1.00        80
   macro avg       1.00      1.00      1.00        80
weighted avg       1.00      1.00      1.00        80

ROC-AUC for ANN: 0.9615384615384616
ROC-AUC for Random Forest: 1.0
ROC-AUC for XGBoost: 1.0


Parameters: { "use_label_encoder" } are not used.



**CKD prediction**<br>
ANN model to evulate dataset provided by kaggle
Compared with random forest and xg boost

------------------------------------------------
**outcome**<br>
ANN: 0.6(60%)
Random Forest: 1.0(100%)
XG Boost:1.0(100%)

------------------------------------------------

**conclusion**<br>
Random forest and XG Boost both provide accurate solution compared to ANN


In [13]:
pip install imbalanced-learn
# Required Libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam


# Step 1: Load the dataset
# Replace 'ckd_data.csv' with your actual dataset path
data = pd.read_csv('kidney_disease.csv')

# Step 2: Data Preprocessing
# Handle missing values (imputation), normalize numerical features, and encode categorical variables
# Convert columns to numeric if possible, otherwise fill with mode for categorical columns.

# Create a LabelEncoder object
encoder = LabelEncoder()

for col in data.columns:
    if col != 'classification': # Exclude target column from encoding and scaling
        try:
            data[col] = pd.to_numeric(data[col])  # Attempt to convert to numeric
        except ValueError:
            data[col].fillna(data[col].mode()[0], inplace=True)  # Fill with mode for non-numeric
            # Use the encoder object to fit and transform the column
            data[col] = encoder.fit_transform(data[col])  # Then encode categorical
    # Encode the target column separately
    elif col == 'classification':
        data[col] = encoder.fit_transform(data[col])

# Fill NaN with mean for numeric columns only
numerical_cols = data.select_dtypes(include=['number']).columns
numerical_cols = numerical_cols.drop('classification') # Exclude target from mean imputation
# Impute NaN with mean of respective column for numerical columns
for col in numerical_cols:
    data[col].fillna(data[col].mean(), inplace=True)

# Normalize numerical data (exclude target column)
scaler = StandardScaler()
data[numerical_cols] = scaler.fit_transform(data[numerical_cols])

# Step 3: Feature Selection
# Use correlation or feature importance to select significant features
X = data.drop('classification', axis=1)  # Replace 'target' with the name of your target column
y = data['classification']
selected_features = X.columns  # Apply feature selection techniques here if required

# Step 4: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X[selected_features], y, test_size=0.2, random_state=42)

# Step 4.1: Apply SMOTE to handle class imbalance
from imblearn.over_sampling import SMOTE
from collections import Counter

print("Before SMOTE:", Counter(y_train))
# Adjust k_neighbors to be less than or equal to the smallest minority class size
# k_neighbors should be less than the number of samples in the smallest class.
# Since the error message shows n_samples_fit = 2, we set k_neighbors to 1
smote = SMOTE(random_state=42, k_neighbors=1)  # Set k_neighbors to 1
X_train, y_train = smote.fit_resample(X_train, y_train)
print("After SMOTE:", Counter(y_train))

# Step 4.2: Compute class weights for handling imbalance during ANN training
from sklearn.utils.class_weight import compute_class_weight
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train
)
class_weights_dict = dict(enumerate(class_weights))
print("Class Weights:", class_weights_dict)

# Step 5: ANN Model
def build_ann(input_dim):
    model = Sequential()
    model.add(Dense(128, activation='relu', input_dim=input_dim))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))  # Binary classification
    model.compile(optimizer=Adam(learning_rate=0.01), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Build and train ANN with class weights
ann = build_ann(X_train.shape[1])
ann.fit(
    X_train, y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    class_weight=class_weights_dict  # Add class weights here
)
# Build and train ANN
ann = build_ann(X_train.shape[1])
ann.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# ANN Evaluation
y_pred_ann = (ann.predict(X_test) > 0.5).astype(int)
print("ANN Accuracy:", accuracy_score(y_test, y_pred_ann))
print(classification_report(y_test, y_pred_ann))

# Step 6: Traditional ML Models for Comparison
# Random Forest
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))

# XGBoost
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)
print("XGBoost Accuracy:", accuracy_score(y_test, y_pred_xgb))
print(classification_report(y_test, y_pred_xgb))

# Step 7: Results Comparison
print("ROC-AUC for ANN:", roc_auc_score(y_test, y_pred_ann))
print("ROC-AUC for Random Forest:", roc_auc_score(y_test, y_pred_rf))
print("ROC-AUC for XGBoost:", roc_auc_score(y_test, y_pred_xgb))


SyntaxError: invalid syntax (584148201.py, line 1)

**Attempt to increase accuracy**<br>
Class Imbalance Handling

**Result**<br>
Accuracy of ANN droped from 0.6 to 0.565

**Hyperparameter Tuning and Early Stopping**

In [None]:
# Required Libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

# Step 1: Load the dataset
# Replace 'ckd_data.csv' with your actual dataset path
data = pd.read_csv('kidney_disease.csv')

# Step 2: Data Preprocessing
# Handle missing values (imputation), normalize numerical features, and encode categorical variables
# Convert columns to numeric if possible, otherwise fill with mode for categorical columns.

# Create a LabelEncoder object
encoder = LabelEncoder()

for col in data.columns:
    if col != 'classification': # Exclude target column from encoding and scaling
        try:# Required Libraries
            data[col] = pd.to_numeric(data[col])  # Attempt to convert to numeric
        except ValueError:
            data[col].fillna(data[col].mode()[0], inplace=True)  # Fill with mode for non-numeric
            # Use the encoder object to fit and transform the column
            data[col] = encoder.fit_transform(data[col])  # Then encode categorical
    # Encode the target column separately
    elif col == 'classification':
        data[col] = encoder.fit_transform(data[col])

# Fill NaN with mean for numeric columns only
numerical_cols = data.select_dtypes(include=['number']).columns
numerical_cols = numerical_cols.drop('classification') # Exclude target from mean imputation
# Impute NaN with mean of respective column for numerical columns
for col in numerical_cols:
    data[col].fillna(data[col].mean(), inplace=True)

# Normalize numerical data (exclude target column)
scaler = StandardScaler()
data[numerical_cols] = scaler.fit_transform(data[numerical_cols])

# Step 3: Feature Selection
# Use correlation or feature importance to select significant features
X = data.drop('classification', axis=1)  # Replace 'target' with the name of your target column
y = data['classification']
selected_features = X.columns  # Apply feature selection techniques here if required

# Step 4: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X[selected_features], y, test_size=0.2, random_state=42)

# Step 5: ANN Model
def build_ann(input_dim):
    model = Sequential()
    model.add(Dense(128, activation='relu', input_dim=input_dim))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))  # Binary classification
    model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Build and train ANN
ann = build_ann(X_train.shape[1])
ann.fit(X_train, y_train, epochs=150, batch_size=128, validation_split=0.2)

# ANN Evaluation
y_pred_ann = (ann.predict(X_test) > 0.5).astype(int)
print("ANN Accuracy:", accuracy_score(y_test, y_pred_ann))
print(classification_report(y_test, y_pred_ann))

# Step 6: Traditional ML Models for Comparison
# Random Forest
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))

# XGBoost
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)
print("XGBoost Accuracy:", accuracy_score(y_test, y_pred_xgb))
print(classification_report(y_test, y_pred_xgb))

# Step 7: Results Comparison
print("ROC-AUC for ANN:", roc_auc_score(y_test, y_pred_ann))
print("ROC-AUC for Random Forest:", roc_auc_score(y_test, y_pred_rf))
print("ROC-AUC for XGBoost:", roc_auc_score(y_test, y_pred_xgb))


Epoch 1/150


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[col].fillna(data[col].mean(), inplace=True)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 191ms/step - accuracy: 0.1719 - loss: 0.6492 - val_accuracy: 0.2656 - val_loss: 0.4916
Epoch 2/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - accuracy: 0.2604 - loss: 0.4603 - val_accuracy: 0.3906 - val_loss: 0.2950
Epoch 3/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.3255 - loss: 0.2751 - val_accuracy: 0.3906 - val_loss: 0.1101
Epoch 4/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - accuracy: 0.3750 - loss: 0.0899 - val_accuracy: 0.4219 - val_loss: -0.0691
Epoch 5/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - accuracy: 0.4635 - loss: -0.0843 - val_accuracy: 0.4688 - val_loss: -0.2505
Epoch 6/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step - accuracy: 0.4583 - loss: -0.2815 - val_accuracy: 0.4688 - val_loss: -0.4347
Epoch 7/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Random Forest Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        52
           2       1.00      1.00      1.00        28

    accuracy                           1.00        80
   macro avg       1.00      1.00      1.00        80
weighted avg       1.00      1.00      1.00        80

XGBoost Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        52
           2       1.00      1.00      1.00        28

    accuracy                           1.00        80
   macro avg       1.00      1.00      1.00        80
weighted avg       1.00      1.00      1.00        80

ROC-AUC for ANN: 0.951923076923077
ROC-AUC for Random Forest: 1.0
ROC-AUC for XGBoost: 1.0


Parameters: { "use_label_encoder" } are not used.



In [None]:
# Required Libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from imblearn.over_sampling import SMOTE

# Step 1: Load the dataset
# Replace 'ckd_data.csv' with your actual dataset path
data = pd.read_csv('kidney_disease.csv')

# Step 2: Data Preprocessing
# Handle missing values
numeric_cols = data.select_dtypes(include=['float64', 'int64']).columns
categorical_cols = data.select_dtypes(include=['object']).columns

for col in numeric_cols:
    data[col].fillna(data[col].mean(), inplace=True)

for col in categorical_cols:
    data[col].fillna('Unknown', inplace=True)  # or data[col].mode()[0]

# Encode categorical variables, excluding the target variable
categorical_cols_to_encode = categorical_cols[categorical_cols != 'classification']
data = pd.get_dummies(data, columns=categorical_cols_to_encode, drop_first=True, dummy_na=False) # dummy_na=False to avoid creating new columns for NaN

#Remove Outliers (Consider removing or adjusting this part)
#Q1 = data[numeric_cols].quantile(0.25)
#Q3 = data[numeric_cols].quantile(0.75)
#IQR = Q3 - Q1
#data = data[~((data[numeric_cols] < (Q1 - 1.5 * IQR)) | (data[numeric_cols] > (Q3 + 1.5 * IQR))).any(axis=1)]

# Normalize numerical features
scaler = StandardScaler()
data[numeric_cols] = scaler.fit_transform(data[numeric_cols])

# Assuming 'ckd' and 'notckd' are the values in 'classification'
data['classification'] = data['classification'].map({'ckd': 1, 'notckd': 0})
# Handle NaN in the target variable before splitting
data.dropna(subset=['classification'], inplace=True)

# Split data into features and target
X = data.drop('classification', axis=1)
y = data['classification']

# Handle class imbalance using SMOTE
smote = SMOTE(random_state=42, k_neighbors=1)  # k_neighbors=1 to avoid the previous error
X, y = smote.fit_resample(X, y)

# Step 3: Feature Selection (Optional, can be skipped for now)
selected_features = X.columns  # Use feature selection techniques if needed #Assign selected_features after SMOTE

# Step 4: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X[selected_features], y, test_size=0.2, random_state=42)
X_train += np.random.normal(0, 0.01, X_train.shape)


#ANN model
from keras.regularizers import l2
def build_ann(input_dim):
    model = Sequential()
    model.add(Dense(128, activation='relu', input_dim=input_dim, kernel_regularizer=l2(0.01)))
    model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01)))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Build and train ANN
ann = build_ann(X_train.shape[1])
ann.fit(X_train, y_train, epochs=30, batch_size=32, validation_split=0.3)

# ANN Evaluation
y_pred_ann = (ann.predict(X_test) > 0.5).astype(int)
print("ANN Accuracy:", accuracy_score(y_test, y_pred_ann))
print(classification_report(y_test, y_pred_ann))

# Step 6: Traditional ML Models for Comparison
# Random Forest
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))

# XGBoost
xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)
print("XGBoost Accuracy:", accuracy_score(y_test, y_pred_xgb))
print(classification_report(y_test, y_pred_xgb))

# Step 7: Results Comparison
print("ROC-AUC for ANN:", roc_auc_score(y_test, y_pred_ann))
print("ROC-AUC for Random Forest:", roc_auc_score(y_test, y_pred_rf))
print("ROC-AUC for XGBoost:", roc_auc_score(y_test, y_pred_xgb))


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[col].fillna(data[col].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[col].fillna('Unknown', inplace=True)  # or data[col].mode()[0]
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 97ms/step - accuracy: 0.6775 - loss: 2.9853 - val_accuracy: 0.9832 - val_loss: 2.5120
Epoch 2/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.9820 - loss: 2.3850 - val_accuracy: 0.9916 - val_loss: 2.0303
Epoch 3/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.9920 - loss: 1.9353 - val_accuracy: 1.0000 - val_loss: 1.6809
Epoch 4/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 1.0000 - loss: 1.6107 - val_accuracy: 1.0000 - val_loss: 1.4118
Epoch 5/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 1.0000 - loss: 1.3517 - val_accuracy: 1.0000 - val_loss: 1.1923
Epoch 6/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 1.0000 - loss: 1.1493 - val_accuracy: 1.0000 - val_loss: 1.0104
Epoch 7/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━



[1m1/4[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m0s[0m 55ms/step



[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
ANN Accuracy: 0.99
              precision    recall  f1-score   support

         0.0       0.98      1.00      0.99        50
         1.0       1.00      0.98      0.99        50

    accuracy                           0.99       100
   macro avg       0.99      0.99      0.99       100
weighted avg       0.99      0.99      0.99       100

Random Forest Accuracy: 1.0
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        50
         1.0       1.00      1.00      1.00        50

    accuracy                           1.00       100
   macro avg       1.00      1.00      1.00       100
weighted avg       1.00      1.00      1.00       100



Parameters: { "use_label_encoder" } are not used.



XGBoost Accuracy: 0.99
              precision    recall  f1-score   support

         0.0       1.00      0.98      0.99        50
         1.0       0.98      1.00      0.99        50

    accuracy                           0.99       100
   macro avg       0.99      0.99      0.99       100
weighted avg       0.99      0.99      0.99       100

ROC-AUC for ANN: 0.99
ROC-AUC for Random Forest: 1.0
ROC-AUC for XGBoost: 0.99


**with different dataset**

Keras is installed and working!
