In [1]:
# # This Python 3 environment comes with many helpful analytics libraries installed
# # It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# # For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# # Input data files are available in the read-only "../input/" directory
# # For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [3]:
train_dir = '/kaggle/input/pcos-detection-using-ultrasound-images/data/train'
test_dir = '/kaggle/input/pcos-detection-using-ultrasound-images/data/test'
image_size = (224, 224)  


def load_images_from_folder(base_dir):
    images = []
    labels = []
    classes = {'infected': 1, 'notinfected': 0} 
    for label in classes:
        folder_path = os.path.join(base_dir, label)
        for image_file in os.listdir(folder_path):
            img_path = os.path.join(folder_path, image_file)
            img = cv2.imread(img_path)
            if img is not None:
                img = cv2.resize(img, image_size)
                images.append(img)
                labels.append(classes[label])
    return np.array(images), np.array(labels)


train_images, train_labels = load_images_from_folder(train_dir)
test_images, test_labels = load_images_from_folder(test_dir)

train_images = train_images / 255.0
test_images = test_images / 255.0

print(test_images[0])

[[[0.01568627 0.         0.        ]
  [0.03137255 0.01568627 0.01176471]
  [0.08235294 0.06666667 0.0627451 ]
  ...
  [0.         0.00392157 0.        ]
  [0.         0.01568627 0.01176471]
  [0.00784314 0.01176471 0.00392157]]

 [[0.04705882 0.03137255 0.02745098]
  [0.09803922 0.08235294 0.07843137]
  [0.21568627 0.2        0.19607843]
  ...
  [0.         0.00392157 0.        ]
  [0.         0.         0.        ]
  [0.00392157 0.00784314 0.        ]]

 [[0.33333333 0.31764706 0.31372549]
  [0.25490196 0.23921569 0.23529412]
  [0.21568627 0.2        0.19607843]
  ...
  [0.02352941 0.03137255 0.03137255]
  [0.02745098 0.03529412 0.03529412]
  [0.00392157 0.00784314 0.        ]]

 ...

 [[0.61176471 0.61176471 0.61176471]
  [0.61176471 0.61176471 0.61176471]
  [0.61176471 0.61176471 0.61176471]
  ...
  [0.61176471 0.61176471 0.61176471]
  [0.61176471 0.61176471 0.61176471]
  [0.61176471 0.61176471 0.61176471]]

 [[0.61176471 0.61176471 0.61176471]
  [0.61176471 0.61176471 0.61176471]


In [4]:
from tensorflow.keras.utils import to_categorical
X_train, X_val, y_train, y_val = train_test_split(train_images, train_labels, test_size=0.15, random_state=42)

y_train = to_categorical(y_train, 2)
y_val = to_categorical(y_val, 2)
y_test = to_categorical(test_labels, 2)


In [5]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(
    rotation_range=20,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest"
)


datagen.fit(X_train)

# **RESNET50**

In [6]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D


# base_model = ResNet50(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

base_model = ResNet50(weights='/kaggle/input/resnet50/keras/default/1/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
                      include_top=False, 
                      input_shape=(224, 224, 3))



x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(2, activation='softmax')(x)


model = Model(inputs=base_model.input, outputs=predictions)

for layer in base_model.layers:
    layer.trainable = False


In [7]:
from tensorflow.keras.optimizers import Adam

model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='binary_crossentropy',
              metrics=['accuracy'])


history = model.fit(
    datagen.flow(X_train, y_train, batch_size=32),
    validation_data=(X_val, y_val),
    epochs=55,
    verbose=1
)


Epoch 1/55


  self._warn_if_super_not_called()
I0000 00:00:1736493206.180221      68 service.cc:145] XLA service 0x78f3280032b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1736493206.180269      68 service.cc:153]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1736493206.180275      68 service.cc:153]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5


[1m 1/52[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m14:34[0m 17s/step - accuracy: 0.6562 - loss: 0.6956

I0000 00:00:1736493214.679330      68 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 485ms/step - accuracy: 0.5834 - loss: 0.7821 - val_accuracy: 0.6021 - val_loss: 0.6480
Epoch 2/55
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 285ms/step - accuracy: 0.5953 - loss: 0.6977 - val_accuracy: 0.6021 - val_loss: 0.6434
Epoch 3/55
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 271ms/step - accuracy: 0.5897 - loss: 0.6834 - val_accuracy: 0.6021 - val_loss: 0.6240
Epoch 4/55
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 278ms/step - accuracy: 0.6348 - loss: 0.6522 - val_accuracy: 0.6920 - val_loss: 0.5887
Epoch 5/55
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 273ms/step - accuracy: 0.6738 - loss: 0.6250 - val_accuracy: 0.8478 - val_loss: 0.5287
Epoch 6/55
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 276ms/step - accuracy: 0.7283 - loss: 0.5785 - val_accuracy: 0.9377 - val_loss: 0.4579
Epoch 7/55
[1m52/52[0m [32m━━━

In [8]:
# Evaluate on the test set
test_loss, test_acc = model.evaluate(test_images, y_test)
print(f"Test Accuracy: {test_acc}")

[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 129ms/step - accuracy: 0.9869 - loss: 0.0252
Test Accuracy: 0.9916753172874451


In [9]:
# # model.save('full_model_1.h5')
# from tensorflow.keras.models import load_model

# # Load the saved model
# model = load_model('/kaggle/working/full_model_1.h5')

# # Example testing code
# # results = model.evaluate(X_test, y_test, verbose=1)
# test_loss, test_acc = model.evaluate(test_images, y_test)
# print(f"Test Loss: {test_loss}")
# print(f"Test Accuracy: {test_acc}")

# # Making predictions
# predictions = model.predict(test_images)

# CNN with pre-trained VGGNet16 

In [10]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D
import numpy as np
import xgboost as xgb
from sklearn.metrics import accuracy_score
X_test=test_images

vgg_model = VGG16(weights='/kaggle/input/vggnet16/keras/default/1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
                  include_top=False, input_shape=(224, 224, 3))

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score, confusion_matrix, classification_report


# Add a global spatial average pooling layer
x = vgg_model.output
x = GlobalAveragePooling2D()(x)


feature_extractor = Model(inputs=vgg_model.input, outputs=x)

for layer in vgg_model.layers:
    layer.trainable = False


X_train_features = feature_extractor.predict(X_train)
X_val_features = feature_extractor.predict(X_val)
X_test_features = feature_extractor.predict(X_test)


X_train_features = X_train_features.reshape((X_train_features.shape[0], -1))
X_val_features = X_val_features.reshape((X_val_features.shape[0], -1))
X_test_features = X_test_features.reshape((X_test_features.shape[0], -1))

xgb_model = xgb.XGBClassifier(n_estimators=100, learning_rate=0.01, max_depth=4)


xgb_model.fit(X_train_features, y_train)

val_predictions = xgb_model.predict(X_val_features)
val_proba = xgb_model.predict_proba(X_val_features)
val_roc_auc = roc_auc_score(y_val, val_proba, multi_class='ovr', average='macro')

# val_proba = model.predict_proba(X_val)


test_predictions = xgb_model.predict(X_test_features)

test_proba = xgb_model.predict_proba(X_test_features)
test_roc_auc = roc_auc_score(y_test, test_proba, multi_class='ovr', average='macro')



val_accuracy = accuracy_score(y_val, val_predictions)
val_f1 = f1_score(y_val, val_predictions, average='macro')  
val_precision = precision_score(y_val, val_predictions, average='macro')
val_recall = recall_score(y_val, val_predictions, average='macro')
# val_roc_auc = roc_auc_score(y_val, val_proba)

test_accuracy = accuracy_score(y_test, test_predictions)
test_f1 = f1_score(y_test, test_predictions, average='macro')
test_precision = precision_score(y_test, test_predictions, average='macro')
test_recall = recall_score(y_test, test_predictions, average='macro')

print(f'Validation Accuracy: {val_accuracy * 100:.5f}%')
print(f'Validation F1 Score: {val_f1:.5f}')
print(f'Validation Precision: {val_precision:.5f}')
print(f'Validation Recall: {val_recall:.5f}')
print(f'Validation ROC AUC: {val_roc_auc:.5f}')
# print('Validation Confusion Matrix:')
# print(val_confusion_matrix)
# print('Validation Classification Report:')
# print(val_classification_report)


print(f'Test Accuracy: {test_accuracy * 100:.5f}%')
print(f'Test F1 Score: {test_f1:.5f}')
print(f'Test Precision: {test_precision:.5f}')
print(f'Test Recall: {test_recall:.5f}')
print(f'Test ROC AUC: {test_roc_auc:.5f}')
y_val_class = np.argmax(y_val, axis=1)
val_predictions_class = np.argmax(val_predictions, axis=1)

y_test_class = np.argmax(y_test, axis=1)
test_predictions_class = np.argmax(test_predictions, axis=1)

# Now use these in confusion_matrix and classification_report
val_confusion_matrix = confusion_matrix(y_val_class, val_predictions_class)
val_classification_report = classification_report(y_val_class, val_predictions_class)

test_confusion_matrix = confusion_matrix(y_test_class, test_predictions_class)
test_classification_report = classification_report(y_test_class, test_predictions_class)
print('Test Confusion Matrix:')
print(test_confusion_matrix)
print('Test Classification Report:')
print(test_classification_report)



[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 222ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 334ms/step
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 196ms/step
Validation Accuracy: 99.65398%
Validation F1 Score: 0.99639
Validation Precision: 0.99569
Validation Recall: 0.99713
Validation ROC AUC: 1.00000
Test Accuracy: 99.94797%
Test F1 Score: 0.99946
Test Precision: 0.99936
Test Recall: 0.99956
Test ROC AUC: 1.00000
Test Confusion Matrix:
[[1140    1]
 [   0  781]]
Test Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1141
           1       1.00      1.00      1.00       781

    accuracy                           1.00      1922
   macro avg       1.00      1.00      1.00      1922
weighted avg       1.00      1.00      1.00      1922



In [11]:
# import pickle
# import numpy as np
# from sklearn.metrics import accuracy_score
# from tensorflow.keras.models import load_model

# filename = 'full-model_2.h5'
# # model.save(filename)


# # Load the saved model
# loaded_model = load_model(filename)

# print("Model loaded successfully")

# # Predict using the loaded model
# loaded_model_predictions = loaded_model.predict(X_test)

# # Convert probabilities to class labels if needed (assuming multi-class classification)
# loaded_model_predictions = np.argmax(loaded_model_predictions, axis=1)

# # If y_test is one-hot encoded, convert it to class labels
# y_test_labels = np.argmax(y_test, axis=1)

# # Calculate accuracy
# test_accuracy = accuracy_score(y_test_labels, loaded_model_predictions)

# print(f'Test Accuracy: {test_accuracy * 100:.2f}%')


# Stacking Ensable with XGBooster



In [12]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
import numpy as np

# Assuming y_train_single, y_val_single, and y_test_single are already 1D arrays of class labels
y_train_single = np.argmax(y_train, axis=1)
y_val_single = np.argmax(y_val, axis=1)
y_test_single= np.argmax(y_test, axis=1)
scaler = StandardScaler()
X_train_features = scaler.fit_transform(X_train_features)
X_val_features = scaler.transform(X_val_features)
X_test_features = scaler.transform(X_test_features)

# Initialize the base models
lr = LogisticRegression(max_iter=500)
rf = RandomForestClassifier(n_estimators=100)
svm = SVC(probability=True)

# Fit the base models
lr.fit(X_train_features, y_train_single)
rf.fit(X_train_features, y_train_single)
svm.fit(X_train_features, y_train_single)

# Get the predicted probabilities for stacking
lr_train_pred = lr.predict_proba(X_train_features)
rf_train_pred = rf.predict_proba(X_train_features)
svm_train_pred = svm.predict_proba(X_train_features)

# Concatenate the predictions to form the meta input
meta_X_train = np.hstack((lr_train_pred, rf_train_pred, svm_train_pred))

# Get validation predictions
lr_val_pred = lr.predict_proba(X_val_features)
rf_val_pred = rf.predict_proba(X_val_features)
svm_val_pred = svm.predict_proba(X_val_features)
meta_X_val = np.hstack((lr_val_pred, rf_val_pred, svm_val_pred))

# Initialize and train the meta-learner (XGBoost)
meta_learner = xgb.XGBClassifier(n_estimators=400, learning_rate=0.001, max_depth=4, eval_metric=['logloss', 'error'], use_label_encoder=False)

eval_set = [(meta_X_train, y_train_single), (meta_X_val, y_val_single)]
meta_learner.fit(meta_X_train, y_train_single, eval_set=eval_set, verbose=True)

# Get test predictions from the base models
lr_test_pred = lr.predict_proba(X_test_features)
rf_test_pred = rf.predict_proba(X_test_features)
svm_test_pred = svm.predict_proba(X_test_features)

# Concatenate the test set predictions for the meta learner
meta_X_test = np.hstack((lr_test_pred, rf_test_pred, svm_test_pred))


 # Convert one-hot encoded y_test_single to class labels if it's one-hot encoded
if len(y_test_single.shape) > 1 and y_test_single.shape[1] > 1:
    y_test_single_labels = np.argmax(y_test_single, axis=1)
else:
    y_test_single_labels = y_test_single  # Already in label format

# Predict the final class labels with the meta learner
y_pred = meta_learner.predict(meta_X_test)

# Now compute the metrics using the true labels and predictions
accuracy = accuracy_score(y_test_single_labels, y_pred)
f1 = f1_score(y_test_single_labels, y_pred, average='macro')
precision = precision_score(y_test_single_labels, y_pred, average='macro')
recall = recall_score(y_test_single_labels, y_pred, average='macro')
roc_auc = roc_auc_score(y_test_single_labels, y_pred, multi_class='ovr')

print(f'Accuracy: {accuracy}')
print(f'F1 Score: {f1}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'ROC AUC: {roc_auc}')



[0]	validation_0-logloss:0.67488	validation_0-error:0.40734	validation_1-logloss:0.67144	validation_1-error:0.39792
[1]	validation_0-logloss:0.67389	validation_0-error:0.40734	validation_1-logloss:0.67049	validation_1-error:0.39792
[2]	validation_0-logloss:0.67290	validation_0-error:0.40734	validation_1-logloss:0.66954	validation_1-error:0.39792
[3]	validation_0-logloss:0.67192	validation_0-error:0.40734	validation_1-logloss:0.66859	validation_1-error:0.39792
[4]	validation_0-logloss:0.67093	validation_0-error:0.40734	validation_1-logloss:0.66765	validation_1-error:0.39792
[5]	validation_0-logloss:0.66995	validation_0-error:0.40734	validation_1-logloss:0.66671	validation_1-error:0.39792
[6]	validation_0-logloss:0.66897	validation_0-error:0.40734	validation_1-logloss:0.66577	validation_1-error:0.39792
[7]	validation_0-logloss:0.66799	validation_0-error:0.40734	validation_1-logloss:0.66483	validation_1-error:0.39792
[8]	validation_0-logloss:0.66701	validation_0-error:0.40734	validation_1

#  Multi-Model Ensemble (VGG16 + ResNet50 + RF + SVM + Logistic Regression + XGBoost)

In [13]:
import os
import numpy as np
import cv2
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import xgboost as xgb
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator


train_dir = '/kaggle/input/pcos-detection-using-ultrasound-images/data/train'
test_dir = '/kaggle/input/pcos-detection-using-ultrasound-images/data/test'
image_size = (224, 224)

def load_images_from_folder(base_dir):
    images, labels = [], []
    classes = {'infected': 1, 'notinfected': 0}
    for label in classes:
        folder_path = os.path.join(base_dir, label)
        for image_file in os.listdir(folder_path):
            img_path = os.path.join(folder_path, image_file)
            img = cv2.imread(img_path)
            if img is not None:
                img = cv2.resize(img, image_size)
                images.append(img)
                labels.append(classes[label])
    return np.array(images), np.array(labels)


train_images, train_labels = load_images_from_folder(train_dir)
test_images, test_labels = load_images_from_folder(test_dir)
train_images = train_images / 255.0
test_images = test_images / 255.0

X_train, X_val, y_train, y_val = train_test_split(train_images, train_labels, test_size=0.15, random_state=42)


datagen = ImageDataGenerator(rotation_range=20, zoom_range=0.15, width_shift_range=0.2,
                             height_shift_range=0.2, shear_range=0.15, horizontal_flip=True, fill_mode="nearest")
datagen.fit(X_train)

# Feature extraction with VGG16
vgg_model = VGG16(weights='/kaggle/input/vggnet16/keras/default/1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
                  include_top=False, input_shape=(224, 224, 3))
vgg_output = GlobalAveragePooling2D()(vgg_model.output)
vgg_feature_extractor = Model(inputs=vgg_model.input, outputs=vgg_output)
for layer in vgg_model.layers:
    layer.trainable = False

# Feature extraction with ResNet50
resnet_model =  ResNet50(weights='/kaggle/input/resnet50/keras/default/1/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
                      include_top=False, 
                      input_shape=(224, 224, 3))

resnet_output = GlobalAveragePooling2D()(resnet_model.output)
resnet_feature_extractor = Model(inputs=resnet_model.input, outputs=resnet_output)
for layer in resnet_model.layers:
    layer.trainable = False

# Extract features from the training, validation, and test sets
X_train_vgg = vgg_feature_extractor.predict(X_train)
X_val_vgg = vgg_feature_extractor.predict(X_val)
X_test_vgg = vgg_feature_extractor.predict(test_images)

X_train_resnet = resnet_feature_extractor.predict(X_train)
X_val_resnet = resnet_feature_extractor.predict(X_val)
X_test_resnet = resnet_feature_extractor.predict(test_images)

# Flatten the features for concatenation
X_train_vgg = X_train_vgg.reshape((X_train_vgg.shape[0], -1))
X_val_vgg = X_val_vgg.reshape((X_val_vgg.shape[0], -1))
X_test_vgg = X_test_vgg.reshape((X_test_vgg.shape[0], -1))

X_train_resnet = X_train_resnet.reshape((X_train_resnet.shape[0], -1))
X_val_resnet = X_val_resnet.reshape((X_val_resnet.shape[0], -1))
X_test_resnet = X_test_resnet.reshape((X_test_resnet.shape[0], -1))

# Concatenate VGG16 and ResNet50 features
X_train_features = np.hstack((X_train_vgg, X_train_resnet))
X_val_features = np.hstack((X_val_vgg, X_val_resnet))
X_test_features = np.hstack((X_test_vgg, X_test_resnet))

# Standardize the features
scaler = StandardScaler()
X_train_features = scaler.fit_transform(X_train_features)
X_val_features = scaler.transform(X_val_features)
X_test_features = scaler.transform(X_test_features)

# Train base classifiers
lr = LogisticRegression(max_iter=500)
rf = RandomForestClassifier(n_estimators=100)
svm = SVC(probability=True)

lr.fit(X_train_features, y_train)
rf.fit(X_train_features, y_train)
svm.fit(X_train_features, y_train)

# Get base model predictions
lr_train_pred = lr.predict_proba(X_train_features)
rf_train_pred = rf.predict_proba(X_train_features)
svm_train_pred = svm.predict_proba(X_train_features)

# Stack base model predictions
meta_X_train = np.hstack((lr_train_pred, rf_train_pred, svm_train_pred))

# Validation set stacking
lr_val_pred = lr.predict_proba(X_val_features)
rf_val_pred = rf.predict_proba(X_val_features)
svm_val_pred = svm.predict_proba(X_val_features)
meta_X_val = np.hstack((lr_val_pred, rf_val_pred, svm_val_pred))

# Convert one-hot encoded labels back to single-column format for XGBoost
y_train_single = y_train  # No need to change, already single-class
y_val_single = y_val  # Already single-class
y_test_single = test_labels  # Use this for evaluation later

# XGBoost as meta-learner
meta_learner = xgb.XGBClassifier(n_estimators=500, learning_rate=0.001, max_depth=4, eval_metric=['logloss', 'error'])
eval_set = [(meta_X_train, y_train_single), (meta_X_val, y_val_single)]
meta_learner.fit(meta_X_train, y_train_single, eval_set=eval_set, verbose=True)

# Test set stacking
lr_test_pred = lr.predict_proba(X_test_features)
rf_test_pred = rf.predict_proba(X_test_features)
svm_test_pred = svm.predict_proba(X_test_features)
meta_X_test = np.hstack((lr_test_pred, rf_test_pred, svm_test_pred))

# Evaluate meta-learner
y_pred = meta_learner.predict(meta_X_test)

# Metrics
test_accuracy = accuracy_score(y_test_single, y_pred)
test_f1 = f1_score(y_test_single, y_pred, average='macro')
test_precision = precision_score(y_test_single, y_pred, average='macro')
test_recall = recall_score(y_test_single, y_pred, average='macro')
test_roc_auc = roc_auc_score(y_test_single, y_pred)

print(f'Test Accuracy: {test_accuracy * 100:.5f}%')
print(f'Test F1 Score: {test_f1:.5f}')
print(f'Test Precision: {test_precision:.5f}')
print(f'Test Recall: {test_recall:.5f}')
print(f'Test ROC AUC: {test_roc_auc:.5f}')


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 146ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 163ms/step
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 155ms/step
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 146ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 219ms/step
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 110ms/step
[0]	validation_0-logloss:0.67488	validation_0-error:0.40734	validation_1-logloss:0.67141	validation_1-error:0.39792
[1]	validation_0-logloss:0.67389	validation_0-error:0.40734	validation_1-logloss:0.67043	validation_1-error:0.39792
[2]	validation_0-logloss:0.67290	validation_0-error:0.40734	validation_1-logloss:0.66945	validation_1-error:0.39792
[3]	validation_0-logloss:0.67192	validation_0-error:0.40734	validation_1-logloss:0.66848	validation_1-error:0.39792
[4]	validation_0-logloss:0.67093	validation_0-error:0.40734	validation_1-logloss:0.66

In [14]:
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.linear_model import LogisticRegression
# from sklearn.svm import SVC
# from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
# import xgboost as xgb
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler


# scaler = StandardScaler()
# X_train_features = scaler.fit_transform(X_train_features)
# X_val_features = scaler.transform(X_val_features)
# X_test_features = scaler.transform(X_test_features)


# lr = LogisticRegression(max_iter=500)
# rf = RandomForestClassifier(n_estimators=100)
# svm = SVC(probability=True)


# lr.fit(X_train_features, y_train_single)
# rf.fit(X_train_features, y_train_single)
# svm.fit(X_train_features, y_train_single)


# lr_train_pred = lr.predict_proba(X_train_features)
# rf_train_pred = rf.predict_proba(X_train_features)
# svm_train_pred = svm.predict_proba(X_train_features)


# meta_X_train = np.hstack((lr_train_pred, rf_train_pred, svm_train_pred))


# lr_val_pred = lr.predict_proba(X_val_features)
# rf_val_pred = rf.predict_proba(X_val_features)
# svm_val_pred = svm.predict_proba(X_val_features)
# meta_X_val = np.hstack((lr_val_pred, rf_val_pred, svm_val_pred))


# meta_learner = xgb.XGBClassifier(n_estimators=200, learning_rate=0.01, max_depth=4, eval_metric=['logloss', 'error'], use_label_encoder=False)


# eval_set = [(meta_X_train, y_train_single), (meta_X_val, y_val_single)]


# meta_learner.fit(meta_X_train, y_train_single, eval_set=eval_set, verbose=True)


# lr_test_pred = lr.predict_proba(X_test_features)
# rf_test_pred = rf.predict_proba(X_test_features)
# svm_test_pred = svm.predict_proba(X_test_features)
# meta_X_test = np.hstack((lr_test_pred, rf_test_pred, svm_test_pred))

# y_pred = meta_learner.predict(meta_X_test)


# accuracy = accuracy_score(y_test_single, y_pred)
# f1 = f1_score(y_test_single, y_pred, average='macro')
# precision = precision_score(y_test_single, y_pred, average='macro')
# recall = recall_score(y_test_single, y_pred, average='macro')
# roc_auc = roc_auc_score(y_test_single, y_pred)

# print(f'Accuracy: {accuracy}')
# print(f'F1 Score: {f1}')
# print(f'Precision: {precision}')
# print(f'Recall: {recall}')
# print(f'ROC AUC: {roc_auc}')
