In [124]:
#importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import tensorflow as tf


In [125]:
# Create imbalanced dataset
X, y = make_classification(n_samples=10000, n_classes=2, class_sep=2,
                            weights=[0.97, 0.03], n_informative=3, n_redundant=1, flip_y=0,
                            n_features=20, random_state=42)

# Printing the number of samples in each class
print("Number of samples in each class:")
print("Class 0: ", len(y[y==0]))
print("Class 1: ", len(y[y==1]))


Number of samples in each class:
Class 0:  9700
Class 1:  300


In [126]:
#splitting the dataset into training and test 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [127]:
# Define custom loss function
def weighted_binary_crossentropy(y_true, y_pred):
    # Convert y_true to float64
    y_true = tf.cast(y_true, tf.float64)
    y_pred = tf.cast(y_pred, tf.float64)
    
    # Calculate class weights
    n_samples = len(y_true)
    n_positives = tf.reduce_sum(y_true)


    n_negatives = tf.cast(n_samples, tf.int32) - tf.cast(n_positives, tf.int32)
    alpha = n_negatives / n_samples
    beta = 1 - alpha
    
    # Calculate cross-entropy loss
    loss = -(beta * y_true * tf.math.log(y_pred + 1e-7) + alpha * (1 - y_true) * tf.math.log(1 - y_pred + 1e-7))
    return tf.reduce_mean(loss)

In [128]:
# Define binary classification model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(32, activation='relu', input_shape=(20,)),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [129]:
# Compile the model with custom loss function
model.compile(optimizer='adam', loss=weighted_binary_crossentropy)

In [130]:
# Train the model
X_train=tf.convert_to_tensor(X_train, dtype=tf.float64)
X_test=tf.convert_to_tensor(X_test, dtype=tf.float64)
y_train=tf.convert_to_tensor(y_train, dtype=tf.float64)
y_test=tf.convert_to_tensor(y_test, dtype=tf.float64)

model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x19717513940>

In [131]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Make predictions on the testing set
y_pred = model.predict(X_test)
y_pred_binary = np.round(y_pred)

# Compute evaluation metrics
accuracy = accuracy_score(y_test, y_pred_binary)
precision = precision_score(y_test, y_pred_binary)
recall = recall_score(y_test, y_pred_binary)
f1 = f1_score(y_test, y_pred_binary)
auc_roc = roc_auc_score(y_test, y_pred)

# Print evaluation metrics
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 score:", f1)
print("AUC-ROC:", auc_roc)


Accuracy: 0.9935
Precision: 1.0
Recall: 0.7758620689655172
F1 score: 0.8737864077669902
AUC-ROC: 0.9964309812138216
