In [1]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
from sklearn.metrics import roc_curve, auc, f1_score, classification_report, accuracy_score
from sklearn.ensemble import IsolationForest
import matplotlib.pyplot as plt
import tensorflow as tf
from keras import layers, models, regularizers   # Import models here
from keras.metrics import MeanSquaredError


2025-01-11 20:55:47.329200: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Record the installed PyTorch build in the notebook output.
print("PyTorch version:", torch.__version__)

PyTorch version: 2.3.0+cu121


In [3]:
# Load the background, BBH and SGLF arrays from disk.
# np.load on an .npz archive returns a lazy NpzFile that keeps the underlying
# file open, so the array is read inside a `with` block to release the handle.
with np.load('Datasets/background.npz') as background_archive:
    background = background_archive['data']
bbh = np.load('Datasets/bbh_for_challenge.npy')
sglf = np.load('Datasets/sglf_for_challenge.npy')


In [4]:
# Display the shape of each loaded array (background, BBH, SGLF).
tuple(dataset.shape for dataset in (background, bbh, sglf))

((100000, 2, 200), (100000, 2, 200), (100000, 2, 200))

# CNN 

In [5]:
# Bring the datasets into the (N, 200, 2) layout used for training.
# The arrays are loaded as (N, 2, 200); the transpose is guarded so that
# re-running this cell does not flip the axes back again.
N_CHANNELS = 2


def _to_channels_last(arr):
    """Return `arr` with the channel axis last, transposing only when needed.

    An array whose last axis already has length N_CHANNELS is returned
    unchanged, which makes repeated execution of this cell idempotent.
    """
    if arr.shape[-1] == N_CHANNELS:
        return arr  # already (N, 200, 2)
    return arr.transpose(0, 2, 1)


background = _to_channels_last(background)  # Shape: (100000, 200, 2)
bbh = _to_channels_last(bbh)                # Shape: (100000, 200, 2)
sglf = _to_channels_last(sglf)              # Shape: (100000, 200, 2)

# Combine datasets
X = np.concatenate([background, bbh, sglf], axis=0)  # Shape: (300000, 200, 2)

# Create labels: 0 for background, 1 for BBH, 2 for SGLF
y = np.concatenate([
    np.zeros(background.shape[0], dtype=int),  # Shape: (100000,)
    np.ones(bbh.shape[0], dtype=int),          # Shape: (100000,)
    np.full(sglf.shape[0], 2, dtype=int)       # Shape: (100000,)
])

# Split into training (80%) and testing (20%) datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Further split the training set into training (90%) and validation (10%)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

# One-hot targets for training and validation (3 classes).
y_train_one_hot = tf.keras.utils.to_categorical(y_train, num_classes=3)
y_val_one_hot = tf.keras.utils.to_categorical(y_val, num_classes=3)




In [37]:
from tensorflow.keras import layers, models, regularizers

# 1-D CNN classifier: three Conv1D/MaxPooling1D stages followed by a dense head.
input_shape = (200, 2)

model = models.Sequential([
    # Declare the input with an explicit Input layer; passing `input_shape=`
    # to the first layer of a Sequential model is deprecated in Keras 3.
    layers.Input(shape=input_shape),
    layers.Conv1D(filters=16, kernel_size=3, activation='relu',
                  kernel_regularizer=regularizers.l2(0.2)),
    layers.MaxPooling1D(pool_size=2),
    layers.Conv1D(filters=32, kernel_size=3, activation='relu',
                  kernel_regularizer=regularizers.l2(0.2)),
    layers.MaxPooling1D(pool_size=2),
    layers.Conv1D(filters=64, kernel_size=3, activation='relu',
                  kernel_regularizer=regularizers.l2(0.2)),
    layers.MaxPooling1D(pool_size=2),
    layers.Flatten(),
    layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    layers.Dropout(0.4),
    # Softmax output for multi-class classification
    layers.Dense(3, activation='softmax')  # 3 classes: background, BBH, SGLF
])

# categorical_crossentropy expects one-hot targets; accuracy and MSE are tracked as metrics.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'mse'])

# Print the model summary
model.summary()


In [38]:
# Train the model on the one-hot targets: it is compiled with
# `categorical_crossentropy`, so the integer labels (y_train / y_val)
# are not passed directly.
history = model.fit(
    x=X_train,
    y=y_train_one_hot,
    batch_size=64,
    epochs=20,
    validation_data=(X_val, y_val_one_hot),
)


Epoch 1/20
[1m3375/3375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - accuracy: 0.7234 - loss: 1.9931 - mse: 0.1316 - val_accuracy: 0.8141 - val_loss: 0.7257 - val_mse: 0.0920
Epoch 2/20
[1m3375/3375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.8372 - loss: 0.6508 - mse: 0.0802 - val_accuracy: 0.8271 - val_loss: 0.6510 - val_mse: 0.0858
Epoch 3/20
[1m3375/3375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - accuracy: 0.8575 - loss: 0.5862 - mse: 0.0706 - val_accuracy: 0.9001 - val_loss: 0.5274 - val_mse: 0.0558
Epoch 4/20
[1m3375/3375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - accuracy: 0.8773 - loss: 0.5406 - mse: 0.0620 - val_accuracy: 0.8890 - val_loss: 0.5163 - val_mse: 0.0553
Epoch 5/20
[1m3375/3375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - accuracy: 0.8818 - loss: 0.5238 - mse: 0.0592 - val_accuracy: 0.9014 - val_loss: 0.4839 - val_mse: 0.0504
Epoch 6/20
[1m3

In [39]:
# One-hot encode the held-out labels to match the 3-way softmax output.
y_test_one_hot = tf.keras.utils.to_categorical(y_test, num_classes=3)

# Evaluate on the test split; the three returned scalars are unpacked as
# loss, accuracy and MSE.
test_loss, test_accuracy, test_mse = model.evaluate(x=X_test, y=y_test_one_hot)

print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}, Test MSE: {test_mse}")


[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9403 - loss: 0.3523 - mse: 0.0307
Test Loss: 0.35117462277412415, Test Accuracy: 0.941433310508728, Test MSE: 0.030274994671344757


In [1]:
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt
import numpy as np

def plot_roc_multiclass(model, X_test, y_test, class_labels):
    """
    Plot one-vs-rest ROC curves, with AUC in the legend, for a classifier.

    Parameters:
    - model: Trained model whose ``predict(X_test)`` returns a 2-D array of
      scores with one column per entry in ``class_labels``.
    - X_test: Test data (features).
    - y_test: True labels, either integer class indices (0 .. n_classes - 1)
      or a one-hot array with one column per class.
    - class_labels: List of class names, ordered by class index.

    Returns:
    - None. The figure is displayed with ``plt.show()``.
    """
    n_classes = len(class_labels)

    # Normalise the labels to a flat vector of class indices so that both
    # integer and one-hot inputs are handled the same way.
    y_true = np.asarray(y_test)
    if y_true.ndim == 2 and y_true.shape[1] == n_classes:
        y_true = y_true.argmax(axis=1)
    else:
        y_true = y_true.ravel()

    # Get the predicted scores for each class
    y_score = model.predict(X_test)

    # Compute a one-vs-rest ROC curve and AUC per class. The binary mask is
    # built by direct comparison: label_binarize collapses to a single column
    # when there are only two classes, which broke per-class column indexing.
    curves = []
    for class_idx in range(n_classes):
        is_class = (y_true == class_idx).astype(int)
        fpr, tpr, _ = roc_curve(is_class, y_score[:, class_idx])
        curves.append((fpr, tpr, auc(fpr, tpr)))

    # Plot the ROC curves
    fig, ax = plt.subplots(figsize=(8, 6))
    for label, (fpr, tpr, roc_auc) in zip(class_labels, curves):
        ax.plot(fpr, tpr, label=f"Class {label} (AUC = {roc_auc:.2f})")

    ax.plot([0, 1], [0, 1], 'k--', lw=2)  # Diagonal line (chance level)
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel("False Positive Rate", fontsize=14)
    ax.set_ylabel("True Positive Rate", fontsize=14)
    ax.set_title("ROC Curve for Multi-Class Classification", fontsize=16)
    ax.legend(loc="lower right", fontsize=12)
    ax.grid(alpha=0.3)
    plt.show()

# Example usage. Requires the trained `model` and the test split to exist in
# the kernel; run the notebook top to bottom, otherwise this raises NameError.
class_labels = ['Background', 'BBH', 'SGLF']  # ordered by class index 0, 1, 2
plot_roc_multiclass(model, X_test, y_test, class_labels)


NameError: name 'model' is not defined

In [25]:
# Predicted class = column with the highest predicted probability.
y_pred_prob = model.predict(X_test)
y_pred = y_pred_prob.argmax(axis=1)

# F1 under three averaging modes: macro, weighted, and per class (average=None).
f1_macro, f1_weighted, f1_per_class = (
    f1_score(y_test, y_pred, average=averaging)
    for averaging in ('macro', 'weighted', None)
)

for caption, score in (
    ("Macro F1 Score:", f1_macro),
    ("Weighted F1 Score:", f1_weighted),
    ("F1 Score per Class:", f1_per_class),
):
    print(caption, score)


[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 880us/step
Macro F1 Score: 0.6895172165642999
Weighted F1 Score: 0.6890974915442465
F1 Score per Class: [0.92718225 0.55113075 0.59023865]
