In [None]:
from keras.layers import Dense, Activation, BatchNormalization, Add, Input
from keras.models import Model
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import numpy as np

# Locations of the two input CSV files (Kaggle dataset mounts).
SYNTHETIC_CSV = "/kaggle/input/synthetic-dataset-with-pca/Synthetic_Dataset_with_PCA.csv"
OUTPUTS_CSV = "/kaggle/input/combined-dataset-before-smote/output-2.csv"

# Parse both files as comma-separated floating-point tables.
synthetic = np.genfromtxt(SYNTHETIC_CSV, delimiter=',', dtype=float)
outputs = np.genfromtxt(OUTPUTS_CSV, delimiter=',', dtype=float)

# Features are every row of the synthetic table except the first;
# targets are the last four columns of every row of the outputs table.
# NOTE(review): the first row is dropped for X but not for y - presumably only
# the synthetic CSV has a header line; confirm the two files align row-for-row.
X = synthetic[1:]
y = outputs[:, -4:]

for _arr in (X, y):
    print(_arr.shape)

# Hold out part of the data; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=42)

for _arr in (x_test, y_test):
    print(_arr.shape)

# Per-sample feature shape (everything after the sample axis), used by the model input.
input_shape = x_train.shape[1:]

# Define ResNet-like architecture for tabular data
def residual_block(x, num_neurons):
    """Apply two Dense -> BatchNorm -> ReLU stages and add a skip connection.

    Args:
        x: Input Keras tensor; its last axis is treated as the feature axis.
        num_neurons: Width of both Dense layers, and of the returned tensor.

    Returns:
        The element-wise sum of the shortcut and the transformed branch.

    ``Add`` requires both operands to have the same shape. When the incoming
    feature width is known and differs from ``num_neurons``, the shortcut is
    passed through a linear ``Dense`` projection so the widths match; when the
    widths already agree the input is added unchanged.
    """
    shortcut = x
    in_width = x.shape[-1]
    # Only project when the width is known and really differs, so the graph
    # built for matching widths is exactly the plain identity-shortcut block.
    if in_width is not None and in_width != num_neurons:
        shortcut = Dense(num_neurons)(x)

    branch = x
    for _ in range(2):
        branch = Dense(num_neurons)(branch)
        branch = BatchNormalization()(branch)
        branch = Activation('relu')(branch)

    return Add()([shortcut, branch])

# Architecture hyper-parameters for the tabular ResNet-style network.
HIDDEN_UNITS = 64
NUM_RESIDUAL_BLOCKS = 3

# Stem: project the raw features to the hidden width, normalise, activate.
inputs = Input(shape=input_shape)
x = Dense(HIDDEN_UNITS)(inputs)
x = BatchNormalization()(x)
x = Activation('relu')(x)

# Stack the residual blocks, each keeping the hidden width.
block_index = 0
while block_index < NUM_RESIDUAL_BLOCKS:
    x = residual_block(x, HIDDEN_UNITS)
    block_index += 1

# Four independent sigmoid units, one per target column.
# Note: this rebinds `outputs`, which earlier held the loaded label table.
outputs = Dense(4, activation='sigmoid')(x)

model = Model(inputs=inputs, outputs=outputs)

# Binary cross-entropy treats each of the four outputs as its own yes/no problem.
model.compile(
    optimizer=Adam(),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training split; the held-out split is passed as validation data.
history = model.fit(
    x_train,
    y_train,
    batch_size=16,
    epochs=30,
    validation_data=(x_test, y_test),
)


In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import f1_score, confusion_matrix
from sklearn.metrics import roc_curve
from sklearn import metrics

# Decision cut-off applied to both the sigmoid scores and the targets.
threshold = 0.5

# Score the held-out split with the trained model.
y_pred = model.predict(x_test)

# Collapse predictions and targets to 1-D so every (sample, label) pair
# becomes one entry in the flattened arrays.
y_test_flat = y_test.flatten()
y_pred_flat = y_pred.flatten()

# Scores strictly above the cut-off become 1, everything else 0.
y_pred_binary = (y_pred_flat > threshold).astype(int)

# Targets get the same cut-off, truncated to the number of predictions
# (a no-op when both arrays already have the same length).
n_predictions = len(y_pred_binary)
y_test_binary = (y_test_flat[:n_predictions] > threshold).astype(int)

In [None]:
# Classification metrics over the flattened, thresholded (sample, label) pairs.
y_pred = model.predict(x_test)

# NOTE(review): F1 uses average='weighted' while precision and recall use the
# default averaging - confirm that this mix is intentional.
f1 = f1_score(y_test_binary, y_pred_binary, average='weighted')
accuracy = accuracy_score(y_test_binary, y_pred_binary)
precision = precision_score(y_test_binary, y_pred_binary)
recall = recall_score(y_test_binary, y_pred_binary)

# Confusion matrix of ground truth (rows) against predictions (columns).
# The previous version compared the ground truth with itself, which forced
# TPR to 1 and FPR to 0 regardless of the model. labels=[0, 1] keeps the
# matrix 2x2 even if one class is absent from the held-out data.
cm = confusion_matrix(y_test_binary.ravel(), y_pred_binary.ravel(), labels=[0, 1])

actual_positives = cm[1, 1] + cm[1, 0]
actual_negatives = cm[0, 1] + cm[0, 0]

# Guard the ratios so an absent class yields 0.0 instead of a division by zero.
tpr_value = cm[1, 1] / actual_positives if actual_positives else 0.0
fpr_value = cm[0, 1] / actual_negatives if actual_negatives else 0.0

print("Accuracy:", accuracy)
print("F1 Score:", f1)
print("Precision Score:", precision)
print("Recall Score:", recall)
print("TPR Score:", tpr_value)
print("FPR Score:", fpr_value)


In [None]:
import json
import numpy as np
from sklearn.metrics import roc_curve, precision_recall_curve, auc

# Fresh sigmoid scores for the held-out split.
y_pred = model.predict(x_test)

# One PR-AUC and one ROC-AUC value is collected per target column.
prc_aucs, roc_aucs = [], []

n_labels = y_test.shape[1]
for i in range(n_labels):
    truth_i = y_test[:, i]
    score_i = y_pred[:, i]

    # Precision-recall curve and the area under it (recall on the x axis).
    precision_a, recall_a, _ = precision_recall_curve(truth_i, score_i)
    prc_auc = auc(recall_a, precision_a)
    prc_aucs.append(prc_auc)
    print(f"Area Under the PR Curve score for label {i}: {prc_auc}")

    # ROC curve and the area under it (false-positive rate on the x axis).
    fpr, tpr, _ = roc_curve(truth_i, score_i)
    roc_auc = auc(fpr, tpr)
    roc_aucs.append(roc_auc)
    print(f"Area Under the ROC Curve score for label {i}: {roc_auc}")

# Unweighted mean of the per-label areas.
average_prc_auc = np.mean(prc_aucs)
average_roc_auc = np.mean(roc_aucs)

print("Average Area Under the PR Curve score: ", average_prc_auc)
print("Average Area Under the ROC Curve score: ", average_roc_auc)




In [None]:
# Persist the evaluation summary as JSON.

# Curve arrays converted to plain lists so json can serialise them.
# The per-label loop rebinds precision_a / recall_a / tpr / fpr on every
# iteration, so these are the curves of the LAST label only.
precision_list = precision_a.tolist()
recall_list = recall_a.tolist()
tpr_list = tpr.tolist()
fpr_list = fpr.tolist()

# The scalar precision / recall reported here are the thresholded scores
# computed together with F1, TPR and FPR. They are deliberately NOT replaced
# by the mean of the last label's PR-curve points, which would describe a
# single label and would not match the other summary entries.
data = {
    "Average F1 Score": float(f1),
    "Average Precision": float(precision),
    "Average Recall": float(recall),
    "Average True Positive Rate": float(tpr_value),
    "Average False Positive Rate": float(fpr_value),
    "Average ROC AUC": float(average_roc_auc),
    "Average PRC AUC": float(average_prc_auc),
    "TPR Array": tpr_list,
    "FPR Array": fpr_list,
    "Precision Array": precision_list,
    "Recall Array": recall_list
}

# Destination inside the Kaggle working directory.
file_path = "/kaggle/working/ResNet - Synthetic Dataset with PCA.json"

# The context manager closes the file even if serialisation fails.
with open(file_path, "w") as json_file:
    json.dump(data, json_file)

print(f"Metrics saved to {file_path}")