In [None]:
!pip install deepod

In [None]:
# Clone the DeepOD repository
# !git clone https://github.com/xuhongzuo/DeepOD.git

# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
import sys
from deepod.models.devnet import DevNet
import seaborn as sns

# Load the dataset: every column but the last is a feature, the last is the label.
merged_df = pd.read_csv('2_annthyroid.csv')

# Separate features (X) and labels (y) from the DataFrame
train_features = merged_df.iloc[:, :-1].values
train_labels = merged_df.iloc[:, -1].values

# A dedicated, seeded generator makes the split reproducible from run to run
# and keeps it independent of whatever else touches NumPy's global RNG.
split_rng = np.random.default_rng(42)

# Shuffle row indices per class. Working with indices (instead of shuffling
# the feature rows directly) lets us know exactly which rows went into the
# training set so that the remaining rows can form a disjoint test set.
normal_idx = split_rng.permutation(np.flatnonzero(train_labels == 0))
anomaly_idx = split_rng.permutation(np.flatnonzero(train_labels == 1))

# Shuffled normal and anomaly subsets (label 0 / label 1 rows)
normal_data = train_features[normal_idx]
anomaly_data = train_features[anomaly_idx]

# Training budget: 70% of all rows, of which 2% are labelled anomalies
total_samples = len(train_features)
num_samples_train = int(0.7 * total_samples)
num_samples_anomaly = int(0.02 * num_samples_train)

# Slicing clamps silently if a class has fewer rows than requested, so the
# label vector below is sized from the slices rather than from the budget.
train_normal_idx = normal_idx[:num_samples_train - num_samples_anomaly]
train_anomaly_idx = anomaly_idx[:num_samples_anomaly]

# Training set: normal rows first (label 0), then labelled anomalies (label 1)
X_train = np.vstack((train_features[train_normal_idx], train_features[train_anomaly_idx]))
y_train = np.hstack((np.zeros(len(train_normal_idx)), np.ones(len(train_anomaly_idx))))

# Test set: every row NOT used for training. Evaluating on rows the model
# was fitted on would leak training data into the reported metrics.
test_idx = np.concatenate((
    normal_idx[len(train_normal_idx):],
    anomaly_idx[len(train_anomaly_idx):],
))
test_features = train_features[test_idx]
test_labels = train_labels[test_idx]

# Initialize and train the DevNet model with 10 epochs.
# y_train marks the labelled anomalies with 1 and every other training row with 0.
devnet_model = DevNet(epochs=10)
devnet_model.fit(X_train, y_train)

# Predicted labels for the test rows; these are compared against
# test_labels by the classification metrics computed further down.
predictions = devnet_model.predict(test_features)
print(predictions)

# Per-row anomaly scores for the test set
# NOTE(review): presumably higher means more anomalous - confirm against the DeepOD docs.
scores = devnet_model.decision_function(test_features)
print(scores)

# Threshold-based metrics: computed from the hard labels in `predictions`.
accuracy = accuracy_score(test_labels, predictions)
precision_devnet = precision_score(test_labels, predictions)
recall_devnet = recall_score(test_labels, predictions)
f1_score_devnet = f1_score(test_labels, predictions)
confusion = confusion_matrix(test_labels, predictions)

# ROC curve: must be built from the continuous anomaly scores, not the
# thresholded labels. Feeding 0/1 predictions yields a single operating
# point, so the resulting "AUC" only reflects that one threshold instead of
# the model's ranking quality across all thresholds.
fpr7, tpr7, thresholds = roc_curve(test_labels, scores)

# Area under the ROC curve
auc_roc_devnet = auc(fpr7, tpr7)

print("DevNet")
print(f" Precision:{precision_devnet:.4f}")
print(f" Recall:{recall_devnet:.4f}")
print(f" F1-score:{f1_score_devnet:.4f}")
print(f" AUC-ROC:{auc_roc_devnet:.4f}")
