In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc, precision_recall_curve
from google.colab import files

# Load the dataset; if the default path is missing, fall back to the Colab
# upload dialog and read whichever file the user uploads first.
file_path = "/mnt/data/cisc_352_dataaset.csv"
try:
    df = pd.read_csv(file_path)
except FileNotFoundError:
    print("Dataset not found. Please upload your CSV file.")
    uploaded = files.upload()
    file_path = next(iter(uploaded))
    df = pd.read_csv(file_path)

# Drop the 'id' column if present (presumably a row identifier, not a feature).
df.drop(columns=['id'], inplace=True, errors='ignore')
df['diagnosis'] = LabelEncoder().fit_transform(df['diagnosis'])  # Encode labels as integers
y = df['diagnosis'].values

# Keep only the first 30 feature columns; the network below is built with
# input_dim=30. Slicing before scaling is equivalent because StandardScaler
# works column by column.
X = df.drop(columns=['diagnosis']).values[:, :30]

# Split BEFORE scaling so that test rows never influence the scaler's
# mean/variance (fitting on the full data leaks test statistics into training).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features using statistics from the training rows only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Keep X standardized (with the train-only statistics) for any later use.
X = scaler.transform(X)

# Convert to PyTorch tensors; labels get a trailing dimension so they match
# the (n, 1) output of the network.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

# Bayesian Linear Layer
class BayesianLinear(nn.Module):
    """Bias-free linear layer with a factorised Gaussian posterior over weights.

    Each weight has a learnable mean ``mu`` and a learnable ``rho``; the
    posterior standard deviation is ``softplus(rho)``, which keeps it positive.
    Every call to ``forward`` draws a fresh weight sample
    (``mu + std * eps`` with ``eps ~ N(0, 1)``), regardless of train/eval mode.

    Args:
        in_features: Size of each input sample.
        out_features: Size of each output sample.
        prior_std: Standard deviation of the zero-mean Gaussian prior placed
            on every weight.
    """

    def __init__(self, in_features, out_features, prior_std=1.0):
        super().__init__()
        shape = (out_features, in_features)
        # Registered as buffers so the prior follows the module through
        # .to(device) / dtype casts; non-persistent so state_dict keys stay
        # exactly {"mu", "rho"}.
        self.register_buffer("prior_mean", torch.zeros(shape), persistent=False)
        self.register_buffer("prior_std", torch.ones(shape) * prior_std, persistent=False)
        self.mu = nn.Parameter(torch.randn(shape) * 0.1)
        # Centre rho at -3 with small jitter so every weight starts with a
        # small posterior std (softplus(-3) ~= 0.049). Multiplying randn by
        # -3.0 instead would give rho ~ N(0, 9) and huge initial noise.
        self.rho = nn.Parameter(torch.randn(shape) * 0.1 - 3.0)

    def forward(self, x):
        """Apply the layer to ``x`` using one freshly sampled weight matrix."""
        # softplus is the overflow-safe form of log(1 + exp(rho)).
        std = F.softplus(self.rho)
        eps = torch.randn_like(std)
        weights = self.mu + std * eps
        return F.linear(x, weights)

    def kl_divergence(self):
        """Return KL(posterior || prior) summed over all weights (scalar tensor)."""
        std = F.softplus(self.rho)
        # Closed-form KL between two univariate Gaussians, element-wise.
        kl = (std**2 + (self.mu - self.prior_mean)**2) / (2 * self.prior_std**2) - 0.5 + torch.log(self.prior_std / std)
        return kl.sum()

# Bayesian Neural Network
class BayesianNN(nn.Module):
    """Two-layer network built from ``BayesianLinear`` layers.

    The hidden layer uses a ReLU activation and the output layer is passed
    through a sigmoid, so outputs lie in (0, 1).

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of outputs.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.bayesian_fc1 = BayesianLinear(in_features=input_dim, out_features=hidden_dim)
        self.bayesian_fc2 = BayesianLinear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Run ``x`` through both layers and return the sigmoid of the result."""
        hidden = torch.relu(self.bayesian_fc1(x))
        pre_activation = self.bayesian_fc2(hidden)
        return pre_activation.sigmoid()

    def kl_divergence(self):
        """Return the sum of the two layers' KL divergence terms."""
        kl_first = self.bayesian_fc1.kl_divergence()
        kl_second = self.bayesian_fc2.kl_divergence()
        return kl_first + kl_second

# Train BNN
def train_bnn(model, X_train, y_train, epochs=100, learning_rate=0.01, beta=0.01):
    """Train ``model`` with full-batch Adam on BCE plus a weighted KL term.

    Args:
        model: Module whose call returns probabilities and which exposes a
            ``kl_divergence()`` method returning a scalar tensor.
        X_train: Input tensor passed to ``model`` in a single batch.
        y_train: Target tensor with the same shape as the model output.
        epochs: Number of optimisation steps.
        learning_rate: Adam learning rate.
        beta: Multiplier applied to the KL term in the objective.

    Returns:
        List with the total loss (as a Python float) of every epoch.
    """
    bce = nn.BCELoss()
    adam = optim.Adam(model.parameters(), lr=learning_rate)
    history = []

    for step in range(epochs):
        adam.zero_grad()

        # Objective = data-fit term + beta * complexity (KL) term.
        data_term = bce(model(X_train), y_train)
        kl_term = model.kl_divergence()
        objective = data_term + beta * kl_term

        objective.backward()
        adam.step()

        objective_value = objective.item()
        history.append(objective_value)

        # Report progress on every tenth epoch only.
        if step % 10 != 0:
            continue
        print(f"Epoch {step + 1}: Loss = {objective_value:.4f}, KL = {kl_term.item():.4f}")

    return history

# Initialize and train BNN
bnn = BayesianNN(input_dim=30, hidden_dim=20, output_dim=1)
loss_history = train_bnn(bnn, X_train_tensor, y_train_tensor)

# Evaluate BNN
bnn.eval()
# Every forward pass samples a new set of weights, so a single pass is one
# noisy draw from the predictive distribution. Average several draws to get
# a stable Monte Carlo estimate of the predicted probability.
N_MC_SAMPLES = 100
with torch.no_grad():  # no gradients are needed for evaluation
    mc_probs = torch.stack([bnn(X_test_tensor) for _ in range(N_MC_SAMPLES)])
y_pred_prob = mc_probs.mean(dim=0).numpy()
y_pred = (y_pred_prob > 0.5).astype(int)

# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

# ROC Curve
fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
roc_auc = auc(fpr, tpr)

# Precision-Recall Curve
precision, recall, _ = precision_recall_curve(y_test, y_pred_prob)

# Plot Loss Curve
plt.figure()
plt.plot(loss_history, label='Training Loss')
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("BNN Training Loss Curve")
plt.legend()
plt.show()

# Plot ROC Curve
plt.figure()
# Legend label now closes the parenthesis opened before "AUC".
plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], linestyle='--')  # chance-level diagonal for reference
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Receiver Operating Characteristic (ROC) Curve")
plt.legend()
plt.show()

# Plot Precision-Recall Curve
plt.figure()
plt.plot(recall, precision, label="Precision-Recall Curve")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve")
plt.legend()
plt.show()