In [10]:
import re

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from nltk import accuracy
from sklearn import datasets
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from torch import dtype
from torch.utils.data import DataLoader, TensorDataset
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from BayesianLLNN import BayesianLastLayer,PredLastLayer,TrainLastLayer,BaseNetwork,TrainLastLayerCL, PredLastLayerCl
from baseNN import GetNeuralNetwork, PredNormal, TrainNN, GetNeuralNetworkClassification, TrainNNClass, PredictClass

Loading Dataset

In [11]:
# Seed every RNG the notebook relies on. NumPy's global state drives scikit-learn
# calls that are given no random_state; torch's generator drives weight
# initialisation and DataLoader shuffling, so it has to be seeded as well for
# Restart & Run All to reproduce the recorded losses.
np.random.seed(123)
torch.manual_seed(123)

# Pick the compute device as a plain string ("cuda", "mps", "cpu", ...).
# `torch.accelerator` is not present in every torch build, so fall back to the
# CUDA check and finally to the CPU instead of raising AttributeError.
_accelerator = getattr(torch, "accelerator", None)
if _accelerator is not None and _accelerator.is_available():
    device = _accelerator.current_accelerator().type
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [12]:
# Read the diabetes diagnosis dataset (medical data) and discard the two
# identifier columns, which carry no predictive information.
# NOTE(review): the path is absolute and machine-specific; the notebook will
# not run elsewhere until it is made relative/configurable.
df = pd.read_csv(
    "/Users/kingmopser/PycharmProjects/ProbabilisticML/Dataset of Diabetes .csv"
).drop(columns=["ID", "No_Pation"])
len(df.columns)  # number of columns left after dropping the identifiers

12

Pre-Processing for Input Data

In [13]:
scaler = StandardScaler()
encoder = LabelEncoder()

# Gender -> 1 for "M", 0 for everything else. Case and surrounding whitespace
# are normalised first so that variants such as "m" or " M" are not silently
# encoded as 0.
df["Gender"] = df["Gender"].str.strip().str.upper().eq("M").astype("int64")

# Standardise every column between Gender (first) and CLASS (last).
# NOTE(review): the scaler is fit on all rows before the train/test split in
# the following cell, so test-set statistics leak into the features. Consider
# fitting on the training rows only.
c = df.columns[1:len(df.columns)-1]
df[c] = scaler.fit_transform(df[c])

# Clean the target: strip whitespace / unify case, then refuse anything that is
# not one of the three expected labels. Previously unknown values were mapped
# to 0, turned into NaN by the Categorical conversion and ended up as a bogus
# extra class; failing loudly is safer than training on corrupted labels.
labels = df["CLASS"].str.strip().str.upper()
valid = labels.isin(["N", "P", "Y"])
if not valid.all():
    unexpected = df.loc[~valid, "CLASS"].unique().tolist()
    raise ValueError(f"Unexpected CLASS labels: {unexpected!r}")

# LabelEncoder sorts its classes, so N/P/Y become 0/1/2 (when all three are
# present) without needing an intermediate ordered Categorical.
df["CLASS"] = encoder.fit_transform(labels)

In [14]:
# Feature matrix = every column except the last one (CLASS); target = CLASS.
X = df.iloc[:, :-1].to_numpy(dtype=np.float32)
y = df["CLASS"].to_numpy()

# 80/20 split. `random_state` pins the partition so it no longer depends on how
# often earlier cells consumed NumPy's global RNG; `stratify` keeps the class
# proportions the same in the train and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=123, stratify=y
)

# Move everything to the selected device. Labels are made explicitly int64
# (torch.long), the dtype class-index targets are expected to have.
tenX = torch.tensor(X_train).to(device)
tenY = torch.tensor(y_train, dtype=torch.long).to(device)
tenXTest = torch.tensor(X_test).to(device)
tenYTest = torch.tensor(y_test, dtype=torch.long).to(device)

# Mini-batch loader over the training partition only.
tenDF = TensorDataset(tenX, tenY)
trainLoader = DataLoader(tenDF, batch_size=32, shuffle=True)

Fitting Models

In [29]:
# Feature names without Gender (first column) and CLASS (last column).
feature_names = df.columns[1:len(df.columns)-1]
# Kept so that any other cell referencing it keeps working; this cell itself
# does not read it and always builds exactly two OOD rows per feature.
n_ood = 50
# The features were standardised earlier, so +/-5 lies far outside the bulk of
# the training data (any magnitude clearly above 3 would do).
ood_shift = 5.0

# Template row: the per-column mean of the training features, with Gender
# (column 0) pinned to 0 as in the previous version of this cell.
# The earlier code indexed this vector with the Gender-less feature index and
# then shifted all columns right by one; that put every column's mean under
# its right-hand neighbour and dropped the last column's mean. Writing
# to column i + 1 directly keeps every value in its own column.
ood_template = np.mean(X_train, axis=0)
ood_template[0] = 0.0

ood_samples = []
for i, feature in enumerate(feature_names):
    col = i + 1  # feature_names skips Gender, X_train does not
    # One sample far above and one far below the training range of `feature`.
    for value in (ood_shift, -ood_shift):
        sample = ood_template.copy()
        sample[col] = value
        ood_samples.append(sample)

# Stack into one array: (2 * number of features) rows, same columns as X_train.
X_ood = np.stack(ood_samples)
# Dummy labels, only needed where an API insists on a target. They are all 0,
# which coincides with a real class index, so do not score against them.
y_ood = np.zeros(len(X_ood))

# In-distribution test rows first, OOD rows appended afterwards.
# ("ODD" in the name below is a historical typo for "OOD"; kept for compatibility.)
X_test_ODD = np.vstack([X_test, X_ood])
y_test_OOD = np.concatenate([y_test, y_ood])

# Tensor versions on the selected device.
tenX_OOD = torch.tensor(X_test_ODD, dtype=torch.float32).to(device)
tenY_OOD = torch.tensor(y_test_OOD, dtype=torch.float32).to(device)

# Boolean mask aligned with the rows of X_test_ODD: True marks an OOD row.
is_ood = np.array([False]*len(X_test) + [True]*len(X_ood))

In [37]:
# Baseline neural-network classifier.
# The model is bound to `base_clf` rather than `nn`: the import cell binds `nn`
# to `torch.nn`, and reusing that name for a model instance would shadow the
# module for every cell executed afterwards.
# Input width = all columns except CLASS; output width = number of distinct labels.
base_clf = GetNeuralNetworkClassification(len(df.columns[:-1]), len(np.unique(y)))
TrainNNClass(base_clf, trainLoader, epochs=30)
# Predictions for the combined in-distribution test + OOD inputs.
cl = PredictClass(base_clf, tenX_OOD)

Epoch: 0 | Loss: 0.84389
Epoch: 10 | Loss: 0.20232
Epoch: 20 | Loss: 0.26734


In [38]:
# Sizes shared by the deterministic head and the Bayesian last layer.
n_inputs = len(df.columns[:-1])            # every column except CLASS
n_classes = len(df["CLASS"].unique())
latent_dim = 8  # presumably the output width of BaseNetwork - TODO confirm

# Step 1: train the feature extractor together with an ordinary linear head.
base = BaseNetwork(n_inputs).to(device)
head = torch.nn.Linear(in_features=latent_dim, out_features=n_classes).to(device)
baseMod = torch.nn.Sequential(base, head)
TrainNNClass(baseMod, trainLoader, 30)

# Step 2: freeze every parameter of the trained stack (feature extractor and
# the linear head alike) so later training cannot update them.
baseMod.requires_grad_(False)

# Step 3: fit a Bayesian last layer on top of the frozen feature extractor and
# predict on the combined in-distribution test + OOD inputs.
lastLayer = BayesianLastLayer(in_features=latent_dim, out_features=n_classes).to(device)
TrainLastLayerCL(base, lastLayer, trainLoader, epochs=30)
mean_prob, entropy = PredLastLayerCl(base, lastLayer, tenX_OOD)

Epoch: 0 | Loss: 0.92369
Epoch: 10 | Loss: 0.30172
Epoch: 20 | Loss: 0.24839


In [39]:
# Separate the Bayesian last-layer outputs into out-of-distribution rows and
# in-distribution rows, using the boolean mask built alongside the OOD samples.
mean_prob_OOD, mean_prob_ID = mean_prob[is_ood], mean_prob[~is_ood]
entropy_OOD, entropy_ID = entropy[is_ood], entropy[~is_ood]