In [3]:
import pandas as pd
import seaborn as sns 
import matplotlib.pyplot as plt
import torch
import subprocess
import numpy as np
from torchmetrics import PrecisionRecallCurve, F1Score, ConfusionMatrix, Precision, Recall
from typing import List, Any
from torcheval.metrics.functional import binary_auprc, binary_auroc
from collections import defaultdict
import torch.nn as nn

In [4]:
class MLP(nn.Module):
    def __init__(
            self,
            input_size=512,
            num_classes=18,
            activation='relu',
            hidden_sizes=[1024, 2048, 1024, 256, 128],
            dropout=0.1
        ):
        super().__init__()
        
        # Pick activation
        if activation == "relu":
            activation_cls = nn.ReLU
        elif activation == "leaky_relu":
            activation_cls = nn.LeakyReLU
        elif activation == "gelu":
            activation_cls = nn.GELU
        else:
            raise ValueError(f"Unsupported activation: {activation}")

        layers = []
        in_dim = input_size
        for h in hidden_sizes:
            layers.append(nn.Linear(in_dim, h))
            layers.append(nn.BatchNorm1d(h))  # helps stabilize
            layers.append(activation_cls())
            layers.append(nn.Dropout(dropout))
            in_dim = h

        # Final classification layer
        layers.append(nn.Linear(in_dim, num_classes))

        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)
    
model = MLP(
    input_size=512,
    activation='leaky_relu',
    dropout=0.1,
    num_classes=2,
)

In [5]:
state_dict = torch.load('/home/free4ky/projects/chest-diseases/model_binary_ctrate_mosmed.pth')
model.load_state_dict(state_dict)

<All keys matched successfully>

In [None]:
model.eval()
model.to('cuda')

MLP(
  (layers): Sequential(
    (0): Linear(in_features=512, out_features=1024, bias=True)
    (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=1024, out_features=2048, bias=True)
    (5): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): LeakyReLU(negative_slope=0.01)
    (7): Dropout(p=0.1, inplace=False)
    (8): Linear(in_features=2048, out_features=1024, bias=True)
    (9): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): LeakyReLU(negative_slope=0.01)
    (11): Dropout(p=0.1, inplace=False)
    (12): Linear(in_features=1024, out_features=256, bias=True)
    (13): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (14): LeakyReLU(negative_slope=0.01)
    (15): Dropout(p=0.1, inplace=False)
    (16): Linear(in_features=256

: 

In [None]:
from  torch.utils.data import TensorDataset, DataLoader
X_train = torch.load('/home/free4ky/projects/chest-diseases/data/preprocessed_mosmed/train_data.pt')
X_val = torch.load('/home/free4ky/projects/chest-diseases/data/preprocessed_mosmed/val_data.pt')
mosmed_ds = TensorDataset(
    torch.concat([X_train, X_val], dim=0)       
)
mosmed_dl = DataLoader(
    mosmed_ds,
    batch_size=1,
    shuffle=False
)


In [18]:
patalogy_probs = []
with torch.no_grad():
    for emb in mosmed_dl:
        # label = labels.any(dim=-1).long()
        emb[0] = emb[0].to('cuda')
        logits = model(emb[0])
        probs = nn.functional.softmax(logits)
        patalogy_probs.append(probs[0][-1].cpu().item())
patalogy_probs

  probs = nn.functional.softmax(logits)


[0.9998041987419128,
 0.9659420847892761,
 0.9999483823776245,
 0.9992824196815491,
 0.9991768002510071,
 0.9936116337776184,
 0.9983441829681396,
 0.7884594798088074,
 0.962137758731842,
 0.9918943047523499,
 0.9999845027923584,
 0.9988588094711304,
 0.9996053576469421,
 0.9990381002426147,
 0.9353761076927185,
 0.999943733215332,
 0.9998557567596436,
 0.9723762273788452,
 0.9947814345359802,
 0.11062029004096985,
 0.9964410662651062,
 0.9988920092582703,
 0.9982719421386719,
 0.9899062514305115,
 0.7362305521965027,
 0.9991602897644043,
 0.04577156528830528,
 0.5852149724960327,
 0.9990823268890381,
 0.9994527697563171,
 0.33186665177345276,
 0.9998931884765625,
 0.9999337196350098,
 0.8732342720031738,
 0.9999840259552002,
 0.9944267272949219,
 0.9997313618659973,
 0.06904087215662003,
 0.9685210585594177,
 0.7281748652458191,
 0.9983475208282471,
 0.999627947807312,
 0.9458487629890442,
 0.5248429179191589,
 0.9999268054962158,
 0.9848723411560059,
 0.49036750197410583,
 0.99985969

In [22]:
s = pd.Series(patalogy_probs)
s.quantile([0.1,.25,.75,.9])

0.10    0.731108
0.25    0.967450
0.75    0.999410
0.90    0.999857
dtype: float64

In [23]:
s[s<0.5]

19      0.110620
26      0.045772
30      0.331867
37      0.069041
46      0.490368
          ...   
1848    0.439804
1871    0.448998
1873    0.307171
1878    0.457991
1903    0.267192
Length: 119, dtype: float64