In [1]:
from transformers.modeling_outputs import SemanticSegmenterOutput  
from transformers import Dinov2Model, Dinov2PreTrainedModel  
from retouch_dataloader_utils import load_train_and_val  
from torch.utils.tensorboard import SummaryWriter
from aroi_dataloder_utils import load_val
from torch.utils.data import DataLoader  
from torch.utils.data import Dataset  
import torch.nn.functional as F  
from torch.optim import AdamW  
from tqdm.auto import tqdm  
import albumentations as A 
from PIL import Image  
import pandas as pd
import numpy as np  
import torch  
import cv2 
import csv 
import os 


torch.cuda.is_available() 

  check_for_updates()


True

In [2]:
#from datasets import load_dataset 
id2label = {
    0:"Background",
    1:"IRF",
    2:"SRF",
    3:"PED"
}
# map every class to a random color 
id2color = {k: list(np.random.choice(range(256), size=3)) for k,v in id2label.items()} 

In [18]:
class SegmentationDataset(Dataset): 
  def __init__(self, dataset, transform): 
    self.dataset = dataset 
    self.transform = transform 
 
  def __len__(self): 
    return len(self.dataset) 
 
  def __getitem__(self, idx): 
    item = self.dataset[idx] 

    original_image= np.load(item["image_path"])
    original_image = np.stack([original_image] * 3, axis=-1)

    original_segmentation_map = np.load(item["label_path"])
    #print("Az image értékei",np.min(original_image),np.max(original_image))
    #print("A label értékei,",np.min(original_segmentation_map),np.max(original_segmentation_map))
    transformed = self.transform(image=original_image, mask=original_segmentation_map)
    #kipróbálni hogy a maszkot is torzítja-e
    image, target = torch.tensor(transformed['image']), torch.LongTensor(transformed['mask']) 
 
    # convert to C, H, W 
    image = image.permute(2, 0, 1)
    image_path=item["image_path"] 
      
    return image, target, original_image, original_segmentation_map, image_path
 
# Let's create the training and validation datasets (note that we only randomly crop for training images). 
ADE_MEAN = (np.array([123.675, 116.280, 103.530])).tolist()
ADE_STD = (np.array([58.395, 57.120, 57.375])).tolist()
#Ha a kép 0-255 akkor nem kell a /255 osztás
#A transformokat leellenőrizni
 
train_transform = A.Compose([ 
    A.Resize(width=448, height=448),
    A.MedianBlur(blur_limit=5,p=0.5),
    A.Normalize(mean=ADE_MEAN, std=ADE_STD),
    #A.HorizontalFlip(p=0.5), #Horizontális tükrzözés az esetek 50%-ban
    #A.GaussianBlur(p=0.5),
], is_check_shapes=False) 
 
val_transform = A.Compose([ 
    A.Resize(width=448, height=448), 
    A.Normalize(mean=ADE_MEAN, std=ADE_STD), 
], is_check_shapes=False) 

In [4]:
def collate_fn(inputs): 
    batch = dict() 
    batch["pixel_values"] = torch.stack([i[0] for i in inputs], dim=0) 
    batch["labels"] = torch.stack([i[1] for i in inputs], dim=0) 
    batch["original_images"] = [i[2] for i in inputs] 
    batch["original_segmentation_maps"] = [i[3] for i in inputs]
    batch["image_path"] = [i[4] for i in inputs]
 
    return batch 

In [5]:
class LinearClassifier(torch.nn.Module): 
    def __init__(self, in_channels, tokenW=32, tokenH=32, num_labels=1): 
        super(LinearClassifier, self).__init__() 
 
        self.in_channels = in_channels 
        self.width = tokenW 
        self.height = tokenH 

        #Két-réteg:
        
        #self.conv1 = torch.nn.Conv2d(in_channels, 64, (6,6), padding=1)
        #self.conv2 = torch.nn.Conv2d(64, 128, (6,6), padding=1)
        #self.classifier = torch.nn.Conv2d(128, num_labels, (1,1))

        #Egy réteg:
        self.conv = torch.nn.Conv2d(in_channels, 128, (6,6), padding=1)
        self.classifier = torch.nn.Conv2d(128, num_labels, (1,1))

    def forward(self, embeddings): 
        embeddings = embeddings.reshape(-1, self.height, self.width, self.in_channels) 
        embeddings = embeddings.permute(0,3,1,2) 

        #Két-réteg:
        #x = torch.relu(self.conv1(embeddings))
        #x = torch.relu(self.conv2(x))
        #return self.classifier(x)
        
        #Egy-réteg
        x = torch.relu(self.conv(embeddings))
        return self.classifier(x)
 
 
class Dinov2ForSemanticSegmentation(Dinov2PreTrainedModel): 
  def __init__(self, config): 
    super().__init__(config) 
 
    self.dinov2 = Dinov2Model(config) 
    self.classifier = LinearClassifier(config.hidden_size, 32, 32, config.num_labels) 
 
  def forward(self, pixel_values, output_hidden_states=False, output_attentions=False, labels=None): 
    # use frozen features 
    outputs = self.dinov2(pixel_values, 
                            output_hidden_states=output_hidden_states, 
                            output_attentions=output_attentions) 
    # get the patch embeddings - so we exclude the CLS token 
    patch_embeddings = outputs.last_hidden_state[:,1:,:] 
 
    # convert to logits and upsample to the size of the pixel values 
    logits = self.classifier(patch_embeddings) 
    #ezen később lehet változtatni
    logits = torch.nn.functional.interpolate(logits, size=pixel_values.shape[2:], mode="bilinear", align_corners=False) 
 
    loss = None 
    if labels is not None: 
      # important: we're going to use 0 here as ignore index instead of the default -100 
      # as we don't want the model to learn to predict background 
      loss_fct = torch.nn.CrossEntropyLoss(ignore_index=0) 
      loss = loss_fct(logits.squeeze(), labels.squeeze()) 
 
    return SemanticSegmenterOutput( 
        loss=loss, 
        logits=logits, 
        hidden_states=outputs.hidden_states, 
        attentions=outputs.attentions, 
    ) 


In [6]:
def dice_loss(preds, targets, num_classes, smooth=1.0):
    #print("preds dimenziója",preds.shape)
    #print("targets dimenziója",targets.shape)
    #preds dimenziója torch.Size([4, 4, 448, 448])
    #targets dimenziója torch.Size([4, 448, 448])
    
    # Softmax alkalmazása a predikciókra (csatorna dimenzió mentén)
    preds_softmax = F.softmax(preds, dim=1).float()

    # One-hot kódolás helyett közvetlenül használjuk a softmax kimenetet
    targets_one_hot = F.one_hot(targets, num_classes).permute(0, 3, 1, 2).float()

    # Dice loss számítás
    intersection = (preds_softmax * targets_one_hot).sum(dim=(2, 3))
    union = preds_softmax.sum(dim=(2, 3)) + targets_one_hot.sum(dim=(2, 3))

    #szélesség magasság felett aggregálni
    #batch - példák között ne aggregálni
    #csatornák felett nem muszáj

    dice = (2.0 * intersection + smooth) / (union + smooth)
    dice_loss = 1 - dice.mean()
    
    return dice_loss

In [7]:
def compute_dice_coefficient(preds, targets, num_classes, eps=1e-6):
    preds_one_hot = torch.nn.functional.one_hot(preds, num_classes).permute(0, 3, 1, 2).float()
    targets_one_hot = torch.nn.functional.one_hot(targets, num_classes).permute(0, 3, 1, 2).float()

    #itt is át kell írni mint a lossnál
    intersection = (preds_one_hot * targets_one_hot).sum(dim=(2, 3))
    union = preds_one_hot.sum(dim=(2, 3)) + targets_one_hot.sum(dim=(2, 3))

    dice = (2.0 * intersection + eps) / (union + eps)
    return dice.mean().item()

In [8]:
# def compute_dice_for_inference(3dpreds, 3dtargets, num_classes, eps=0):
#aggregálni a szélességet, magasságot, és 128 vagy amennyi a dimenzió, ezt a permuteban
# a függvény kimenete 4db dice érték
def compute_dice_for_inference(preds, targets, num_classes, eps=0):
    preds_one_hot = torch.nn.functional.one_hot(preds, num_classes).permute(0, 3, 1, 2).float()
    targets_one_hot = torch.nn.functional.one_hot(targets, num_classes).permute(0, 3, 1, 2).float()

    #itt is át kell írni mint a lossnál
    intersection = (preds_one_hot * targets_one_hot).sum(dim=(2, 3))
    union = preds_one_hot.sum(dim=(2, 3)) + targets_one_hot.sum(dim=(2, 3))

    dice = (2.0 * intersection + eps) / (union + eps)
    return dice

In [9]:
def create_tensorboard_writer(logging_dir_name):
    return SummaryWriter(log_dir="runs/"+logging_dir_name)  # A logokat ebbe a mappába menti

In [10]:
def training(epochs,learning_rate,cross_val,train_dataloader):
    
    logging_dir_name=str(epochs)+'_'+str(learning_rate)+'_'+str(cross_val)
    writer=create_tensorboard_writer(logging_dir_name)
    
    model = Dinov2ForSemanticSegmentation.from_pretrained("facebook/dinov2-base", id2label=id2label, num_labels=len(id2label)) 
     
    for name, param in model.named_parameters(): 
      if name.startswith("dinov2"): 
        param.requires_grad = False 
     
    #outputs = model(pixel_values=batch["pixel_values"], labels=batch["labels"]) 
    
    learning_rate=float(learning_rate)

    optimizer = AdamW(model.parameters(), lr=learning_rate)
    #optimizer = AdamW(model.parameters(), lr=0.01)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    
    # Set model to training mode
    model.train()
    
    # Define CrossEntropyLoss
    ce_loss_fn = torch.nn.CrossEntropyLoss(ignore_index=0)  # Ignore background


    #scheduler = torch.optim.lr_scheduler.PolynomialLR(optimizer,total_iters=10, power=3.0)
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}")
    
        dice_score_avg = []
        loss_avg = []
        print(f"Learning rate: {optimizer.param_groups[0]['lr']:.6f}")
        
        for idx, batch in enumerate(tqdm(train_dataloader)):
            pixel_values = batch["pixel_values"].to(device)
            labels = batch["labels"].to(device)
    
            # Forward pass
            outputs = model(pixel_values, labels=labels)
            logits = outputs.logits
            loss_ce = ce_loss_fn(logits.squeeze(), labels.squeeze())  # CE Loss
    
            # Get predicted class indices
            preds = logits
    
            # Compute Dice loss
            loss_dice = dice_loss(preds, labels, num_classes=len(id2label))
    
            # Combine losses (weighted sum)
            loss = loss_ce + loss_dice  # You can adjust weights if needed
            loss_avg.append(loss.item())
            
            # Backward pass
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
    
            # Compute Dice coefficient (evaluation metric)
            with torch.no_grad():
                preds=logits.argmax(dim=1)
                dice_score = compute_dice_coefficient(preds.detach().cpu(), labels.detach().cpu(), num_classes=len(id2label))
                dice_score_avg.append(dice_score)
                
            # Print loss and metric every 100 batches
            #if idx % 100 == 0:
            #   print(f"Step {idx}, Loss: {loss.item():.4f}, Dice Score: {dice_score:.4f}")
        print("Dice score average",np.nanmean(dice_score_avg))
        print("Loss average",np.nanmean(loss_avg))
        
        writer.add_scalar("Loss/train", np.nanmean(loss_avg), epoch)
        writer.add_scalar("Accuracy/train", np.nanmean(dice_score_avg), epoch)
        #scheduler.step()
        
    writer.close()
    return model

In [11]:
def run_inference(image_dir,model,val_dataloader,epoch,lr,cross_val):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    
    columns = ["scan_number", "scan_index", "Background", "IRF", "SRF", "PED"]
    df = pd.DataFrame(columns=columns)
    
    for idx, batch in enumerate(tqdm(val_dataloader)):
        scan_number = get_scan_number(image_dir,batch["image_path"][0])
        scan_index = get_scan_index(image_dir,batch["image_path"][0])
        test_image = batch["pixel_values"]
        labels = batch["labels"]
        with torch.no_grad():
            outputs = model(test_image.to(device))
            size=test_image.shape[:2]
            logits = outputs.logits
            preds = logits.argmax(dim=1)
            dice_score = compute_dice_for_inference(preds.detach().cpu(), labels.detach().cpu(), num_classes=len(id2label))
            dice_list = [round(val, 4) for val in dice_score.squeeze().tolist()]
            df.loc[idx] = [scan_number,scan_index,dice_list[0],dice_list[1],dice_list[2],dice_list[3]]
    csv_filename=str(epoch)+'_'+str(lr)+'_'+str(cross_val)+'.csv'
    df.to_csv(csv_filename, index=False)
    print(df.mean())

In [12]:
def log_run(epoch, lr, cross_val, inference_score):
    log_file = "run_logs.csv"
    file_exists = os.path.exists(log_file)
    
    # Megnyitjuk a fájlt hozzáfűzés (append) módban
    with open(log_file, mode='a', newline='') as file:
        writer = csv.writer(file)
        
        # Ha a fájl még nem létezik, írjuk be a fejlécet
        if not file_exists:
            writer.writerow(["epoch", "learning_rate", "cross_validation", "inference_score"])
        
        # Új sor hozzáfűzése a loghoz
        writer.writerow([epoch, lr, cross_val, inference_score])
    
    print(f"Log mentve: epoch={epoch}, lr={lr}, cross_val={cross_val}, inference_score={inference_score}")

In [13]:
def get_scan_number(image_dir,path):
    new_elem=path.replace(image_dir,'')
    elem_list=new_elem.split('/')
    return int(elem_list[1])

In [14]:
def get_scan_index(image_dir,path):
    new_elem=path.replace(image_dir,'')
    elem_list=new_elem.split('/')
    elem_list=elem_list[2].replace('.npy','')
    elem_list=elem_list.split('_')
    elem_list=elem_list[1]
    return int(elem_list)

In [15]:
def run_train_with_cross_validation(epochs,learning_rates,cross_validation):
    image_dir = '/media/varga-denes/linux-D/RETOUCH_TRAINING_NPY/imagesTr'
    label_dir = '/media/varga-denes/linux-D/RETOUCH_TRAINING_NPY/labelsTr'
    model_dir = '/media/varga-denes/E/modellek_augmentáció_variánsok/MedianBlur/'
    for epoch in epochs:
        for lr in learning_rates:
            inference_values = []
            for cross_val in range(cross_validation):
                dataset = load_train_and_val(image_dir,label_dir,cross_validation,cross_val)

                train_dataset = SegmentationDataset(dataset["train"], transform=train_transform) 
                #val_dataset = SegmentationDataset(dataset["validation"], transform=val_transform)
                
                train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=0, pin_memory=True,collate_fn=collate_fn)
                #val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=True, num_workers=4, pin_memory=True,collate_fn=collate_fn)
                
                model=training(epoch,lr,cross_val,train_dataloader)
                #run_inference(image_dir,model,val_dataloader,epoch,lr,cross_val)
                #inference_score=run_inference(image_dir,model,val_dataloader,epoch,lr,cross_val)

                logging_model_name=model_dir+'/'+str(epoch)+'_'+str(lr)+'_'+str(cross_val)+'.pth'
                torch.save(model.state_dict(),logging_model_name)
                #log_run(epoch,lr,cross_val,inference_score)

In [19]:
epochs=[10]
learning_rates=['1e-3']
cross_validation=5
run_train_with_cross_validation(epochs,learning_rates,cross_validation)
#encoder nélkül lehet menteni

Some weights of Dinov2ForSemanticSegmentation were not initialized from the model checkpoint at facebook/dinov2-base and are newly initialized: ['classifier.classifier.bias', 'classifier.classifier.weight', 'classifier.conv.bias', 'classifier.conv.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.2352454061816114
Loss average 1.2015511480236203
Epoch 2/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.2476672290136703
Loss average 0.9986950557997915
Epoch 3/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.23078956349814106
Loss average 0.9887388451378557
Epoch 4/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.25251835731119027
Loss average 0.9292176175193001
Epoch 5/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.2507471124044807
Loss average 0.9401630511359563
Epoch 6/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.2511854655076994
Loss average 0.9282946036123121
Epoch 7/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.2573134317630675
Loss average 0.899566045815615
Epoch 8/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.26297165061356137
Loss average 0.8987125833638203
Epoch 9/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.27143844738547573
Loss average 0.8922611237711208
Epoch 10/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.271906598708582
Loss average 0.8849080201460277


Some weights of Dinov2ForSemanticSegmentation were not initialized from the model checkpoint at facebook/dinov2-base and are newly initialized: ['classifier.classifier.bias', 'classifier.classifier.weight', 'classifier.conv.bias', 'classifier.conv.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.21949076962554712
Loss average 1.2500105062021096
Epoch 2/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.23093828746150002
Loss average 1.0573020114148517
Epoch 3/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.2450780983730677
Loss average 0.9822203085178466
Epoch 4/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.24414075512359165
Loss average 0.9621131281894384
Epoch 5/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.2593854343833874
Loss average 0.9219274808523021
Epoch 6/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.2550264549801637
Loss average 0.9320677359552255
Epoch 7/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.26133536175719385
Loss average 0.8975037994169737
Epoch 8/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.2372798341583166
Loss average 0.9384916612194686
Epoch 9/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.2503958608120878
Loss average 0.8981233715916361
Epoch 10/10
Learning rate: 0.001000


  0%|          | 0/1353 [00:00<?, ?it/s]

Dice score average 0.2961521141522916
Loss average 0.910443738268219


Some weights of Dinov2ForSemanticSegmentation were not initialized from the model checkpoint at facebook/dinov2-base and are newly initialized: ['classifier.classifier.bias', 'classifier.classifier.weight', 'classifier.conv.bias', 'classifier.conv.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.2170291328769467
Loss average 1.241873739298546
Epoch 2/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.242974786791182
Loss average 1.0243299932346503
Epoch 3/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.23895131490810864
Loss average 0.984691816950744
Epoch 4/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.25840520482247087
Loss average 0.9453184265743271
Epoch 5/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.25805407218491494
Loss average 0.9177640602105324
Epoch 6/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.258013895756002
Loss average 0.9217963103184953
Epoch 7/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.27191155556480345
Loss average 0.9051702264332233
Epoch 8/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.2707995334686678
Loss average 0.9250759011080639
Epoch 9/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.28124562594015884
Loss average 0.8857136069819694
Epoch 10/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.27455226235692626
Loss average 0.8749005352385268


Some weights of Dinov2ForSemanticSegmentation were not initialized from the model checkpoint at facebook/dinov2-base and are newly initialized: ['classifier.classifier.bias', 'classifier.classifier.weight', 'classifier.conv.bias', 'classifier.conv.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.21783585157205512
Loss average 1.2209367212008475
Epoch 2/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.212047941014862
Loss average 1.0623123715852054
Epoch 3/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.23247830560703434
Loss average 1.0150336000950593
Epoch 4/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.2433290938174699
Loss average 0.9365073278990295
Epoch 5/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.25224271374119767
Loss average 0.9212446820791206
Epoch 6/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.26124428208388234
Loss average 0.8938037640351907
Epoch 7/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.2653336049849267
Loss average 0.8954574631192451
Epoch 8/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.25439373992495157
Loss average 0.8950103170946755
Epoch 9/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.24923092499109442
Loss average 0.9044664916824493
Epoch 10/10
Learning rate: 0.001000


  0%|          | 0/1401 [00:00<?, ?it/s]

Dice score average 0.2463545988258679
Loss average 1.0118876456117338


Some weights of Dinov2ForSemanticSegmentation were not initialized from the model checkpoint at facebook/dinov2-base and are newly initialized: ['classifier.classifier.bias', 'classifier.classifier.weight', 'classifier.conv.bias', 'classifier.conv.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10
Learning rate: 0.001000


  0%|          | 0/1429 [00:00<?, ?it/s]

Dice score average 0.22137956727397784
Loss average 1.2472989120505105
Epoch 2/10
Learning rate: 0.001000


  0%|          | 0/1429 [00:00<?, ?it/s]

Dice score average 0.2296675142028517
Loss average 1.0138981818608497
Epoch 3/10
Learning rate: 0.001000


  0%|          | 0/1429 [00:00<?, ?it/s]

Dice score average 0.22447795385952343
Loss average 0.9880097535234399
Epoch 4/10
Learning rate: 0.001000


  0%|          | 0/1429 [00:00<?, ?it/s]

Dice score average 0.24490087688428025
Loss average 0.9238921627287857
Epoch 5/10
Learning rate: 0.001000


  0%|          | 0/1429 [00:00<?, ?it/s]

Dice score average 0.24429325764624701
Loss average 0.918344292721965
Epoch 6/10
Learning rate: 0.001000


  0%|          | 0/1429 [00:00<?, ?it/s]

Dice score average 0.24484397526739193
Loss average 0.9763959109013789
Epoch 7/10
Learning rate: 0.001000


  0%|          | 0/1429 [00:00<?, ?it/s]

Dice score average 0.25312796645935304
Loss average 0.884163200764808
Epoch 8/10
Learning rate: 0.001000


  0%|          | 0/1429 [00:00<?, ?it/s]

Dice score average 0.26270383369096767
Loss average 0.9077869775093392
Epoch 9/10
Learning rate: 0.001000


  0%|          | 0/1429 [00:00<?, ?it/s]

Dice score average 0.2428255837757409
Loss average 0.9363043361089446
Epoch 10/10
Learning rate: 0.001000


  0%|          | 0/1429 [00:00<?, ?it/s]

Dice score average 0.2614892977697187
Loss average 0.8870588417100435


In [21]:
def compute_dice_for_inference_3d(preds_3d, targets_3d, num_classes):
    # shape: [D, H, W] -> [1, D, H, W]
    preds_3d = preds_3d.unsqueeze(0).long()
    targets_3d = targets_3d.unsqueeze(0).long()

    # one-hot encode: [1, D, H, W, C]
    preds_one_hot = torch.nn.functional.one_hot(preds_3d, num_classes=num_classes)
    targets_one_hot = torch.nn.functional.one_hot(targets_3d, num_classes=num_classes)

    # permute to [1, C, D, H, W]
    preds_one_hot = preds_one_hot.permute(0, 4, 1, 2, 3).float()
    targets_one_hot = targets_one_hot.permute(0, 4, 1, 2, 3).float()

    # Dice computation over [D, H, W] for each class
    intersection = (preds_one_hot * targets_one_hot).sum(dim=(2, 3, 4))
    union = preds_one_hot.sum(dim=(2, 3, 4)) + targets_one_hot.sum(dim=(2, 3, 4))

    dice = (2.0 * intersection) / (union + 1e-6)  # small epsilon to avoid division by 0

    return dice.squeeze(0)  # shape: [num_classes]


In [20]:
#Load model and run inference on it
def run_3d_inference_on_models(epoch,learning_rate,cross_validation):
    path='/media/varga-denes/E/modellek_augmentáció_variánsok/MedianBlur/'+str(epoch)+'_'+str(learning_rate)+'_'+str(cross_validation)+'.pth'
    
    model = Dinov2ForSemanticSegmentation.from_pretrained("facebook/dinov2-base", id2label=id2label, num_labels=len(id2label))
    model.load_state_dict(torch.load(path, weights_only=True))
    
    device = "cuda" if torch.cuda.is_available() else "cpu"
    
    image_dir = '/media/varga-denes/linux-D/RETOUCH_TRAINING_NPY/imagesTr'
    label_dir = '/media/varga-denes/linux-D/RETOUCH_TRAINING_NPY/labelsTr'
    dataset = load_train_and_val(image_dir,label_dir,5,cross_validation)

    #image_dir = '/media/varga-denes/linux-D/AROI_VALIDATE/imagesTr'
    #label_dir = '/media/varga-denes/linux-D/AROI_VALIDATE/labelsTr'
    #dataset = load_val(image_dir,label_dir)
    
    for name, param in model.named_parameters(): 
      if name.startswith("dinov2"): 
        param.requires_grad = False 
    
    val_dataset = SegmentationDataset(dataset["validation"], transform=val_transform)
    
    val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=1, pin_memory=True,collate_fn=collate_fn)
    
    image_dict={}
    label_dict={}
    maxdict={}
    for idx, batch in enumerate(tqdm(val_dataloader)):
        scan_number = get_scan_number(image_dir,batch["image_path"][0])
        scan_index = get_scan_index(image_dir,batch["image_path"][0])
        key=str(scan_number)+'_'+str(scan_index)
        image_dict[key]=batch["pixel_values"]
        label_dict[key]=batch["labels"]
    
        if scan_number not in maxdict:
            maxdict[scan_number] = scan_index
        else:
            maxdict[scan_number] = max(maxdict[scan_number], scan_index)
            
    model = model.to(device)
    
    columns = ["scan_number","Background", "IRF", "SRF", "PED"]
    df = pd.DataFrame(columns=columns)
    
    for idx,key in enumerate(maxdict.keys()):
        value=maxdict[key]
        preds_3d = []
        labels_3d = []
        for val in range(value+1):
            keyval=str(key)+'_'+str(val)
            test_image = image_dict[keyval]
            labels = label_dict[keyval]
            with torch.no_grad():
                outputs = model(test_image.to(device))
                size=test_image.shape[:2]
                logits = outputs.logits
                preds = logits.argmax(dim=1)
                preds_3d.append(preds.squeeze(0).cpu())  # shape: [H, W]
                labels_3d.append(labels.squeeze(0).cpu())  # shape: [H, W]
    
        # Stack to 3D: shape [D, H, W] where D = value+1
        preds_3d = torch.stack(preds_3d, dim=0)
        labels_3d = torch.stack(labels_3d, dim=0)
                
        dice_score = compute_dice_for_inference_3d(preds_3d.detach().cpu(), labels_3d.detach().cpu(), num_classes=len(id2label))
        dice_list = [round(val, 4) for val in dice_score.squeeze().tolist()]
        df.loc[idx] = [key,dice_list[0],dice_list[1],dice_list[2],dice_list[3]]
        
    csv_filename=str(epoch)+'_'+str(learning_rate)+'_'+str(cross_validation)+'_3d_inference.csv'
    print(csv_filename,' Is ready!')
    df.to_csv(csv_filename, index=False)

In [22]:
val_transform = A.Compose([ 
    A.Resize(width=448, height=448),
    A.MedianBlur(blur_limit=5,p=0.5),
    A.Normalize(mean=ADE_MEAN, std=ADE_STD),
], is_check_shapes=False) 

epochs=[10]
learning_rates=['1e-3']
cross_validation=5
for epoch in epochs:
    for lr in learning_rates:
        for cross_val in range(cross_validation):
            print(cross_val)
            run_3d_inference_on_models(epoch,lr,cross_val)

0


Some weights of Dinov2ForSemanticSegmentation were not initialized from the model checkpoint at facebook/dinov2-base and are newly initialized: ['classifier.classifier.bias', 'classifier.classifier.weight', 'classifier.conv.bias', 'classifier.conv.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/1525 [00:00<?, ?it/s]

10_1e-3_0_3d_inference.csv  Is ready!
1


Some weights of Dinov2ForSemanticSegmentation were not initialized from the model checkpoint at facebook/dinov2-base and are newly initialized: ['classifier.classifier.bias', 'classifier.classifier.weight', 'classifier.conv.bias', 'classifier.conv.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/1525 [00:00<?, ?it/s]

10_1e-3_1_3d_inference.csv  Is ready!
2


Some weights of Dinov2ForSemanticSegmentation were not initialized from the model checkpoint at facebook/dinov2-base and are newly initialized: ['classifier.classifier.bias', 'classifier.classifier.weight', 'classifier.conv.bias', 'classifier.conv.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/1333 [00:00<?, ?it/s]

10_1e-3_2_3d_inference.csv  Is ready!
3


Some weights of Dinov2ForSemanticSegmentation were not initialized from the model checkpoint at facebook/dinov2-base and are newly initialized: ['classifier.classifier.bias', 'classifier.classifier.weight', 'classifier.conv.bias', 'classifier.conv.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/1333 [00:00<?, ?it/s]

10_1e-3_3_3d_inference.csv  Is ready!
4


Some weights of Dinov2ForSemanticSegmentation were not initialized from the model checkpoint at facebook/dinov2-base and are newly initialized: ['classifier.classifier.bias', 'classifier.classifier.weight', 'classifier.conv.bias', 'classifier.conv.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/1220 [00:00<?, ?it/s]

10_1e-3_4_3d_inference.csv  Is ready!
