In [1]:
import argparse
from copy import deepcopy
import logging
import os
import pprint

import torch
from torch import nn
import torch.backends.cudnn as cudnn
from torch.optim import AdamW
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import yaml
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np
import torch.nn.functional as F

from dataset.semi import SemiDataset
from model.semseg.dpt import DPT
from model.semseg.dpt import DPT_with_Feature
from supervised import evaluate
from util.classes import CLASSES
from util.ohem import ProbOhemCrossEntropy2d
from util.utils import count_params, init_log, AverageMeter
from util.dist_helper import setup_distributed
from Regularization_losses import *


2025-01-23 17:38:36.205594: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-23 17:38:36.213709: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-23 17:38:36.222505: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-23 17:38:36.225071: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-23 17:38:36.232194: I tensorflow/core/platform/cpu_feature_guar

In [2]:
def show_images(tensor):
    """
    Displays images from a 4D Tensor (batch_size, channels, x, y) in one row.

    Args:
    tensor (torch.Tensor): A tensor containing image data. Only 1-channel
        (grayscale) and 3-channel (RGB) images are supported. If any value
        lies outside [0, 1], the whole batch is min-max rescaled to [0, 1].

    Raises:
    ValueError: If the tensor is not 4D, holds no images, or has a channel
        size other than 1 or 3.
    """
    # Validate before any figure is created so a bad input does not leave an
    # empty figure behind.
    if tensor.dim() != 4:
        raise ValueError("Expected a 4D tensor shaped (batch_size, channels, x, y).")
    batch_size = tensor.size(0)
    if batch_size == 0:
        raise ValueError("Tensor contains no images.")
    if tensor.size(1) not in (1, 3):
        raise ValueError("Tensor contains images with unsupported channel size.")

    # Plotting needs host memory and no autograd graph.
    tensor = tensor.detach().cpu()

    # Check if the tensor requires normalization
    lowest, highest = tensor.min(), tensor.max()
    if lowest < 0 or highest > 1:
        value_range = highest - lowest
        if value_range > 0:
            # Normalize tensor to the range [0, 1]
            tensor = (tensor - lowest) / value_range
        else:
            # A constant batch would divide by zero; show it as uniform black.
            tensor = torch.zeros_like(tensor, dtype=torch.float)

    # squeeze=False always yields a 2D axes array, so batch_size == 1 needs
    # no special handling.
    fig, axs = plt.subplots(1, batch_size, figsize=(batch_size * 3, 3), squeeze=False)

    for ax, img in zip(axs[0], tensor):
        if img.shape[0] == 3:
            # Convert from (channels, x, y) to (x, y, channels) for RGB images
            img = img.permute(1, 2, 0)
        else:
            # Squeeze channel dimension for grayscale images
            img = img.squeeze(0)

        ax.imshow(img.numpy())
        ax.axis('off')  # Hide axes ticks

    plt.show()

In [3]:
# Experiment identifiers (dataset / method / experiment name / label split).
dataset, method, exp, split = 'ade20k', 'unimatch_v2', 'dinov2_small', '1_2'
port = '8080'

# File locations derived from the identifiers above.
config = 'configs/' + dataset + '.yaml'
labeled_id_path = '/'.join(('splits', dataset, split, 'labeled.txt'))
unlabeled_id_path = '/'.join(('splits', dataset, split, 'unlabeled.txt'))
save_path = '/'.join(('exp', dataset, method, exp, split))


In [4]:

# Read the experiment configuration. The context manager closes the file
# handle, which the bare open() call left to the garbage collector.
# NOTE(review): yaml.Loader can construct arbitrary Python objects. That is
# acceptable for a trusted local config; prefer yaml.safe_load if the file
# could ever come from an untrusted source.
with open(config, "r") as config_file:
    cfg = yaml.load(config_file, Loader=yaml.Loader)

# The logger / distributed / SummaryWriter setup of the training script is
# intentionally not used in this notebook (it relied on an `args` object
# that is not defined here).

cudnn.enabled = True
cudnn.benchmark = True

# Head settings per encoder size; the key is the last "_"-separated token
# of cfg['backbone'].
model_configs = {
    'small': {'encoder_size': 'small', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'base': {'encoder_size': 'base', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'large': {'encoder_size': 'large', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'giant': {'encoder_size': 'giant', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
}

# model = DPT(**{**model_configs[cfg['backbone'].split('_')[-1]], 'nclass': cfg['nclass']}).cuda()
model = DPT_with_Feature(**{**model_configs[cfg['backbone'].split('_')[-1]], 'nclass': cfg['nclass']}).cuda()

# weights_only=True restricts unpickling to tensors and plain containers and
# silences the torch.load FutureWarning; map_location='cpu' avoids holding a
# second copy on the GPU (load_state_dict copies into the model's device).
# NOTE: the backbone-only weights are loaded but currently not applied,
# because the full checkpoint below supplies every parameter.
state_dict = torch.load(f'./pretrained/{cfg["backbone"]}.pth', map_location='cpu', weights_only=True)
# model.backbone.load_state_dict(state_dict)

# NOTE(review): the checkpoint name says "1_32labels" while `split` is set
# to '1_2' in the config cell; confirm this mismatch is intended.
total_state_dict = torch.load(
    './pretrained/ade20k_unimatch_v2_1_32labels_dinov2_small.pth',
    map_location='cpu',
    weights_only=True,
)

# if cfg['lock_backbone']:
#     model.lock_backbone()

# Kept as the last expression so the key-matching report is displayed.
model.load_state_dict(total_state_dict)

  state_dict = torch.load(f'./pretrained/{cfg["backbone"]}.pth')
  total_state_dict = torch.load('./pretrained/ade20k_unimatch_v2_1_32labels_dinov2_small.pth')


<All keys matched successfully>

In [14]:
# Frozen, eval-mode copy of `model`, kept under the name `model_ema`.
model_ema = deepcopy(model).cuda()
model_ema.eval()
for param in model_ema.parameters():
    param.requires_grad = False

# Supervised criterion selected by the config; unknown names are rejected
# first, then the matching loss is built from the configured kwargs.
if cfg['criterion']['name'] not in ('CELoss', 'OHEM'):
    raise NotImplementedError('%s criterion is not implemented' % cfg['criterion']['name'])

if cfg['criterion']['name'] == 'OHEM':
    criterion_l = ProbOhemCrossEntropy2d(**cfg['criterion']['kwargs'])
else:
    criterion_l = nn.CrossEntropyLoss(**cfg['criterion']['kwargs'])

class NormalizedCompactnessNormLoss(nn.Module):
    """Mean of the size-normalized L2 norms of four feature tensors.

    For each tensor the 2-norm over all of its elements is divided by its
    element count; the loss is the average of those four values.
    """

    def __init__(self):
        super().__init__()

    def forward(self, features_tuple):
        """Compute the loss.

        Args:
            features_tuple (tuple | list): Exactly four tensors.

        Returns:
            torch.Tensor: 0-dim tensor holding the mean normalized norm.

        Raises:
            AssertionError: If the input is not a tuple/list of length four.
        """
        # Lists are accepted as well as tuples; the length contract is unchanged.
        assert isinstance(features_tuple, (tuple, list)) and len(features_tuple) == 4, "Input must be a tuple of four tensors."

        # torch.linalg.vector_norm replaces the deprecated torch.norm; with
        # default arguments it is the 2-norm of the flattened tensor.
        normalized_norms = [torch.linalg.vector_norm(f) / f.numel() for f in features_tuple]

        return torch.stack(normalized_norms).mean()


import torch

class GradientPenaltyLoss(nn.Module):
    """Penalty built from the gradient of ``y_pred ** 2`` w.r.t. embeddings.

    For every embedding the gradient of the squared prediction is taken,
    scaled to unit L2 norm along the last dimension, squared element-wise,
    optionally weighted by ``entropy_qz`` and averaged. The per-embedding
    values are then averaged.

    NOTE(review): with ``entropy_qz=None`` the squared entries of a
    unit-norm vector sum to ~1 along the last dimension, so each term is
    ~1/D (D = last-dimension size) wherever the gradient is non-zero. The
    loss is therefore nearly constant in that mode; confirm this is the
    intended objective.

    Args:
        entropy_qz: Optional weight multiplied into the squared normalized
            gradients (must broadcast against them). ``None`` disables it.
    """

    def __init__(self, entropy_qz=None):
        super().__init__()
        self.entropy_qz = entropy_qz

    def forward(self, embeddings, y_pred):
        """Compute the penalty.

        Args:
            embeddings: Iterable of tensors that require grad.
            y_pred (torch.Tensor): Prediction whose square is differentiated.

        Returns:
            torch.Tensor: 0-dim loss (differentiable, built with
            ``create_graph=True``).

        Raises:
            ValueError: If no embedding is given or one does not require grad.
        """
        embeddings = tuple(embeddings)
        if not embeddings:
            # Previously surfaced as an opaque ZeroDivisionError.
            raise ValueError("At least one embedding is required.")
        if not all(embedding.requires_grad for embedding in embeddings):
            raise ValueError("Each embedding must require gradients.")

        # Element-wise square of the prediction (no target is involved).
        pred_loss = torch.square(y_pred)

        # One backward pass for all embeddings instead of one per embedding;
        # entries are None for embeddings that pred_loss does not depend on.
        grads = torch.autograd.grad(outputs=pred_loss, inputs=embeddings,
                                    grad_outputs=torch.ones_like(pred_loss),
                                    create_graph=True, allow_unused=True)

        total_loss = 0.0
        for embedding, grad_pred_loss in zip(embeddings, grads):
            # An unused embedding contributes a zero gradient.
            if grad_pred_loss is None:
                grad_pred_loss = torch.zeros_like(embedding)

            # Unit-normalize along the last dimension; epsilon avoids 0/0.
            norm = torch.linalg.vector_norm(grad_pred_loss, ord=2, dim=-1, keepdim=True) + 1e-8
            grad_squared = torch.square(grad_pred_loss / norm)

            # Apply entropy weighting if provided
            if self.entropy_qz is not None:
                grad_squared = self.entropy_qz * grad_squared

            total_loss = total_loss + torch.mean(grad_squared)

        # Average over the number of embeddings to normalize scale
        return total_loss / len(embeddings)


# Loss modules: per-pixel (unreduced) cross-entropy for the unlabeled branch
# plus the two feature regularizers defined above.
criterion_u = nn.CrossEntropyLoss(reduction='none')
criterion_norm = NormalizedCompactnessNormLoss()
criterion_gradient = GradientPenaltyLoss()

# The unlabeled set is built first because its id count is passed as
# `nsample` to the labeled set (presumably to match their lengths — confirm
# in SemiDataset).
trainset_u = SemiDataset(cfg['dataset'], cfg['data_root'], 'train_u', cfg['crop_size'], unlabeled_id_path)
trainset_l = SemiDataset(
    cfg['dataset'],
    cfg['data_root'],
    'train_l',
    cfg['crop_size'],
    labeled_id_path,
    nsample=len(trainset_u.ids),
)
valset = SemiDataset(cfg['dataset'], cfg['data_root'], 'val')

In [15]:
# Evaluate the gradient penalty on the labeled-batch features and predictions.
# NOTE: `pred_x_features` and `pred_x` are only assigned in the training-step
# cell further down, so on a fresh kernel this cell fails unless that cell
# has been run first.
criterion_gradient(pred_x_features, pred_x)

tensor(0.0026, device='cuda:0', grad_fn=<DivBackward0>)

In [None]:
# NOTE(review): `pred_u_w_features` comes from `model_ema` inside a
# torch.no_grad() block in the training-step cell further down, so these
# embeddings presumably do not require grad and GradientPenaltyLoss would
# raise ValueError here — confirm before relying on this cell.
criterion_gradient(pred_u_w_features, pred_u_w)

In [6]:
# The DistributedSampler objects of the training script are disabled in this
# notebook; every loader walks its dataset in order (shuffle=False).
trainloader_l = DataLoader(
    trainset_l,
    batch_size=cfg['batch_size'],
    shuffle=False,
    num_workers=4,
    pin_memory=True,
    drop_last=True,
)
trainloader_u = DataLoader(
    trainset_u,
    batch_size=cfg['batch_size'],
    shuffle=False,
    num_workers=4,
    pin_memory=True,
    drop_last=True,
)
valloader = DataLoader(
    valset,
    batch_size=1,
    shuffle=False,
    num_workers=1,
    pin_memory=True,
    drop_last=False,
)

# Bookkeeping values carried over from the training script.
total_iters = cfg['epochs'] * len(trainloader_u)
previous_best = previous_best_ema = 0.0
best_epoch = best_epoch_ema = 0
epoch = -1


In [7]:
# Pair labeled and unlabeled batches. zip() stops at the shorter loader and
# is a one-shot iterator, so re-run this cell to start again from the first
# batch pair.
loader = zip(trainloader_l, trainloader_u)

In [8]:
# Switch `model` to evaluation mode; the returned module's repr is the cell
# output.
model.eval()

DPT_with_Feature(
  (backbone): DinoVisionTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 384, kernel_size=(14, 14), stride=(14, 14))
      (norm): Identity()
    )
    (blocks): ModuleList(
      (0-11): 12 x NestedTensorBlock(
        (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
        (attn): MemEffAttention(
          (qkv): Linear(in_features=384, out_features=1152, bias=True)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=384, out_features=384, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (ls1): LayerScale()
        (drop_path1): Identity()
        (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=384, out_features=1536, bias=True)
          (act): GELU(approximate='none')
          (fc2): Linear(in_features=1536, out_features=384, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
     

In [9]:
# Iterator used to pull batch pairs manually with next(). `loader` is a zip
# object, which is already an iterator, so iter() returns that same object
# and both names share one position.
loader_enumerator = iter(loader)


In [12]:
# One manual training-style step on a single (labeled, unlabeled) batch pair.
# Loop form this cell was derived from:
# for i, ((img_x, mask_x),(img_u_w, img_u_s1, img_u_s2, ignore_mask, cutmix_box1, cutmix_box2)) in enumerate(loader):
((img_x, mask_x),(img_u_w, img_u_s1, img_u_s2, ignore_mask, cutmix_box1, cutmix_box2)) = next(loader_enumerator)

img_x, mask_x = img_x.cuda(), mask_x.cuda()
img_u_w, img_u_s1, img_u_s2 = img_u_w.cuda(), img_u_s1.cuda(), img_u_s2.cuda()
ignore_mask, cutmix_box1, cutmix_box2 = ignore_mask.cuda(), cutmix_box1.cuda(), cutmix_box2.cuda()

# Pseudo-labels and confidences from the frozen copy on the weak view.
# Nothing computed under no_grad tracks gradients, so no detach is needed.
with torch.no_grad():
    pred_u_w, pred_u_w_features = model_ema(img_u_w)
    conf_u_w = pred_u_w.softmax(dim=1).max(dim=1)[0]
    mask_u_w = pred_u_w.argmax(dim=1)

# CutMix on the strong views: inside each box, pixels are replaced by the
# batch-reversed image. Each mask is computed once instead of twice.
box1_img = cutmix_box1.unsqueeze(1).expand(img_u_s1.shape) == 1
box2_img = cutmix_box2.unsqueeze(1).expand(img_u_s2.shape) == 1
img_u_s1[box1_img] = img_u_s1.flip(0)[box1_img]
img_u_s2[box2_img] = img_u_s2.flip(0)[box2_img]

pred_x, pred_x_features = model(img_x)
pred_u_s, pred_u_s_features = model(torch.cat((img_u_s1, img_u_s2)), comp_drop=True)

# Both strong views were concatenated along the batch axis; split them back.
pred_u_s1, pred_u_s2 = pred_u_s.chunk(2)
# pred_u_s1_features, pred_u_s2_features = pred_u_s_features.chunk(2)

# Apply the same CutMix boxes to pseudo-labels, confidences and ignore masks
# so the targets line up with the mixed strong views.
box1, box2 = cutmix_box1 == 1, cutmix_box2 == 1

mask_u_w_cutmixed1, conf_u_w_cutmixed1, ignore_mask_cutmixed1 = mask_u_w.clone(), conf_u_w.clone(), ignore_mask.clone()
mask_u_w_cutmixed2, conf_u_w_cutmixed2, ignore_mask_cutmixed2 = mask_u_w.clone(), conf_u_w.clone(), ignore_mask.clone()

mask_u_w_cutmixed1[box1] = mask_u_w.flip(0)[box1]
conf_u_w_cutmixed1[box1] = conf_u_w.flip(0)[box1]
ignore_mask_cutmixed1[box1] = ignore_mask.flip(0)[box1]

mask_u_w_cutmixed2[box2] = mask_u_w.flip(0)[box2]
conf_u_w_cutmixed2[box2] = conf_u_w.flip(0)[box2]
ignore_mask_cutmixed2[box2] = ignore_mask.flip(0)[box2]

# Labeled-branch losses.
loss_x = criterion_l(pred_x, mask_x)
loss_x_norm = criterion_norm(pred_x_features)
loss_x_gradient = criterion_gradient(pred_x_features, pred_x)

# Unlabeled-branch losses: keep only confident, non-ignored pixels and
# normalize by the number of non-ignored pixels. max(..., 1) prevents a
# 0/0 = NaN loss when every pixel of the batch is ignored.
valid1 = ignore_mask_cutmixed1 != 255
loss_u_s1 = criterion_u(pred_u_s1, mask_u_w_cutmixed1)
loss_u_s1 = loss_u_s1 * ((conf_u_w_cutmixed1 >= cfg['conf_thresh']) & valid1)
loss_u_s1 = loss_u_s1.sum() / max(valid1.sum().item(), 1)

valid2 = ignore_mask_cutmixed2 != 255
loss_u_s2 = criterion_u(pred_u_s2, mask_u_w_cutmixed2)
loss_u_s2 = loss_u_s2 * ((conf_u_w_cutmixed2 >= cfg['conf_thresh']) & valid2)
loss_u_s2 = loss_u_s2.sum() / max(valid2.sum().item(), 1)

loss_u_s = (loss_u_s1 + loss_u_s2) / 2.0

# NOTE(review): `loss_x_gradient` is computed above but is not part of the
# combined loss; the divisor 3.0 matches the three terms that are summed.
loss = (loss_x + loss_x_norm + loss_u_s) / 3.0

In [13]:
# Display the gradient-penalty value computed in the training-step cell.
loss_x_gradient

tensor(0.0026, device='cuda:0', grad_fn=<DivBackward0>)

In [None]:
# Repeat the strong-view forward pass on its own to get hold of the returned
# features. `a` receives the first return value (the predictions, going by
# the other model calls in this notebook) and is not used in the cells shown.
a, features = model(torch.cat((img_u_s1, img_u_s2)), comp_drop=True)

In [None]:
# torch.tensor() cannot build a tensor from a list of multi-element tensors
# (it raises "only one element tensors can be converted to Python scalars").
# torch.stack joins them along a new leading dimension instead.
# Assumes every entry of `features` has the same shape — TODO confirm.
feature_list = list(features)
feature_tensor = torch.stack(feature_list)

In [None]:
# Display the combined loss from the training-step cell.
loss

In [None]:
# Recompute the normalized-norm loss on the labeled-batch features.
criterion_norm(pred_x_features)

In [None]:
# Show the labeled input images (copied to the CPU for plotting).
show_images(img_x.cpu())

In [None]:
# Show the label masks; unsqueeze(1) adds the channel axis that show_images
# expects in its (batch_size, channels, x, y) input.
show_images(mask_x.cpu().unsqueeze(1))

In [None]:
# Show the predicted class map (argmax over dim 1), detached from the graph
# and given a channel axis for show_images.
show_images(pred_x.argmax(1).detach().cpu().unsqueeze(1))

In [None]:
# Inspect the shape of the labeled image batch.
img_x.size()

In [None]:
# Perturb the backbone's intermediate features with Gaussian noise and
# decode the perturbed features with the segmentation head.
NOISE_STD = 3     # standard deviation of the Gaussian perturbation
PATCH_SIZE = 14   # matches the 14x14, stride-14 patch_embed conv in the model repr

latent = model.backbone.get_intermediate_layers(
            img_x, model.intermediate_layer_idx[model.encoder_size]
        )

# latent = model.backbone(img_x)
# NOTE: a single noise sample (shaped like the first feature tensor) is
# shared by all layers, and no random seed is set in this notebook.
noise = np.random.normal(0, NOISE_STD, latent[0].shape)

# Add the noise on the features' own device. This avoids the CPU round-trip
# and the tensor + ndarray arithmetic of the earlier version (NumPy
# conversion is not available for tensors that require grad), and detach()
# yields graph-free inputs just as the previous torch.tensor(...) copy did.
noise_t = torch.from_numpy(np.float32(noise)).to(latent[0].device)
noisy_latent = tuple(latent_i.detach() + noise_t for latent_i in latent)
# noisy_latent = torch.tensor(noisy_latent).cuda()

patch_h, patch_w = img_x.shape[-2] // PATCH_SIZE, img_x.shape[-1] // PATCH_SIZE

pred_x_noisy_latent = model.head(noisy_latent, patch_h, patch_w)
# Upsample the head output to the patch-aligned input resolution.
pred_x_noisy_latent = F.interpolate(pred_x_noisy_latent, (patch_h * PATCH_SIZE, patch_w * PATCH_SIZE), mode='bilinear', align_corners=True)

In [None]:
# Inspect the dtype of the first intermediate feature tensor.
latent[0].dtype

In [None]:
# Number of feature tensors in the perturbed tuple (one per entry of `latent`).
len(noisy_latent)

In [None]:
# Shape of the noise sample; it was drawn with the shape of latent[0].
noise.shape

In [None]:
# Project every perturbed feature tensor to 2D with PCA and t-SNE, one row
# of plots per tensor. The entries of `noisy_latent` are the backbone's
# intermediate-layer outputs (not batches), so the grid is sized from the
# tuple; the previous fixed 2x2 grid and two titles would raise IndexError
# for more than two entries.
n_layers = len(noisy_latent)
fig, axes = plt.subplots(n_layers, 2, figsize=(12, 6 * n_layers), squeeze=False)

for i, layer_latent in enumerate(noisy_latent):
    # Flatten to [n_tokens_total, feature_dim]; the feature width is read
    # from the tensor instead of hard-coding 384. detach() is needed before
    # NumPy conversion if the tensor tracks gradients.
    latent_flat = layer_latent.detach().cpu().reshape(-1, layer_latent.shape[-1]).numpy()

    # PCA
    latent_pca = PCA(n_components=2).fit_transform(latent_flat)

    # t-SNE
    # NOTE(review): recent scikit-learn releases rename `n_iter` to
    # `max_iter`; switch the keyword if a deprecation warning or TypeError
    # appears — confirm against the installed version.
    latent_tsne = TSNE(n_components=2, perplexity=30, n_iter=300).fit_transform(latent_flat)

    layer_title = f'Layer {i + 1}'

    # Plot PCA
    ax = axes[i, 0]
    ax.scatter(latent_pca[:, 0], latent_pca[:, 1], alpha=0.5)
    ax.set_title(f'PCA of {layer_title}')

    # Plot t-SNE
    ax = axes[i, 1]
    ax.scatter(latent_tsne[:, 0], latent_tsne[:, 1], alpha=0.5)
    ax.set_title(f't-SNE of {layer_title}')

plt.tight_layout()
plt.show()