In [1]:
import torch

In [2]:
# Three 1-D integer tensors of length 2; b and c hold the same values.
a, b, c = (torch.tensor(values) for values in ([1, 2], [3, 4], [3, 4]))


In [3]:
# Stack the three length-2 tensors along a new dim 1: each input becomes one
# column, so the displayed result has shape (2, 3).
torch.stack((a, b, c), dim=1)

tensor([[1, 3, 3],
        [2, 4, 4]])

In [5]:
# Two identical (2, 1) column tensors.
a = torch.tensor([1, 2]).reshape(2, 1)
b = a.clone()

# Concatenating along the existing dim 1 places the columns side by side -> shape (2, 2).
torch.cat((a, b), dim=1)

tensor([[1, 1],
        [2, 2]])

In [1]:
alpha = [1.0 for _ in range(5)]  # Example class weights: one uniform weight per class


In [2]:
# Display the example class-weight list.
alpha

[1.0, 1.0, 1.0, 1.0, 1.0]

In [3]:
import torch
# A list mixing plain Python ints with a 0-dim tensor ...
a = [1, 2, torch.tensor(3)]
# ... is accepted by torch.tensor; the recorded output is tensor([1, 2, 3]).
torch.tensor(a)

tensor([1, 2, 3])

In [1]:
import cv2

# Report when OpenCV sees zero CUDA-enabled devices.
# NOTE(review): the message says "Exiting..." but nothing here stops execution.
if cv2.cuda.getCudaEnabledDeviceCount() == 0:
    print("CUDA-enabled GPU not found. Exiting...")

CUDA-enabled GPU not found. Exiting...


In [2]:
from sklearn.metrics import precision_recall_fscore_support
import numpy as np


In [6]:
# Toy 5-class example (labels 0..4) in which every class appears in both arrays.
y_pred = np.array([1, 1, 2, 0, 3, 4, 2])
targets = np.array([1, 0, 2, 1, 4, 4, 3])

# Macro average: per-class precision / recall / F-score averaged with equal class weight.
precision_recall_fscore_support(
    y_true=targets,
    y_pred=y_pred,
    average='macro',
)


(0.4, 0.4, 0.3666666666666666, None)

In [8]:
# Same targets as the previous example, but class 3 is never predicted.
y_pred = np.array([1, 0, 2, 1, 4, 4, 2])
targets = np.array([1, 0, 2, 1, 4, 4, 3])

# Precision for class 3 is 0/0 here. zero_division=0 scores it as 0 explicitly,
# which is the same value the default produces, but without emitting the
# undefined-metric warning into the cell output.
precision_recall_fscore_support(targets, y_pred, average='macro', zero_division=0)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


(0.7, 0.8, 0.7333333333333333, None)

In [9]:
import torch

In [15]:
# torch.Tensor(...) class constructor fed Python ints; the recorded output shows float values.
a = torch.Tensor([1, 2, 3])
# torch.tensor(...) factory; the trailing `3.` is a float literal, and the recorded output is float too.
b = torch.tensor([1, 2, 3.])

In [16]:
# Display the tensor built with the torch.Tensor constructor.
a

tensor([1., 2., 3.])

In [17]:
# Display the tensor built with the torch.tensor factory.
b

tensor([1., 2., 3.])

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.Module):
    """Multi-class focal loss built on ``F.cross_entropy``.

    For each sample the loss is ``alpha[target] * (1 - pt) ** gamma * ce``,
    where ``ce`` is the unreduced cross entropy and ``pt = exp(-ce)``. The
    per-sample values are averaged over the batch.

    Args:
        alpha: Optional per-class weights indexed by class id. May be a tensor
            or a plain sequence of numbers (converted to a float32 tensor).
            ``None`` (the default) disables class weighting.
        gamma: Exponent applied to the ``(1 - pt)`` modulating factor.
        label_smoothing: Passed through unchanged to ``F.cross_entropy``.
    """

    def __init__(self, alpha=None, gamma=2, label_smoothing=0):
        super(FocalLoss, self).__init__()
        # Lists/tuples cannot be indexed by a tensor of targets, so normalise
        # any non-tensor weights to a tensor once, up front.
        if alpha is not None and not torch.is_tensor(alpha):
            alpha = torch.as_tensor(alpha, dtype=torch.float32)
        self.alpha = alpha
        self.gamma = gamma
        self.label_smoothing = label_smoothing

    def forward(self, inputs, targets):
        """Return the mean focal loss for a batch.

        Args:
            inputs: Logits accepted by ``F.cross_entropy``.
            targets: Class indices; also used to look up ``alpha`` per sample.

        Returns:
            A scalar tensor: the batch mean of the per-sample focal loss.
        """
        ce_loss = F.cross_entropy(inputs, targets, reduction='none', label_smoothing=self.label_smoothing)
        # NOTE: with label_smoothing > 0, ce_loss includes the smoothing term, so
        # pt is exp(-smoothed CE) rather than the plain softmax probability.
        pt = torch.exp(-ce_loss)
        weight = (1 - pt) ** self.gamma
        if self.alpha is not None:
            # Look the weights up on the same device as the loss so that a CPU
            # alpha still works when inputs/targets live on an accelerator.
            weight = self.alpha.to(ce_loss.device)[targets] * weight
        loss = (weight * ce_loss).mean()
        return loss

In [34]:
# Two identical rows of 5-class logits; both samples are labelled with class 4.
inputs = torch.tensor([[-2.0, -5.0, 3.0, 6.0, 12.0]] * 2)
targets = torch.tensor([4] * 2)

# Per-sample (unreduced) cross entropy with label smoothing 0.1.
ce_loss = F.cross_entropy(
    input=inputs,
    target=targets,
    reduction='none',
    label_smoothing=0.1,
)
ce_loss

tensor([0.9226, 0.9226])

In [9]:
# pt = exp(-ce_loss), computed per sample.
# NOTE(review): the recorded output (0.9974) comes from an earlier run (In [9]);
# it does not correspond to the ce_loss values shown for In [34].
pt = (-ce_loss).exp()
pt

tensor([0.9974, 0.9974])

In [10]:
# Focal-loss hyperparameters for the manual computation: exponent and one weight per class.
gamma = 1
alpha = torch.tensor([0.2, 5.0, 3.0, 2.0, 3.0])

# Indexing with the target tensor picks each sample's class weight.
alpha[targets]

tensor([3., 3.])

In [11]:
# Manual focal loss: class weight * (1 - pt)^gamma * cross entropy, averaged over the batch.
loss = torch.mean(alpha[targets] * (1 - pt) ** gamma * ce_loss)
loss


tensor(2.0249e-05)

In [33]:
# Same computation through the FocalLoss module (no label smoothing); the
# recorded output matches the manual result above (2.0249e-05).
loss_fn = FocalLoss(alpha, gamma=1, label_smoothing=0.0)
loss_fn(inputs, targets)

tensor(2.0249e-05)

In [3]:
# Multiplying a 1-element tensor by a Python scalar scales its value (0.5 -> 2.5);
# it does not repeat the element five times.
alpha = 5 * torch.tensor([0.5])
alpha

tensor([2.5000])

In [1]:
# ** binds tighter than *, so this is 2 * 9, not 6 ** 2.
2 * (3 ** 2)

18

In [None]:
import torch
import torchvision
from torch import nn

class OneHeadModel(nn.Module):
    """EfficientNet-B4 encoder followed by a single classification head.

    Pipeline: encoder feature map -> global max pool and global average pool
    (each batch-normalised separately) -> concatenation -> ``dense1``
    (Linear + ReLU + Dropout) -> ``classification_head``.

    Args:
        device: Stored as ``self.device``. The model does not move itself to
            this device; the caller is responsible for that.
        p_dropout: Dropout probability used in ``dense1`` and in the
            classification head.
        num_classes: Number of logits produced by the classification head.
            Defaults to 5, the previously hard-coded value.
    """

    # Channel count of the encoder's final feature map (EfficientNet-B4 features).
    ENCODER_CHANNELS = 1792
    # Width of the dense embedding that ``forward`` returns as ``enc_out``.
    EMBEDDING_DIM = 512

    def __init__(self, device, p_dropout, num_classes=5):
        super(OneHeadModel, self).__init__()

        self.device = device
        self.p_dropout = p_dropout

        # Pretrained EfficientNet-B4; only its convolutional feature extractor is kept.
        weights = torchvision.models.EfficientNet_B4_Weights.DEFAULT
        efficientNet = torchvision.models.efficientnet_b4(weights=weights)
        self.encoder = efficientNet.features

        # Global pooling: collapse each feature map to a single value per channel.
        self.global_max_pool = nn.AdaptiveMaxPool2d(1)
        self.global_avg_pool = nn.AdaptiveAvgPool2d(1)

        # One batch norm per pooled branch (max / avg), applied before concatenation.
        self.batch_norm_1 = nn.BatchNorm1d(self.ENCODER_CHANNELS)
        self.batch_norm_2 = nn.BatchNorm1d(self.ENCODER_CHANNELS)

        # Shared dense layer over the concatenated [max, avg] features.
        self.dense1 = nn.Sequential(
            nn.Linear(self.ENCODER_CHANNELS * 2, self.EMBEDDING_DIM),
            nn.ReLU(),
            nn.Dropout(p=self.p_dropout)
        )

        # Classification head
        self.classification_head = nn.Sequential(
            nn.Linear(self.EMBEDDING_DIM, 32),
            nn.ReLU(),
            nn.Dropout(p=self.p_dropout),
            nn.Linear(32, num_classes)  # one output node per class
        )

        # Apply He initialization to classification_head
        self._initialize_weights()

    def _initialize_weights(self):
        """He-initialise the Linear layers of ``classification_head`` and zero their biases.

        ``dense1`` and the encoder are left with their existing weights.
        """
        for module in self.classification_head:
            if isinstance(module, nn.Linear):
                # Apply He initialization to weights
                nn.init.kaiming_normal_(module.weight, mode='fan_in', nonlinearity='relu')
                # Initialize biases to zero (optional, common practice)
                if module.bias is not None:
                    nn.init.zeros_(module.bias)

    def forward(self, x):
        """Run the encoder, pool, embed and classify.

        Args:
            x: Input batch for the encoder (assumed to be images shaped
                ``(N, 3, H, W)`` -- TODO confirm against the data loader).

        Returns:
            Tuple ``(class_out, enc_out)``: ``class_out`` holds the
            classification logits cast to float32, and ``enc_out`` is the
            ``dense1`` output, kept for visualising the data with t-SNE.
        """
        x = self.encoder(x)  # Extract features

        # Global pooling, flattened to (batch, channels)
        max_pooled = self.global_max_pool(x).view(x.size(0), -1)
        avg_pooled = self.global_avg_pool(x).view(x.size(0), -1)

        # Normalise each branch separately, then concatenate along the feature dim
        x1 = self.batch_norm_1(max_pooled)
        x2 = self.batch_norm_2(avg_pooled)
        x = torch.cat([x1, x2], dim=1)

        # dense1 already ends in ReLU + Dropout, so its output is non-negative and
        # needs no further activation.
        x = self.dense1(x)

        # enc_out for visualizing data with t-SNE
        enc_out = x

        # Classification branch
        class_out = self.classification_head(x).float()

        return class_out, enc_out

    

In [10]:
# Pass an actual torch.device instance; the bare `torch.device` class is not a device.
model = OneHeadModel(
    device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    p_dropout=0.4,
)

In [11]:
from torchinfo import summary

In [12]:
# Seed torch's global RNG, then print a per-layer torchinfo summary of the model
# for a batch of 32 RGB images of size 240x240.
torch.manual_seed(33)
summary(
    model,
    input_size=(32, 3, 240, 240),  # the keyword is "input_size", not "input_shape"
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
OneHeadModel (OneHeadModel)                                  [32, 3, 240, 240]    [32, 5]              --                   True
├─Sequential (encoder)                                       [32, 3, 240, 240]    [32, 1792, 8, 8]     --                   True
│    └─Conv2dNormActivation (0)                              [32, 3, 240, 240]    [32, 48, 120, 120]   --                   True
│    │    └─Conv2d (0)                                       [32, 3, 240, 240]    [32, 48, 120, 120]   1,296                True
│    │    └─BatchNorm2d (1)                                  [32, 48, 120, 120]   [32, 48, 120, 120]   96                   True
│    │    └─SiLU (2)                                         [32, 48, 120, 120]   [32, 48, 120, 120]   --                   --
│    └─Sequential (1)                                        [32, 48, 120, 120]   [32, 24, 120