In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import random
import time
import os
import sys
from torch.utils.data import DataLoader, Dataset
import math
from torchvision.transforms import v2
from PIL import Image
import matplotlib.pyplot as plt
import cv2
from torch.autograd import Variable
from torchvision import models,transforms
from transformers import pipeline
import torchvision.transforms.functional as TF
from transformers import AutoImageProcessor, AutoModelForDepthEstimation, AutoConfig

In [None]:
class CDC(nn.Module):
    '''
    Central difference convolution (CDC) layer.

    A regular convolution is applied to the input. Separately, the input is
    convolved with a 1x1 kernel obtained by summing the regular kernel over
    its two spatial axes. The layer returns
    ``regular_output - theta * summed_kernel_output``.

    Parameters
    ----------
    in_channels, out_channels, kernel_size, stride, padding, dilation, groups:
        Forwarded to the wrapped ``nn.Conv2d``. ``padding`` is only applied
        when ``kernel_size == 3``; otherwise the convolution is unpadded.
    bias: bool
        When true, the wrapped convolution gets its own bias AND a second,
        zero-initialised bias parameter (``self.bias``) is added to the
        summed-kernel branch.
    theta: float
        Weight of the summed-kernel branch in the output.
    '''
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1,
                 padding=1, dilation=1, groups=1, bias=False, theta=0.7):
        super().__init__()
        self.stride = stride
        self.groups = groups
        self.dilation = dilation
        self.theta = theta
        self.padding = padding

        # Bias of the summed-kernel branch (registered as a parameter when present).
        self.bias = nn.Parameter(torch.zeros(out_channels)) if bias else None

        # Only 3x3 kernels are padded; any other kernel size runs unpadded.
        conv_padding = padding if kernel_size == 3 else 0
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=conv_padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
        )

    def forward(self, x):
        regular = self.conv(x)

        # conv.weight is (out_channels, in_channels, k, k). Summing the two
        # spatial axes one after the other leaves (out_channels, in_channels);
        # two trailing singleton axes turn that into a 1x1 convolution kernel.
        summed_kernel = self.conv.weight.sum(2).sum(2).unsqueeze(-1).unsqueeze(-1)

        central = F.conv2d(
            input=x,
            weight=summed_kernel,
            bias=self.bias,
            stride=self.stride,
            padding=0,
            groups=self.groups,
        )
        return regular - self.theta * central
        

In [None]:
class conv_block_nested(nn.Module):
    """
    Two consecutive CDC -> BatchNorm2d -> ReLU stages.

    The first stage maps ``in_ch`` channels to ``out_ch``; the second keeps
    ``out_ch`` channels. Both CDC layers use 3x3 kernels with padding 1 and
    bias enabled, and a single in-place ReLU module is shared by both stages.
    """

    def __init__(self, in_ch,  out_ch):
        super().__init__()
        # One in-place ReLU instance, reused after each batch-norm.
        self.activation = nn.ReLU(inplace=True)

        # Stage 1: in_ch -> out_ch
        self.conv1 = CDC(in_ch, out_ch, kernel_size=3, padding=1, bias=True)
        self.bn1 = nn.BatchNorm2d(out_ch)

        # Stage 2: out_ch -> out_ch
        self.conv2 = CDC(out_ch, out_ch, kernel_size=3, padding=1, bias=True)
        self.bn2 = nn.BatchNorm2d(out_ch)

    def forward(self, x):
        stage_one = self.activation(self.bn1(self.conv1(x)))
        return self.activation(self.bn2(self.conv2(stage_one)))

In [None]:
class CustomDataset(Dataset):
    """
    Image dataset read from a directory of class sub-folders.

    Every sub-folder of ``path`` is scanned for files ending in ``.jpg`` or
    ``.png``. Images in a folder whose name contains ``'client'`` get label 1,
    all others get label 0.

    Parameters
    ----------
    path:
        Root directory containing one sub-folder per group of images.
    device:
        Stored on the instance as ``self.device``; not used by this class.
    transform:
        Optional callable applied to the RGB PIL image in ``__getitem__``.
    img_size:
        Stored on the instance as ``self.img_size``; not used by this class.
    """

    def __init__(self, path, device, transform=None, img_size=(128, 128)):
        super(CustomDataset, self).__init__()
        self.device = device
        self.images = []
        self.labels = []
        self.img_size = img_size
        self.transform = transform
        self.path = path
        self.num_channels = 1

        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> sample mapping reproducible across runs and machines.
        for folder in sorted(os.listdir(self.path)):
            folder_path = os.path.join(self.path, folder)
            # Skip stray files in the root (listing them would raise).
            if not os.path.isdir(folder_path):
                continue
            label = 1 if 'client' in folder else 0
            for image in sorted(os.listdir(folder_path)):
                if image.endswith(('.jpg', '.png')):
                    self.images.append(os.path.join(folder_path, image))
                    self.labels.append(label)

    def __len__(self):
        """Number of image files collected at construction time."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``; the image is RGB and transformed if a transform was given."""
        # The context manager closes the underlying file as soon as the
        # pixel data has been converted into an independent RGB image.
        with Image.open(self.images[idx]) as handle:
            img = handle.convert("RGB")

        if self.transform is not None:
            img = self.transform(img)

        return img, self.labels[idx]

In [None]:
class SimulateDistanceTransform:
    """
    Shrink an image by a random factor and pad it back to its original size.

    A scale factor is drawn uniformly from ``[min_scale, max_scale]``. The
    image is resized by that factor, then edge-padded so the result has the
    same width and height as the input. When the drawn factor is below 0.65
    a 3x3 Gaussian blur is applied as well.

    The input must expose ``.size`` as ``(width, height)``.
    """

    def __init__(self, min_scale=0.5, max_scale=1.0):
        self.min_scale, self.max_scale = min_scale, max_scale

    def __call__(self, img):
        scale = random.uniform(self.min_scale, self.max_scale)

        full_w, full_h = img.size
        small_w = int(full_w * scale)
        small_h = int(full_h * scale)

        # Shrink the image (Resize expects (height, width)).
        shrink = transforms.Resize((small_h, small_w))
        img = shrink(img)

        # Distribute the missing pixels around the shrunken image; when the
        # gap is odd, the extra pixel goes to the right / bottom side.
        gap_w = full_w - small_w
        gap_h = full_h - small_h
        border = (
            gap_w // 2,          # left
            gap_h // 2,          # top
            (gap_w + 1) // 2,    # right
            (gap_h + 1) // 2,    # bottom
        )
        restore = transforms.Pad(border, padding_mode='edge')
        img = restore(img)

        # Strongly shrunken images additionally get a light blur.
        if scale < 0.65:
            soften = transforms.GaussianBlur(kernel_size=3)
            img = soften(img)

        return img

In [None]:
img_size = (252, 252)
batch_size = 600

# Training-time pipeline: random augmentations followed by a resize to
# `img_size` and conversion to a tensor.
transf = transforms.Compose([
    SimulateDistanceTransform(min_scale=0.4, max_scale=1.0),  
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
#     transforms.CenterCrop(img_size),
    transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.1),
    transforms.Resize(img_size),
    transforms.ToTensor()
])

# Evaluation-time pipeline: no random augmentation, so the validation loss is
# comparable between epochs and can be used to pick the best checkpoint.
val_transf = transforms.Compose([
    transforms.Resize(img_size),
    transforms.ToTensor()
])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
train_dataset = CustomDataset("/kaggle/input/increased-liveliness-detection/train",device,transf,img_size=img_size)
train_loader = DataLoader(train_dataset,batch_size=batch_size,shuffle=True)

# Validation uses the deterministic pipeline and a fixed sample order.
val_dataset = CustomDataset("/kaggle/input/increased-liveliness-detection/val",device,val_transf,img_size=img_size)
val_loader = DataLoader(val_dataset,batch_size=batch_size,shuffle=False)

# test_dataset = CustomDataset("/kaggle/input/increased-liveliness-detection/test",device,val_transf,img_size=img_size)
# test_loader = DataLoader(test_dataset,batch_size=batch_size,shuffle=False)

In [None]:
class FineTuneDepthAnything(nn.Module):
    """
    Wrapper around the ``depth-anything/Depth-Anything-V2-Small-hf`` model.

    Parameters
    ----------
    device:
        Device the wrapped model is moved to; also used as ``map_location``
        when a checkpoint is loaded.
    load_trained: bool
        If true, the architecture is built from the published config and the
        weights are read from ``model_path``. If false, the published
        pretrained weights are loaded and every parameter is frozen except
        those whose name contains ``'head'``,
        ``'neck.fusion_stage.layers.2.residual_layer'`` or
        ``'neck.fusion_stage.layers.3'``.
    model_path:
        Path to a saved state dict; required when ``load_trained`` is true.

    Raises
    ------
    ValueError
        If ``load_trained`` is true and ``model_path`` is ``None``.
    """

    def __init__(self, device,load_trained=False,model_path=None):
        super(FineTuneDepthAnything, self).__init__()
        if load_trained:
            # Fail early and clearly instead of deep inside torch.load.
            if model_path is None:
                raise ValueError("model_path must be provided when load_trained=True")

            config = AutoConfig.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
            self.depth_anything = AutoModelForDepthEstimation.from_config(config)

            # weights_only=True restricts unpickling to tensors and plain
            # containers, matching how the classifier checkpoint is loaded
            # elsewhere in this notebook.
            state_dict = torch.load(model_path, map_location=device, weights_only=True)

            # The checkpoint keys carry this wrapper's attribute name;
            # remove it so they match the inner model's own keys.
            new_state_dict = {
                key.replace("depth_anything.", ""): value
                for key, value in state_dict.items()
            }
            self.depth_anything.load_state_dict(new_state_dict)
        else:
            self.depth_anything = AutoModelForDepthEstimation.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
            # Only parameters whose name contains one of these fragments stay trainable.
            trainable_fragments = (
                'head',
                'neck.fusion_stage.layers.2.residual_layer',
                'neck.fusion_stage.layers.3',
            )
            for name, param in self.depth_anything.named_parameters():
                param.requires_grad = any(fragment in name for fragment in trainable_fragments)

        self.depth_anything = self.depth_anything.to(device)

    def forward(self, inp):
        """Return the model's ``predicted_depth`` with an extra axis inserted at position 1."""
        return self.depth_anything(inp).predicted_depth.unsqueeze(1)

In [None]:
class ClassifierUCDCN(nn.Module):
    """
    Two-class classifier operating on a single-channel map.

    Architecture: ``conv_block_nested(1 -> 8)`` -> 2x2 average pool ->
    ``conv_block_nested(8 -> 1)`` -> 2x2 average pool -> flatten ->
    ``Linear(-> 64)`` -> ReLU -> dropout -> ``Linear(64 -> 2)`` -> sigmoid.

    Parameters
    ----------
    dropout: float
        Dropout probability applied before the final linear layer.
    img_size: tuple of int
        ``(height, width)`` of the input map. Defaults to ``(252, 252)``.
        It determines the input width of the first linear layer.
    """

    def __init__(self, dropout=0.5, img_size=(252, 252)):
        super(ClassifierUCDCN, self).__init__()        
        self.layers = 8
        self.dropout_prob = dropout
        self.img_size = tuple(img_size)
        self.hidden_size = 64
        self.conv1 = conv_block_nested(1, self.layers)
        self.relu = nn.ReLU()
        # Kept under the name `maxpool` for compatibility, but this is average pooling.
        self.maxpool = nn.AvgPool2d(kernel_size=2, stride=2)
        self.conv2 = conv_block_nested(self.layers, 1)

        # Two stride-2 poolings reduce each spatial side to side // 4.
        # Parenthesised explicitly: `h//4 * w//4` parses as ((h//4) * w)//4,
        # which only happens to equal (h//4) * (w//4) for some sizes (252 is one).
        self.flat_features = (self.img_size[0] // 4) * (self.img_size[1] // 4)

        self.linear_1 = nn.Linear(self.flat_features, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_prob)
        self.linear_2 = nn.Linear(self.hidden_size, 2)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, inp):
        """Return a ``(batch, 2)`` tensor of sigmoid-activated scores."""
        pooled_1 = self.maxpool(self.conv1(inp))
        pooled_2 = self.maxpool(self.conv2(pooled_1))

        # flatten(1) keeps the batch axis fixed, so an input of the wrong
        # spatial size fails in the linear layer instead of being silently
        # redistributed across the batch as `view(-1, N)` could do.
        hidden = self.relu(self.linear_1(pooled_2.flatten(1)))
        scores = self.linear_2(self.dropout(hidden))

        # NOTE(review): the output is squashed to (0, 1) here, while FocalLoss
        # in this notebook applies log_softmax on top of it. Kept as is because
        # existing checkpoints were trained this way; confirm it is intended.
        return self.sigmoid(scores)

In [None]:
class FocalLoss(nn.Module):
    """
    Focal loss over class scores.

    For each sample the loss is ``-alpha_t * (1 - p_t) ** gamma * log(p_t)``
    where ``p_t`` is the softmax probability of the target class. The
    modulating factor ``(1 - p_t) ** gamma`` is treated as a constant during
    back-propagation (it is computed from a detached tensor).

    Parameters
    ----------
    gamma:
        Focusing exponent.
    alpha:
        Class weights. A number ``a`` is expanded to ``[a, 1 - a]`` (weight of
        class 0, weight of class 1); a list is used as given; any other value
        disables class weighting.
    size_average: bool
        If true the per-sample losses are averaged, otherwise summed.
    """

    def __init__(self, gamma=2, alpha=0.27, size_average=True):
        super(FocalLoss, self).__init__()
        self.gamma = gamma  # Focusing parameter
        self.size_average = size_average  # Whether to average the loss

        # Handling the alpha parameter for balancing the classes
        if isinstance(alpha, (float, int)):
            alpha_weights = torch.tensor([alpha, 1 - alpha], dtype=torch.float32)
        elif isinstance(alpha, list):
            alpha_weights = torch.tensor(alpha, dtype=torch.float32)
        else:
            alpha_weights = None

        # A buffer follows the module through .to()/.cuda()/.double().
        # persistent=False keeps it out of state_dict, as a plain attribute was.
        self.register_buffer("alpha", alpha_weights, persistent=False)

    def forward(self, inp, target):
        """
        Compute the loss.

        ``inp`` holds unnormalised class scores with classes on axis 1; inputs
        with more than two axes are flattened to ``(N * ..., C)``. ``target``
        holds integer class indices, one per flattened row.
        """
        # Reshape input if necessary
        if inp.dim() > 2:
            inp = inp.view(inp.size(0), inp.size(1), -1)  # N,C,H,W => N,C,H*W
            inp = inp.transpose(1, 2)  # N,C,H*W => N,H*W,C
            inp = inp.contiguous().view(-1, inp.size(2))  # N,H*W,C => N*H*W,C

        # One index per row; gather needs int64 indices, so cast defensively.
        target = target.reshape(-1, 1).long()

        # Log-probability of the target class for every row.
        logpt = F.log_softmax(inp, dim=1).gather(1, target).view(-1)
        # Probability of the target class, detached from the autograd graph.
        pt = logpt.detach().exp()

        # Apply alpha weighting
        if self.alpha is not None:
            # Match the input's device/dtype locally, without mutating the module.
            alpha = self.alpha.to(device=inp.device, dtype=inp.dtype)
            logpt = logpt * alpha.gather(0, target.view(-1))

        # Compute the focal loss
        loss = -1 * (1 - pt) ** self.gamma * logpt

        # Return the average or sum of losses
        return loss.mean() if self.size_average else loss.sum()

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Replicate over at most two GPUs, but only over GPUs that actually exist:
# a hard-coded [0, 1] fails on a single-GPU machine. `None` (no GPU) lets
# DataParallel fall back to its default behaviour.
dp_device_ids = list(range(min(torch.cuda.device_count(), 2))) or None

# Depth estimator: restored from the fine-tuned checkpoint and used purely
# as a fixed feature extractor by the training loop.
depth_map_model = FineTuneDepthAnything(device, load_trained=True, model_path='/kaggle/input/finetune_depth_anything/pytorch/63_epochs_trained/1/fine_tuning_depth_anything.pth').to(device)
depth_map_model = torch.nn.DataParallel(depth_map_model,device_ids=dp_device_ids).to(device)
# A model built from a config starts in training mode; switch to eval so any
# train-only layers (e.g. dropout) behave deterministically during inference.
depth_map_model.eval()

# Classifier: restored from its checkpoint, then wrapped so that
# `model.module` refers to the underlying ClassifierUCDCN.
model = ClassifierUCDCN(dropout=0.4).to(device)
model.load_state_dict(torch.load('/kaggle/input/finetune_depth_anyhting_classifier/pytorch/64_size_7_epochs_trained/1/64_finetune_depth_anything_classifier.pth', map_location=device,weights_only=True))
model = torch.nn.DataParallel(model,device_ids=dp_device_ids).to(device)

In [None]:
# Training configuration: number of epochs, loss, optimiser and LR schedule.
num_epochs = 50

# Focal loss with its default gamma / alpha settings.
criterion = FocalLoss().to(device)

# Adam over the classifier's parameters, with weight decay.
optimizer = optim.Adam(
    params=model.parameters(),
    lr=0.0015,
    weight_decay=0.001,
)

# Lower the learning rate when the monitored value stops decreasing
# for more than `patience` scheduler steps.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    patience=2,
)

In [None]:
# Training driver for the classifier.
#
# Every epoch: one pass over `train_loader`, updating `model` only; depth
# maps come from `depth_map_model` under `torch.no_grad()`.
# Every 5th epoch: one pass over `val_loader`; the classifier weights are
# written to "best_finetune_depth_anything_classifier.pth" whenever the
# validation loss improves.
# After every epoch the latest weights are written to
# "finetune_depth_anything_classifier.pth" and the LR scheduler is stepped.
train_loss = []  # one entry per epoch: sum of the per-batch training losses
val_loss = []  # one entry per validation run (every 5th epoch)
best_epoch_loss = float('inf')  # lowest validation loss seen so far
for epoch in range(num_epochs):
    running_loss = 0.0
    total_correct = 0  # Total correct predictions
    total_samples = 0  # Total samples processed
    model.train()
    for i, data in enumerate(train_loader, 0):
        inputs, binary_labels = data
        inputs, binary_labels = inputs.to(device), binary_labels.to(device)
        optimizer.zero_grad()
        # The depth estimator is not trained here: no graph is built for it.
        with torch.no_grad():
            depth_maps = depth_map_model(inputs)
        pred_labels = model(depth_maps)
        loss = criterion(pred_labels, binary_labels)

        loss.backward()
        optimizer.step()
        # NOTE(review): this accumulates one scalar per batch, so the epoch
        # total scales with the number of batches rather than being a
        # per-sample average.
        running_loss += loss.item()

        # Update total correct predictions and total samples for accuracy calculation
        total_correct += (pred_labels.argmax(1) == binary_labels).sum().item()
        total_samples += binary_labels.size(0)

    epoch_accuracy = total_correct / total_samples  # Calculate accuracy for the epoch
    train_loss.append(running_loss)
    print(f"Epoch {epoch+1}, Training Loss: {running_loss} , Accuracy: {epoch_accuracy} and lr:{optimizer.param_groups[0]['lr']}")
    # Validation runs only on every 5th epoch.
    if (epoch + 1) % 5 == 0:
        model.eval()
        with torch.no_grad():
            running_loss_test = 0.0
            total_correct_test = 0  # Total correct predictions for validation
            total_samples_test = 0  # Total samples processed for validation
            for i, data in enumerate(val_loader, 0):
                inputs_test, binary_labels_test = data
                inputs_test, binary_labels_test = inputs_test.to(device), binary_labels_test.to(device)
                depth_maps_test = depth_map_model(inputs_test)
                pred_labels_test = model(depth_maps_test)
                loss_test = criterion(pred_labels_test, binary_labels_test)
                running_loss_test += loss_test.item()

                # Update total correct predictions and total samples for validation accuracy calculation
                total_correct_test += (pred_labels_test.argmax(1) == binary_labels_test).sum().item()
                total_samples_test += binary_labels_test.size(0)

            epoch_accuracy_test = total_correct_test / total_samples_test  # Calculate accuracy for the validation epoch
            val_loss.append(running_loss_test)
            print(f"Validation Loss: {running_loss_test} , Accuracy: {epoch_accuracy_test} and lr:{optimizer.param_groups[0]['lr']}")

            # Keep a separate copy of the weights with the lowest validation loss.
            # `model.module` is the classifier inside the DataParallel wrapper.
            if running_loss_test < best_epoch_loss:
                best_epoch_loss = running_loss_test
                torch.save(model.module.state_dict(), "best_finetune_depth_anything_classifier.pth")

    # Latest weights, overwritten after every epoch.
    torch.save(model.module.state_dict(), "finetune_depth_anything_classifier.pth")
    # NOTE(review): the plateau scheduler monitors the summed training loss,
    # not the validation loss — confirm this is intended.
    scheduler.step(running_loss)