In [4]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader, Subset
import numpy as np
from PIL import Image
import h5py
from sklearn.model_selection import KFold
import torch.nn.functional as F
import random
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False 

set_seed(38)
class CrowdDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.image_filenames = [f for f in os.listdir(data_dir) if f.endswith('.jpg')]
        self.transform = transform

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):
        img_filename = self.image_filenames[idx]
        density_filename = img_filename.replace('.jpg', '.h5')
        img_path = os.path.join(self.data_dir, img_filename)
        density_path = os.path.join(self.data_dir, density_filename)
        image = Image.open(img_path).convert('RGB')

        with h5py.File(density_path, 'r') as hf:
            density_map = np.array(hf['density'])

        if self.transform:
            image = self.transform(image)

        density_map = torch.from_numpy(density_map).unsqueeze(0).float()  

        return image, density_map

class DConvNet_v1(nn.Module):
    def __init__(self, pretrained=True, num_regressors=3):
        super(DConvNet_v1, self).__init__()

        vgg16 = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)

        self.features = nn.Sequential(*list(vgg16.features.children())[:23])

        self.features.add_module('pool4', nn.MaxPool2d(kernel_size=2, stride=1, padding=0))

        self.features.add_module('dilated_conv5_1', nn.Conv2d(512, 512, kernel_size=3, padding=2, dilation=2))
        self.features.add_module('relu5_1', nn.ReLU(inplace=True))
        self.features.add_module('dilated_conv5_2', nn.Conv2d(512, 512, kernel_size=3, padding=2, dilation=2))
        self.features.add_module('relu5_2', nn.ReLU(inplace=True))
        self.features.add_module('dilated_conv5_3', nn.Conv2d(512, 512, kernel_size=3, padding=2, dilation=2))
        self.features.add_module('relu5_3', nn.ReLU(inplace=True))

        self.regressors = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(512, 64, kernel_size=1, groups=64),  
                nn.ReLU(inplace=True),
                nn.Dropout(0.3),  
                nn.Conv2d(64, 1, kernel_size=1)
            ) for _ in range(num_regressors)
        ])
        self.regressors.apply(self.init_weights)

    def forward(self, x):
        x = self.features(x)
        outputs = [regressor(x) for regressor in self.regressors]
        return outputs

    def init_weights(self, m):
        if isinstance(m, nn.Conv2d):
            torch.nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                m.bias.data.fill_(0.01)

def negative_correlation_loss(outputs, target, lambda_param=0.001):
    mse_loss = nn.MSELoss()
    target = target.to(outputs[0].device)
    total_mse = sum([mse_loss(F.interpolate(output, size=target.shape[2:], mode='bilinear', align_corners=False), target) for output in outputs]) / len(outputs)

    correlations = []
    for i in range(len(outputs)):
        for j in range(i + 1, len(outputs)):
            o_i = outputs[i].view(-1)
            o_j = outputs[j].view(-1)
            corr = torch.corrcoef(torch.stack([o_i, o_j]))[0, 1]
            correlations.append(corr)

    correlation_penalty = -sum(correlations) / (len(correlations) + 1e-8) if correlations else 0
    return total_mse + lambda_param * correlation_penalty

def get_optimizer(model):
    return optim.SGD([
        {'params': model.features.parameters(), 'lr': 1e-5},  
        {'params': model.regressors.parameters(), 'lr': 1e-3}
    ], momentum=0.9, weight_decay=1e-3)

def get_scheduler(optimizer):
    return optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

def custom_collate(batch):
    max_height = max([item[0].shape[1] for item in batch])
    max_width = max([item[0].shape[2] for item in batch])

    resized_images = []
    resized_density_maps = []
    for image, density_map in batch:
        image = F.interpolate(image.unsqueeze(0), size=(max_height, max_width), mode='bilinear', align_corners=False)
        image = image.squeeze(0)

        density_map = F.interpolate(density_map.unsqueeze(0), size=(max_height, max_width), mode='bilinear', align_corners=False)
        density_map = density_map.squeeze(0)

        resized_images.append(image)
        resized_density_maps.append(density_map)

    return torch.stack(resized_images), torch.stack(resized_density_maps)

def train_model(model, train_dataloader, test_dataloader, num_epochs=40, lambda_param=0.001, save_path='model_checkpoint.pth'):
    model = model.to(device)
    optimizer = get_optimizer(model)
    scheduler = get_scheduler(optimizer)
    best_mae = float('inf')
    early_stop_patience = 5
    no_improve_epochs = 0

    for epoch in range(num_epochs):
        running_loss = 0.0
        model.train()
        for images, density_maps in train_dataloader:
            images = images.to(device)
            density_maps = density_maps.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = negative_correlation_loss(outputs, density_maps, lambda_param)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_dataloader)}")

        model.eval()
        mae, rmse = evaluate_model(model, test_dataloader)

        if mae < best_mae:
            best_mae = mae
            no_improve_epochs = 0
            # torch.save(model.state_dict(), f'{save_path}_epoch_{epoch+1}.pth')
            # print(f"Model saved as {save_path}_epoch_{epoch+1}.pth")
        else:
            no_improve_epochs += 1

        if no_improve_epochs >= early_stop_patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

        scheduler.step()

def evaluate_model(model, dataloader):
    model.eval()
    mae, rmse = 0.0, 0.0
    with torch.no_grad():
        for images, density_maps in dataloader:
            images = images.to(device)
            density_maps = density_maps.to(device)

            outputs = model(images)
            avg_output = sum(outputs) / len(outputs)

            mae += torch.abs(avg_output.sum() - density_maps.sum()).item()
            rmse += ((avg_output.sum() - density_maps.sum()) ** 2).item()

    mae /= len(dataloader)
    rmse = (rmse / len(dataloader)) ** 0.5
    print(f"Validation MAE: {mae}, Validation RMSE: {rmse}")
    return mae, rmse

def cross_validate_model(model, dataset, num_epochs=40, k_folds=5, lambda_param=0.001):
    kfold = KFold(n_splits=k_folds, shuffle=True)
    fold_results = {'mae': [], 'rmse': []}
    
    for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):
        print(f'FOLD {fold + 1}/{k_folds}')
        print('--------------------------------')

        train_subset = Subset(dataset, train_ids)
        test_subset = Subset(dataset, test_ids)

        train_dataloader = DataLoader(train_subset, batch_size=4, shuffle=True, num_workers=4, collate_fn=custom_collate)
        test_dataloader = DataLoader(test_subset, batch_size=4, shuffle=False, num_workers=4, collate_fn=custom_collate)

        model_fold = DConvNet_v1(pretrained=True)
        model_fold = model_fold.to(device)

        train_model(model_fold, train_dataloader, test_dataloader, num_epochs, lambda_param, save_path=f'model_checkpoint_fold_{fold + 1}.pth')

        mae, rmse = evaluate_model(model_fold, test_dataloader)
        fold_results['mae'].append(mae)
        fold_results['rmse'].append(rmse)

        print(f'Fold {fold + 1} Results - MAE: {mae}, RMSE: {rmse}')
        print('--------------------------------')

    avg_mae = np.mean(fold_results['mae'])
    avg_rmse = np.mean(fold_results['rmse'])

    print(f'\nCross-validation Results:')
    print(f'Average MAE: {avg_mae}')
    print(f'Average RMSE: {avg_rmse}')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Path to the folder containing both images and density maps
data_dir = '/kaggle/input/ucf-cc-50-with-people-density-map/UCF_CC_50'

data_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

dataset = CrowdDataset(data_dir, transform=data_transforms)

model = DConvNet_v1(pretrained=True)

cross_validate_model(model, dataset, num_epochs=40, k_folds=5, lambda_param=0.001)


FOLD 1/5
--------------------------------
Epoch 1/40, Loss: 0.003496061731129885
Validation MAE: 447.00665283203125, Validation RMSE: 465.6704899479173
Epoch 2/40, Loss: 0.003222417028155178
Validation MAE: 437.75767008463544, Validation RMSE: 455.806433925996
Epoch 3/40, Loss: 0.00293611359084025
Validation MAE: 425.164311726888, Validation RMSE: 442.67174456644506
Epoch 4/40, Loss: 0.0027060977881774306
Validation MAE: 420.38197326660156, Validation RMSE: 438.0140031175754
Epoch 5/40, Loss: 0.002379937598016113
Validation MAE: 409.29469299316406, Validation RMSE: 426.07588238726134
Epoch 6/40, Loss: 0.00233909641392529
Validation MAE: 411.86363728841144, Validation RMSE: 428.53469600148287
Epoch 7/40, Loss: 0.0022839986020699145
Validation MAE: 405.69276936848956, Validation RMSE: 422.2940295526724
Epoch 8/40, Loss: 0.0022706508636474608
Validation MAE: 403.6186777750651, Validation RMSE: 420.2694014458662
Epoch 9/40, Loss: 0.0022955797030590475
Validation MAE: 398.4171091715495, Val