In [17]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
import numpy as np
from PIL import Image
import h5py
import torch.nn.functional as F
import random

def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(38) 
class CrowdDataset(Dataset):
    def __init__(self, image_dir, density_dir, transform=None):
        self.image_dir = image_dir
        self.density_dir = density_dir
        self.image_filenames = [f for f in os.listdir(image_dir) if f.endswith('.jpg')]
        self.transform = transform

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.image_filenames[idx])
        density_path = os.path.join(self.density_dir, self.image_filenames[idx].replace('.jpg', '.h5'))

        image = Image.open(img_path).convert('RGB')

        with h5py.File(density_path, 'r') as hf:
            density_map = np.array(hf['density'])

        if self.transform:
            image = self.transform(image)

        density_map = torch.from_numpy(density_map).unsqueeze(0).float() 

        return image, density_map

class DConvNet_v1(nn.Module):
    def __init__(self, pretrained=True, num_regressors=3):
        super(DConvNet_v1, self).__init__()

        vgg16 = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)

        self.features = nn.Sequential(*list(vgg16.features.children())[:23])

        self.features.add_module('pool4', nn.MaxPool2d(kernel_size=2, stride=1, padding=0))

        self.features.add_module('dilated_conv5_1', nn.Conv2d(512, 512, kernel_size=3, padding=2, dilation=2))
        self.features.add_module('bn5_1', nn.BatchNorm2d(512))
        self.features.add_module('relu5_1', nn.ReLU(inplace=True))
        self.features.add_module('dilated_conv5_2', nn.Conv2d(512, 512, kernel_size=3, padding=2, dilation=2))
        self.features.add_module('bn5_2', nn.BatchNorm2d(512))
        self.features.add_module('relu5_2', nn.ReLU(inplace=True))
        self.features.add_module('dilated_conv5_3', nn.Conv2d(512, 512, kernel_size=3, padding=2, dilation=2))
        self.features.add_module('bn5_3', nn.BatchNorm2d(512))
        self.features.add_module('relu5_3', nn.ReLU(inplace=True))

        self.regressors = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(512, 64, kernel_size=1, groups=64),
                nn.ReLU(inplace=True),
                nn.Dropout(0.3), 
                nn.Conv2d(64, 1, kernel_size=1)  
            ) for _ in range(num_regressors)
        ])
        self.regressors.apply(self.init_weights)

    def forward(self, x):
        x = self.features(x)
        
        outputs = [regressor(x) for regressor in self.regressors]
        return outputs
    def init_weights(self, m):
        if isinstance(m, nn.Conv2d):
            torch.nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                m.bias.data.fill_(0.01)

def negative_correlation_loss(outputs, target, lambda_param=0.001):
    mse_loss = nn.MSELoss()
    target = target.to(outputs[0].device)

    total_mse = sum([mse_loss(F.interpolate(output, size=target.shape[2:], mode='bilinear', align_corners=False), target) for output in outputs]) / len(outputs)
    correlations = []
    for i in range(len(outputs)):
        for j in range(i + 1, len(outputs)):
            o_i = outputs[i].view(-1)
            o_j = outputs[j].view(-1)
            corr = torch.corrcoef(torch.stack([o_i, o_j]))[0, 1]
            correlations.append(corr)

    correlation_penalty = -sum(correlations) / (len(correlations) + 1e-8) if correlations else 0
    return total_mse + lambda_param * correlation_penalty

def get_optimizer(model):
    return optim.SGD([
        {'params': model.features.parameters(), 'lr': 1e-5},  
        {'params': model.regressors.parameters(), 'lr': 1e-3}  
    ], momentum=0.9, weight_decay=1e-4) 

def get_scheduler(optimizer):
    return optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

data_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(224, scale=(0.7, 1.0)), 
    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
    transforms.RandomPerspective(distortion_scale=0.2),   
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

def custom_collate(batch):
    max_height = max([item[0].shape[1] for item in batch])
    max_width = max([item[0].shape[2] for item in batch])

    resized_images = []
    resized_density_maps = []
    for image, density_map in batch:
        image = F.interpolate(image.unsqueeze(0), size=(max_height, max_width), mode='bilinear', align_corners=False)
        image = image.squeeze(0)

        density_map = F.interpolate(density_map.unsqueeze(0), size=(max_height, max_width), mode='bilinear', align_corners=False)
        density_map = density_map.squeeze(0)

        resized_images.append(image)
        resized_density_maps.append(density_map)

    return torch.stack(resized_images), torch.stack(resized_density_maps)

def train_model(model, train_dataloader, test_dataloader, num_epochs=20, lambda_param=0.001, save_path='model_checkpoint.pth'):
    model = model.to(device)
    optimizer = get_optimizer(model)
    scheduler = get_scheduler(optimizer)
    best_mae = float('inf')
    early_stop_patience = 5 
    no_improve_epochs = 0

    for epoch in range(num_epochs):
        running_loss = 0.0
        model.train()
        for images, density_maps in train_dataloader:
            images = images.to(device)
            density_maps = density_maps.to(device)

            optimizer.zero_grad()
            outputs = model(images) 
            loss = negative_correlation_loss(outputs, density_maps, lambda_param)  
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5)
            optimizer.step() 

            running_loss += loss.item()

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_dataloader)}")

        model.eval()
        
        mae, rmse = evaluate_model(model, test_dataloader)
        if mae < best_mae:
            best_mae = mae
            best_rmse=rmse
            no_improve_epochs = 0
        else:
            no_improve_epochs += 1

        if no_improve_epochs >= early_stop_patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

        scheduler.step()
    print(f" MAE: {best_mae}, Validation RMSE: {best_rmse}")
    
def evaluate_model(model, dataloader):
    model.eval()
    mae, rmse = 0.0, 0.0
    with torch.no_grad():
        for images, density_maps in dataloader:
            images = images.to(device)
            density_maps = density_maps.to(device)

            outputs = model(images)  
            avg_output = sum(outputs) / len(outputs)

            mae += torch.abs(avg_output.sum() - density_maps.sum()).item()
            rmse += ((avg_output.sum() - density_maps.sum()) ** 2).item()

    mae /= len(dataloader)
    rmse = (rmse / len(dataloader)) ** 0.5
    return mae, rmse

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Dataset paths (Update these to your local paths)
train_image_dir = '/kaggle/input/shanghaitech-with-people-density-map/ShanghaiTech/part_B/train_data/images'
train_density_dir = '/kaggle/input/shanghaitech-with-people-density-map/ShanghaiTech/part_B/train_data/ground-truth-h5'

test_image_dir = '/kaggle/input/shanghaitech-with-people-density-map/ShanghaiTech/part_B/test_data/images'
test_density_dir = '/kaggle/input/shanghaitech-with-people-density-map/ShanghaiTech/part_B/test_data/ground-truth-h5'

train_dataset = CrowdDataset(train_image_dir, train_density_dir, transform=data_transforms)
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=4, collate_fn=custom_collate)

test_dataset = CrowdDataset(test_image_dir, test_density_dir, transform=data_transforms)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=custom_collate)

model = DConvNet_v1(pretrained=True)

train_model(model, train_dataloader, test_dataloader, num_epochs=20, lambda_param=0.001, save_path='model_checkpoint')

Epoch 1/20, Loss: 0.10095436438918114
Epoch 2/20, Loss: 0.04933089483529329
Epoch 3/20, Loss: 0.04395542748272419
Epoch 4/20, Loss: 0.03932452891021967
Epoch 5/20, Loss: 0.0361172978207469
Epoch 6/20, Loss: 0.03362361527979374
Epoch 7/20, Loss: 0.03214953804388642
Epoch 8/20, Loss: 0.031102775260806084
Early stopping at epoch 8
 MAE: 24.975480115866358, Validation RMSE: 32.255527951092915
