In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
import numpy as np
from PIL import Image
import h5py
import torch.nn.functional as F
import random

def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False  # This will make training slower, but more reproducible

set_seed(38) 

# Custom Dataset Class to Handle Images and Precomputed Density Maps
class CrowdDataset(Dataset):
    def __init__(self, image_dir, density_dir, transform=None):
        self.image_dir = image_dir
        self.density_dir = density_dir
        self.image_filenames = [f for f in os.listdir(image_dir) if f.endswith('.jpg')]
        self.transform = transform

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.image_filenames[idx])
        density_path = os.path.join(self.density_dir, self.image_filenames[idx].replace('.jpg', '.h5'))

        # Load image
        image = Image.open(img_path).convert('RGB')

        # Load density map from .h5 file
        with h5py.File(density_path, 'r') as hf:
            density_map = np.array(hf['density'])

        # Apply transforms to the image
        if self.transform:
            image = self.transform(image)

        # Convert density map to a tensor
        density_map = torch.from_numpy(density_map).unsqueeze(0).float()  # Add channel dimension

        return image, density_map


# D-ConvNet-v1 Implementation
class DConvNet_v1(nn.Module):
    def __init__(self, pretrained=True, num_regressors=3):
        super(DConvNet_v1, self).__init__()

        # Load the VGG16 model
        vgg16 = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)

        # Modify the feature extractor (up to conv4_3)
        self.features = nn.Sequential(*list(vgg16.features.children())[:23])  # Up to conv4_3

        # Modify the fourth pooling layer (set stride to 1)
        self.features.add_module('pool4', nn.MaxPool2d(kernel_size=2, stride=1, padding=0))

        # Add dilated convolutions in place of the fifth pooling layer
        self.features.add_module('dilated_conv5_1', nn.Conv2d(512, 512, kernel_size=3, padding=2, dilation=2))
        self.features.add_module('relu5_1', nn.ReLU(inplace=True))
        self.features.add_module('dilated_conv5_2', nn.Conv2d(512, 512, kernel_size=3, padding=2, dilation=2))
        self.features.add_module('relu5_2', nn.ReLU(inplace=True))
        self.features.add_module('dilated_conv5_3', nn.Conv2d(512, 512, kernel_size=3, padding=2, dilation=2))
        self.features.add_module('relu5_3', nn.ReLU(inplace=True))

        # Define group convolutional layers for regression with dropout
        self.regressors = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(512, 64, kernel_size=1, groups=64),  # Group convolution
                nn.ReLU(inplace=True),
                nn.Dropout(0.3),  # Added dropout layer
                nn.Conv2d(64, 1, kernel_size=1)  # Final 1x1 convolution to get the density map
            ) for _ in range(num_regressors)
        ])

    def forward(self, x):
        # Pass through the modified VGG16 feature extractor
        x = self.features(x)
        
        # Apply the group convolutional regressors
        outputs = [regressor(x) for regressor in self.regressors]
        return outputs


# Loss Function (Euclidean Loss + Negative Correlation)
def negative_correlation_loss(outputs, target, lambda_param=0.001):
    mse_loss = nn.MSELoss()
    target = target.to(outputs[0].device)

    # Upsample to match the target size
    total_mse = sum([mse_loss(F.interpolate(output, size=target.shape[2:], mode='bilinear', align_corners=False), target) for output in outputs]) / len(outputs)

    # Calculate pairwise correlations between regressors
    correlations = []
    for i in range(len(outputs)):
        for j in range(i + 1, len(outputs)):
            o_i = outputs[i].view(-1)
            o_j = outputs[j].view(-1)
            corr = torch.corrcoef(torch.stack([o_i, o_j]))[0, 1]
            correlations.append(corr)

    correlation_penalty = -sum(correlations) / (len(correlations) + 1e-8) if correlations else 0
    return total_mse + lambda_param * correlation_penalty


# Optimizer Function
def get_optimizer(model):
    return optim.SGD([
        {'params': model.features.parameters(), 'lr': 1e-5},  # Smaller learning rate for feature extraction layers
        {'params': model.regressors.parameters(), 'lr': 1e-4}  # Larger learning rate for regressor layers
    ], momentum=0.9, weight_decay=1e-3)  # Increased weight decay to 1e-3

# Learning Rate Scheduler
def get_scheduler(optimizer):
    return optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)


# Data augmentation and normalization
data_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),  # Increased rotation range
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),  # Random cropping
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Brightness and contrast adjustments
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])


# Custom collate function to resize images and density maps in each batch
def custom_collate(batch):
    max_height = max([item[0].shape[1] for item in batch])
    max_width = max([item[0].shape[2] for item in batch])

    resized_images = []
    resized_density_maps = []
    for image, density_map in batch:
        image = F.interpolate(image.unsqueeze(0), size=(max_height, max_width), mode='bilinear', align_corners=False)
        image = image.squeeze(0)

        density_map = F.interpolate(density_map.unsqueeze(0), size=(max_height, max_width), mode='bilinear', align_corners=False)
        density_map = density_map.squeeze(0)

        resized_images.append(image)
        resized_density_maps.append(density_map)

    return torch.stack(resized_images), torch.stack(resized_density_maps)


# Training Loop with Model Saving and Evaluation
def train_model(model, train_dataloader, test_dataloader, num_epochs=20, lambda_param=0.001, save_path='model_checkpoint.pth'):
    model = model.to(device)
    optimizer = get_optimizer(model)
    scheduler = get_scheduler(optimizer)
    best_mae = float('inf')
    early_stop_patience = 5  # Increased patience for early stopping
    no_improve_epochs = 0

    for epoch in range(num_epochs):
        running_loss = 0.0
        model.train()
        for images, density_maps in train_dataloader:
            images = images.to(device)
            density_maps = density_maps.to(device)

            optimizer.zero_grad()
            outputs = model(images)  # Forward pass
            loss = negative_correlation_loss(outputs, density_maps, lambda_param)  # Loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update weights

            running_loss += loss.item()

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_dataloader)}")

        # Evaluate the model after each epoch
        model.eval()
        mae, rmse = evaluate_model(model, test_dataloader)

        # Save the model if it improves
        if mae < best_mae:
            best_mae = mae
            no_improve_epochs = 0
            torch.save(model.state_dict(), f'{save_path}_epoch_{epoch+1}.pth')
            print(f"Model saved as {save_path}_epoch_{epoch+1}.pth")
        else:
            no_improve_epochs += 1

        # Early stopping
        if no_improve_epochs >= early_stop_patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

        # Step the scheduler
        scheduler.step()


# Evaluation Function
def evaluate_model(model, dataloader):
    model.eval()
    mae, rmse = 0.0, 0.0
    with torch.no_grad():
        for images, density_maps in dataloader:
            images = images.to(device)
            density_maps = density_maps.to(device)

            outputs = model(images)  # Forward pass
            avg_output = sum(outputs) / len(outputs)

            mae += torch.abs(avg_output.sum() - density_maps.sum()).item()
            rmse += ((avg_output.sum() - density_maps.sum()) ** 2).item()

    mae /= len(dataloader)
    rmse = (rmse / len(dataloader)) ** 0.5
    print(f"Validation MAE: {mae}, Validation RMSE: {rmse}")
    return mae, rmse


# Configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Dataset paths (Update these to your local paths)
train_image_dir = '/kaggle/input/shanghaitech-with-people-density-map/ShanghaiTech/part_B/train_data/images'
train_density_dir = '/kaggle/input/shanghaitech-with-people-density-map/ShanghaiTech/part_B/train_data/ground-truth-h5'

test_image_dir = '/kaggle/input/shanghaitech-with-people-density-map/ShanghaiTech/part_B/test_data/images'
test_density_dir = '/kaggle/input/shanghaitech-with-people-density-map/ShanghaiTech/part_B/test_data/ground-truth-h5'

# Initialize the Dataset and DataLoader
train_dataset = CrowdDataset(train_image_dir, train_density_dir, transform=data_transforms)
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=4, collate_fn=custom_collate)

test_dataset = CrowdDataset(test_image_dir, test_density_dir, transform=data_transforms)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=4, collate_fn=custom_collate)

# Model Initialization
model = DConvNet_v1(pretrained=True)

# Training the model and evaluating after each epoch
train_model(model, train_dataloader, test_dataloader, num_epochs=20, lambda_param=0.001, save_path='model_checkpoint')

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:02<00:00, 224MB/s] 


Epoch 1/20, Loss: 0.004469093352090567
Validation MAE: 85.8977326984647, Validation RMSE: 86.69504490549015
Model saved as model_checkpoint_epoch_1.pth
Epoch 2/20, Loss: 0.003640223166439682
Validation MAE: 71.2718930787678, Validation RMSE: 72.38968757589605
Model saved as model_checkpoint_epoch_2.pth
Epoch 3/20, Loss: 0.0031663927901536227
Validation MAE: 61.03608066220827, Validation RMSE: 62.21734985703183
Model saved as model_checkpoint_epoch_3.pth
Epoch 4/20, Loss: 0.002910363390110433
Validation MAE: 53.600651173651976, Validation RMSE: 54.881849279522186
Model saved as model_checkpoint_epoch_4.pth
Epoch 5/20, Loss: 0.002750130305066705
Validation MAE: 47.98938973342316, Validation RMSE: 49.58911963925525
Model saved as model_checkpoint_epoch_5.pth
Epoch 6/20, Loss: 0.002679771021939814
Validation MAE: 45.943983560876, Validation RMSE: 47.515429783845086
Model saved as model_checkpoint_epoch_6.pth
Epoch 7/20, Loss: 0.0026428696955554188
Validation MAE: 43.91480182695992, Validat

In [2]:
!pip freeze > requirements.txt


In [3]:
print("Torch:", torch.__version__)
print("Torchvision:", torchvision.__version__)
print("h5py:", h5py.__version__)
print("Pillow:", PIL.__version__)
print("scikit-learn:", sklearn.__version__)
print("NumPy:", np.__version__)

Torch: 2.4.0


NameError: name 'torchvision' is not defined