In [1]:
import os
import cv2
import numpy as np
import scipy.io as sio
from tqdm import tqdm

In [2]:
IMG_DIR = "data/ShanghaiTech/part_B/train_data/images"
GT_DIR  = "data/ShanghaiTech/part_B/train_data/ground-truth"
OUTPUT_IMG_DIR = "processed_B_images"
OUTPUT_GT_DIR  = "processed_B_points"

os.makedirs(OUTPUT_IMG_DIR, exist_ok=True)
os.makedirs(OUTPUT_GT_DIR, exist_ok=True)

In [3]:
TARGET_SIZE = (512, 512)  # width, height

def scale_points(points, orig_w, orig_h, new_w, new_h):
    scaled = []
    scale_x = new_w / orig_w
    scale_y = new_h / orig_h
    
    for x, y in points:
        scaled.append([x * scale_x, y * scale_y])
    return np.array(scaled)

for img_name in tqdm(os.listdir(IMG_DIR)):
    if img_name.endswith(".jpg"):
        
        img_path = os.path.join(IMG_DIR, img_name)
        img = cv2.imread(img_path)
        orig_h, orig_w = img.shape[:2]

        resized_img = cv2.resize(img, TARGET_SIZE)
        
        mat_path = os.path.join(GT_DIR, "GT_" + img_name.replace(".jpg", ".mat"))
        mat = sio.loadmat(mat_path)
        points = mat["image_info"][0][0][0][0][0]

        points_scaled = scale_points(points, orig_w, orig_h, TARGET_SIZE[0], TARGET_SIZE[1])

        save_img_path = os.path.join(OUTPUT_IMG_DIR, img_name)
        cv2.imwrite(save_img_path, resized_img)

        save_points_path = os.path.join(OUTPUT_GT_DIR, img_name.replace(".jpg", ".npy"))
        np.save(save_points_path, points_scaled)

100%|████████████████████████████████████████████████████████████████████████████████| 400/400 [00:31<00:00, 12.71it/s]


In [4]:
from scipy.ndimage import gaussian_filter

In [5]:
IMG_DIR = "processed_B_images"      
POINTS_DIR = "processed_B_points"     
OUTPUT_DENSITY_DIR = "density_B_maps"

os.makedirs(OUTPUT_DENSITY_DIR, exist_ok=True)

In [6]:
def generate_density_map(points, h, w, sigma=4):
    density = np.zeros((h, w), dtype=np.float32)
    
    for x, y in points:
        if 0 <= int(x) < w and 0 <= int(y) < h:
            density[int(y), int(x)] = 1
    
    density = gaussian_filter(density, sigma=sigma)
    return density

In [7]:
for img_name in tqdm(os.listdir(IMG_DIR)):
    if img_name.endswith(".jpg"):
        
        img_path = os.path.join(IMG_DIR, img_name)
        img = cv2.imread(img_path)
        h, w = img.shape[:2]

        points_path = os.path.join(POINTS_DIR, img_name.replace(".jpg", ".npy"))
        points = np.load(points_path)

        density_map = generate_density_map(points, h, w, sigma=4)

        np.save(os.path.join(OUTPUT_DENSITY_DIR, img_name.replace(".jpg", ".npy")), density_map)

100%|████████████████████████████████████████████████████████████████████████████████| 400/400 [00:37<00:00, 10.80it/s]


In [8]:
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T

In [9]:
# paths
IMG_DIR = "processed_B_images"
DENSITY_DIR = "density_B_maps"

In [10]:
transform = T.Compose([
    T.ToTensor(),  # Convert HWC image to CHW tensor (0–1)
    T.Normalize(mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225])   # ImageNet normalization
])

In [11]:
class CrowdDataset(Dataset):
    def __init__(self, img_dir, density_dir, transform=None):
        self.img_dir = img_dir
        self.density_dir = density_dir
        self.transform = transform
        self.images = [f for f in os.listdir(img_dir) if f.endswith(".jpg")]

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = self.images[idx]

        # Load image
        img_path = os.path.join(self.img_dir, img_name)
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Apply transform
        if self.transform:
            img = self.transform(img)

        # Load density
        density_path = os.path.join(self.density_dir, img_name.replace(".jpg", ".npy"))
        density = np.load(density_path)

        # Downsample density map (factor 8)
        density_down = cv2.resize(density, (density.shape[1]//8, density.shape[0]//8))

        # Multiply by 64 to preserve total count
        density_down = density_down * 64

        # Convert to tensor
        density_tensor = torch.tensor(density_down, dtype=torch.float32).unsqueeze(0)

        return img, density_tensor


In [12]:
dataset = CrowdDataset(IMG_DIR, DENSITY_DIR, transform)
train_loader = DataLoader(dataset, batch_size=4, shuffle=True)

print("Total samples:", len(dataset))

# Example test load
img, den = dataset[0]
print("Image tensor shape:", img.shape)
print("Density map shape:", den.shape)
print("Estimated count:", den.sum().item())

Total samples: 400
Image tensor shape: torch.Size([3, 512, 512])
Density map shape: torch.Size([1, 64, 64])
Estimated count: 233.12474060058594
