In [19]:
import os
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch.nn.functional as F


In [20]:

class CrowdDataset(Dataset):
    def __init__(self, img_dir, den_dir):
        self.img_dir = img_dir
        self.den_dir = den_dir
        self.img_files = sorted([f for f in os.listdir(img_dir) if f.endswith("_img.npy")])

    def __len__(self):
        return len(self.img_files)

    def __getitem__(self, idx):
        img_name = self.img_files[idx]
        den_name = img_name.replace("_img.npy", "_den.npy")

        img_path = os.path.join(self.img_dir, img_name)
        den_path = os.path.join(self.den_dir, den_name)

        img = np.load(img_path)
        den = np.load(den_path)

        img = torch.tensor(img, dtype=torch.float32)
        den = torch.tensor(den, dtype=torch.float32).unsqueeze(0)  # (1,512,512)

        # ðŸ”¥ FIX â€” downsample density map for CSRNet output
        den = F.interpolate(den.unsqueeze(0), size=(64, 64), mode='nearest').squeeze(0)

        return img, den


In [21]:
train_img_path = r"C:\Users\sneha\.cache\kagglehub\datasets\tthien\shanghaitech\versions\1\ShanghaiTech\part_A\images_npy"
train_den_path = r"C:\Users\sneha\.cache\kagglehub\datasets\tthien\shanghaitech\versions\1\ShanghaiTech\part_A\density_maps_npy"

dataset = CrowdDataset(train_img_path, train_den_path)
train_loader = DataLoader(dataset, batch_size=1, shuffle=True)


In [22]:
class CSRNet(nn.Module):
    def __init__(self):
        super(CSRNet, self).__init__()

        self.frontend = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, 3, padding=1), nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, 3, padding=1), nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            nn.Conv2d(128, 256, 3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, 3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, 3, padding=1), nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            nn.Conv2d(256, 512, 3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, 3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, 3, padding=1), nn.ReLU(inplace=True)
        )

        self.backend = nn.Sequential(
            nn.Conv2d(512, 512, 3, dilation=2, padding=2), nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, 3, dilation=2, padding=2), nn.ReLU(inplace=True),
            nn.Conv2d(512, 256, 3, dilation=2, padding=2), nn.ReLU(inplace=True),
            nn.Conv2d(256, 128, 3, dilation=2, padding=2), nn.ReLU(inplace=True),
            nn.Conv2d(128, 64, 3, dilation=2, padding=2), nn.ReLU(inplace=True),
            nn.Conv2d(64, 1, 1)
        )

    def forward(self, x):
        x = self.frontend(x)
        x = self.backend(x)
        return x


In [23]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = CSRNet().to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)


In [24]:
import time
from tqdm import tqdm

num_epochs = 50

batch_losses = []
epoch_losses = []

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0

    start_time = time.time()

    print(f"\nEpoch {epoch+1}/{num_epochs}")
    pbar = tqdm(train_loader, ncols=120)

    for batch_idx, (img, den) in enumerate(pbar):

        # Move to device
        img = img.to(device)
        den = den.to(device)

        # Forward + backward
        optimizer.zero_grad()
        output = model(img)
        loss = criterion(output, den)
        loss.backward()
        optimizer.step()

        # Logging
        batch_losses.append(loss.item())
        epoch_loss += loss.item()

        # --- Update tqdm bar like Keras ---
        pbar.set_postfix({
            "batch": f"{batch_idx+1}/{len(train_loader)}",
            "loss": f"{loss.item():.4f}"
        })

    # Compute avg loss
    epoch_loss /= len(train_loader)
    epoch_losses.append(epoch_loss)

    epoch_time = time.time() - start_time

    print(f"Epoch {epoch+1}/{num_epochs} - "
          f"loss: {epoch_loss:.4f} - "
          f"time: {epoch_time:.2f}s")


Epoch 1/50


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 300/300 [45:21<00:00,  9.07s/it] 


Epoch Loss: 0.0010
Epoch 2/50


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 300/300 [42:38<00:00,  8.53s/it]


Epoch Loss: 0.0001
Epoch 3/50


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 300/300 [40:16<00:00,  8.06s/it]


Epoch Loss: 0.0001
Epoch 4/50


  8%|â–Š         | 25/300 [02:28<27:17,  5.95s/it]


KeyboardInterrupt: 

In [None]:
torch.save(model.state_dict(), "csrnet_partA_weights.pth")
print("Saved as csrnet_partA_weights.pth")


In [None]:
plt.figure(figsize=(10,4))
plt.plot(batch_losses)
plt.title("Batch Loss")
plt.show()

plt.figure(figsize=(10,4))
plt.plot(epoch_losses)
plt.title("Epoch Loss")
plt.show()


In [26]:
# Cell 1
import os
import numpy as np
import time
import matplotlib.pyplot as plt
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn.functional as F

# Path to the folder containing IMG_{i}_img.npy and IMG_{i}_den.npy
DATA_DIR = r"C:\Users\sneha\.cache\kagglehub\datasets\tthien\shanghaitech\versions\1\ShanghaiTech\part_A\train_data\processed"   # <-- CHANGE this to your output_dir from preprocessing

# training config
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 4         # reduce if OOM
lr = 1e-5
num_epochs = 50
print_every = 20
save_best_path = "csrnet_best_partA.pth"
checkpoint_path = "csrnet_checkpoint.pth"


In [27]:
# Cell 2
class CrowdNPYDataset(Dataset):
    def __init__(self, data_dir):
        self.data_dir = data_dir
        # find image files using pattern
        self.img_files = sorted([f for f in os.listdir(data_dir) if f.endswith("_img.npy")])

    def __len__(self):
        return len(self.img_files)

    def __getitem__(self, idx):
        img_name = self.img_files[idx]
        base = img_name.replace("_img.npy", "")
        img_path = os.path.join(self.data_dir, f"{base}_img.npy")
        den_path = os.path.join(self.data_dir, f"{base}_den.npy")

        img = np.load(img_path).astype(np.float32)          # expected (H,W,3) or (3,H,W)
        den = np.load(den_path).astype(np.float32)          # expected (H_ds, W_ds) e.g., (64,64)

        # If image is (H,W,3) -> convert to (3,H,W)
        if img.ndim == 3 and img.shape[2] == 3:
            img = img.transpose(2,0,1)

        # Normalize image to 0-1
        img = img / 255.0

        img_t = torch.from_numpy(img).float()
        den_t = torch.from_numpy(den).float().unsqueeze(0)   # (1, H_ds, W_ds)

        return img_t, den_t

# quick dataset sanity
dataset = CrowdNPYDataset(DATA_DIR)
print("Samples found:", len(dataset))
sample_img, sample_den = dataset[0]
print("Sample shapes:", sample_img.shape, sample_den.shape)


Samples found: 300
Sample shapes: torch.Size([3, 512, 512]) torch.Size([1, 512, 512])


In [30]:
# Cell 3
import torchvision.models as models

class CSRNet(nn.Module):
    def __init__(self, load_pretrained_frontend=True):
        super(CSRNet, self).__init__()
        # use VGG16 frontend (features up to conv4_3)
        vgg = models.vgg16(weights=None)
        frontend_layers = list(vgg.features.children())[:23]  # up to conv4_3 pool
        self.frontend = nn.Sequential(*frontend_layers)

        # backend (dilated convs)
        self.backend = nn.Sequential(
            nn.Conv2d(512, 512, 3, padding=2, dilation=2), nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, 3, padding=2, dilation=2), nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, 3, padding=2, dilation=2), nn.ReLU(inplace=True),
            nn.Conv2d(512, 256, 3, padding=2, dilation=2), nn.ReLU(inplace=True),
            nn.Conv2d(256, 128, 3, padding=2, dilation=2), nn.ReLU(inplace=True),
            nn.Conv2d(128, 64, 3, padding=2, dilation=2), nn.ReLU(inplace=True),
        )

        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)

    def forward(self, x):
        x = self.frontend(x)
        x = self.backend(x)
        x = self.output_layer(x)
        return x

# create model
model = CSRNet().to(device)
print("Model created on", device)


Model created on cpu


In [31]:
# Cell 4
# split dataset (80/20)
total = len(dataset)
train_len = int(0.8 * total)
val_len = total - train_len
train_ds, val_ds = random_split(dataset, [train_len, val_len])

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
val_loader   = DataLoader(val_ds, batch_size=1, shuffle=False, num_workers=1, pin_memory=True)

print(f"Train: {len(train_ds)}, Val: {len(val_ds)}")


Train: 240, Val: 60


In [32]:
# Cell 5
criterion = nn.MSELoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)

def count_from_density(tensor):
    # tensor: numpy array or torch tensor
    if isinstance(tensor, torch.Tensor):
        return tensor.sum().item()
    return np.sum(tensor)


In [33]:
# Cell 6
best_val_mae = float("inf")
train_losses = []
val_losses = []

for epoch in range(1, num_epochs+1):
    model.train()
    epoch_loss = 0.0
    start = time.time()

    pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{num_epochs}", ncols=120)
    for batch_idx, (imgs, dens) in enumerate(pbar, 1):
        imgs = imgs.to(device)            # (B,3,H,W)
        dens = dens.to(device)            # (B,1,h,w) e.g. (B,1,64,64)

        optimizer.zero_grad()
        outputs = model(imgs)

        # if output and dens sizes don't match, resize outputs
        if outputs.shape != dens.shape:
            outputs = F.interpolate(outputs, size=dens.shape[2:], mode='bilinear', align_corners=False)

        loss = criterion(outputs, dens)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        pbar.set_postfix({"batch_loss": f"{loss.item():.6f}"})

    avg_train_loss = epoch_loss / len(train_loader)
    train_losses.append(avg_train_loss)

    # Validation (MAE & val loss)
    model.eval()
    val_loss = 0.0
    mae_sum = 0.0
    rmse_sum = 0.0
    with torch.no_grad():
        for imgs, dens in val_loader:
            imgs = imgs.to(device)
            dens = dens.to(device)
            preds = model(imgs)
            if preds.shape != dens.shape:
                preds = F.interpolate(preds, size=dens.shape[2:], mode='bilinear', align_corners=False)

            val_loss += criterion(preds, dens).item()
            # for counting, upsample patch to original ds size if needed â€” here both are ds
            pred_count = preds.sum().item()
            gt_count = dens.sum().item()
            mae_sum += abs(pred_count - gt_count)
            rmse_sum += (pred_count - gt_count)**2

    avg_val_loss = val_loss / len(val_loader)
    val_losses.append(avg_val_loss)
    MAE = mae_sum / len(val_loader)
    RMSE = (rmse_sum / len(val_loader))**0.5

    epoch_time = time.time() - start
    print(f"Epoch {epoch}/{num_epochs} - loss: {avg_train_loss:.6f} - val_loss: {avg_val_loss:.6f} - MAE: {MAE:.3f} - RMSE: {RMSE:.3f} - time: {epoch_time:.1f}s")

    # save checkpoint every epoch (optional) / save best
    torch.save({
        "epoch": epoch,
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
        "train_losses": train_losses,
        "val_losses": val_losses,
    }, checkpoint_path)

    if MAE < best_val_mae:
        best_val_mae = MAE
        torch.save(model.state_dict(), save_best_path)
        print(f"Saved best model (MAE={best_val_mae:.3f}) -> {save_best_path}")


Epoch 1/50:   0%|                                                                                | 0/60 [00:10<?, ?it/s]


RuntimeError: DataLoader worker (pid(s) 7940, 8680) exited unexpectedly

In [None]:
import os
import numpy as np

root = r"C:\Users\sneha\.cache\kagglehub\datasets\tthien\shanghaitech\versions\1\ShanghaiTech\part_A\train_data\processed"

for file in os.listdir(root):
    if file.endswith(".npy"):
        path = os.path.join(root, file)
        try:
            d = np.load(path)
        except Exception as e:
            print("Corrupted:", path, e)
    


NameError: name 'img_tensor' is not defined

In [None]:
# Cell 7
plt.figure(figsize=(12,4))
plt.subplot(1,2,1)
plt.plot(train_losses, label="train_loss")
plt.plot(val_losses, label="val_loss")
plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
plt.legend()

plt.subplot(1,2,2)
plt.plot(train_losses, label="train")
plt.plot(val_losses, label="val")
plt.xlabel("Epoch")
plt.title("Loss curves")
plt.show()


In [1]:
import os
import numpy as np

data_dir = r"C:\Users\sneha\.cache\kagglehub\datasets\tthien\shanghaitech\versions\1\ShanghaiTech\part_A\train_data\processed_fixed"

img_files = sorted([f for f in os.listdir(data_dir) if f.endswith("_img.npy")])
den_files = sorted([f for f in os.listdir(data_dir) if f.endswith("_den.npy")])

for img_file, den_file in zip(img_files, den_files):
    img = np.load(os.path.join(data_dir, img_file))
    den = np.load(os.path.join(data_dir, den_file))
    print(f"{img_file}: {img.shape}, {den_file}: {den.shape}")


IMG_100_img.npy: (3, 512, 512), IMG_100_den.npy: (1, 64, 64)
IMG_101_img.npy: (3, 512, 512), IMG_101_den.npy: (1, 64, 64)
IMG_102_img.npy: (3, 512, 512), IMG_102_den.npy: (1, 64, 64)
IMG_103_img.npy: (3, 512, 512), IMG_103_den.npy: (1, 64, 64)
IMG_104_img.npy: (3, 512, 512), IMG_104_den.npy: (1, 64, 64)
IMG_105_img.npy: (3, 512, 512), IMG_105_den.npy: (1, 64, 64)
IMG_106_img.npy: (3, 512, 512), IMG_106_den.npy: (1, 64, 64)
IMG_107_img.npy: (3, 512, 512), IMG_107_den.npy: (1, 64, 64)
IMG_108_img.npy: (3, 512, 512), IMG_108_den.npy: (1, 64, 64)
IMG_109_img.npy: (3, 512, 512), IMG_109_den.npy: (1, 64, 64)
IMG_10_img.npy: (3, 512, 512), IMG_10_den.npy: (1, 64, 64)
IMG_110_img.npy: (3, 512, 512), IMG_110_den.npy: (1, 64, 64)
IMG_111_img.npy: (3, 512, 512), IMG_111_den.npy: (1, 64, 64)
IMG_112_img.npy: (3, 512, 512), IMG_112_den.npy: (1, 64, 64)
IMG_113_img.npy: (3, 512, 512), IMG_113_den.npy: (1, 64, 64)
IMG_114_img.npy: (3, 512, 512), IMG_114_den.npy: (1, 64, 64)
IMG_115_img.npy: (3, 512, 