In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input tree and print the full path of every file in it.
for root, _, files in os.walk('/kaggle/input'):
    for fname in files:
        print(os.path.join(root, fname))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import glob
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import scipy.io as sio


In [None]:
# Dataset root as mounted in this session.
base = "/kaggle/input/shanghaitech/ShanghaiTech"

# Part A training split: image folder and ground-truth folder.
partA_train_img = f"{base}/part_A/train_data/images"
partA_train_gt = f"{base}/part_A/train_data/ground-truth"

# Sanity check: report how many entries each folder contains.
for label, folder in (("Images:", partA_train_img),
                      ("Ground truth files:", partA_train_gt)):
    print(label, len(os.listdir(folder)))


In [None]:
# Collect every training JPG in a deterministic (sorted) order.
img_paths = sorted(glob.glob(os.path.join(partA_train_img, "*.jpg")))
sample_img_path = img_paths[265]     # sample at index 265 of the sorted list (not the first image)
sample_img_path


In [None]:
# Decode the sample image and keep an RGB array copy of it.
img = Image.open(sample_img_path)
img = img.convert('RGB')
img_np = np.array(img)

# The annotation file shares the image stem, prefixed with "GT_".
basename = os.path.basename(sample_img_path).replace(".jpg", "")
mat_path = os.path.join(partA_train_gt, f"GT_{basename}.mat")
mat = sio.loadmat(mat_path)

# Unwrap the nested MATLAB struct down to the array of (x, y) points.
image_info = mat["image_info"]
points = np.array(image_info[0][0][0][0][0])
print("Number of head annotations:", len(points))


In [None]:
# NOTE(review): this cell repeats the preceding cell verbatim; re-running it
# simply recomputes img, img_np, basename, mat_path, mat and points.

# Decode the sample image as RGB and convert it to a NumPy array.
img = Image.open(sample_img_path).convert('RGB')
img_np = np.array(img)

# Annotation file name is "GT_<image stem>.mat" inside the ground-truth folder.
basename = os.path.basename(sample_img_path).replace(".jpg", "")
mat_path = os.path.join(partA_train_gt, "GT_" + basename + ".mat")
mat = sio.loadmat(mat_path)

# Extract points (x, y)
points = mat["image_info"][0][0][0][0][0]   # unwrap the nested MATLAB struct/cell layers
points = np.array(points)
print("Number of head annotations:", len(points))


In [None]:
from scipy.ndimage import gaussian_filter

H, W = img_np.shape[:2]
density = np.zeros((H, W), dtype=np.float32)

# Put a unit impulse at every annotated point, clamping coordinates into the image.
for px, py in points:
    col = min(W - 1, max(0, int(px)))
    row = min(H - 1, max(0, int(py)))
    density[row, col] += 1

# Spread the impulses with a fixed-width Gaussian (sigma=4).
density = gaussian_filter(density, sigma=4)
print("Density sum (crowd count):", density.sum())


In [None]:
# Side-by-side view: source image (left) and its density map (right).
fig, (ax_img, ax_den) = plt.subplots(1, 2, figsize=(12, 5))

ax_img.imshow(img_np)
ax_img.set_title("Original Image")
ax_img.axis("off")

ax_den.imshow(density, cmap='jet')
ax_den.set_title(f"Density Map (count = {density.sum():.1f})")
ax_den.axis("off")

plt.show()


In [None]:
# Persist the sample density map as <image stem>.npy under the working directory.
save_dir = "/kaggle/working/partA_train_density"
os.makedirs(save_dir, exist_ok=True)

sample_out_path = os.path.join(save_dir, f"{basename}.npy")
np.save(sample_out_path, density)


In [None]:
# The crowd count is the integral (sum) of the density map.
crowd_count = np.sum(density)
print(f"Crowd count: {crowd_count}")


In [None]:
# Same two-panel figure as before, titled with the stored crowd_count.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
panels = (
    (img_np, {}, "Original Image"),
    (density, {"cmap": 'jet'}, f"Density Map (count = {crowd_count:.1f})"),
)
for ax, (picture, imshow_kwargs, title) in zip(axes, panels):
    ax.imshow(picture, **imshow_kwargs)
    ax.set_title(title)
    ax.axis("off")

plt.show()


In [None]:
import os
import glob
import numpy as np
import matplotlib.pyplot as plt

from PIL import Image
import scipy.io as sio
from scipy.ndimage import gaussian_filter

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms


In [None]:
base = "/kaggle/input/shanghaitech"


def _find_part_a_dir(root, split, leaf):
    """Return one directory matching ``<root>/**/part_A*/<split>/<leaf>``.

    ``glob`` returns matches in arbitrary order, so the matches are sorted and
    the first one is returned; this makes the choice reproducible when the
    dataset contains more than one matching folder.

    Raises:
        FileNotFoundError: if no path matches the pattern.
    """
    pattern = os.path.join(root, "**", "part_A*", split, leaf)
    matches = sorted(glob.glob(pattern, recursive=True))
    if not matches:
        raise FileNotFoundError(f"No directory matches {pattern!r}")
    return matches[0]


# auto-detect Part A folders
partA_train_img = _find_part_a_dir(base, "train_data", "images")
partA_train_gt  = _find_part_a_dir(base, "train_data", "ground*")

partA_test_img  = _find_part_a_dir(base, "test_data", "images")
partA_test_gt   = _find_part_a_dir(base, "test_data", "ground*")

print("Train img dir :", partA_train_img)
print("Train gt dir  :", partA_train_gt)
print("Test  img dir :", partA_test_img)
print("Test  gt dir  :", partA_test_gt)

print("Train JPG count:", len(glob.glob(os.path.join(partA_train_img, "*.jpg"))))
print("Test  JPG count:", len(glob.glob(os.path.join(partA_test_img, "*.jpg"))))


In [None]:
# Output folders for the pre-computed density maps (one per split).
out_train_density = "/kaggle/working/partA_train_density"
out_test_density  = "/kaggle/working/partA_test_density"
for density_dir in (out_train_density, out_test_density):
    os.makedirs(density_dir, exist_ok=True)

def generate_density_map(img_shape, points, sigma=4):
    """Build a float32 density map from point annotations.

    Args:
        img_shape: ``(H, W)`` of the map to create.
        points: iterable of ``(x, y)`` pairs; each coordinate is truncated to
            an int and clamped into the map.
        sigma: standard deviation passed to ``gaussian_filter``.

    Returns:
        ``(H, W)`` float32 array: an all-zero map when ``points`` is empty,
        otherwise the Gaussian-smoothed sum of unit impulses.
    """
    height, width = img_shape
    heat = np.zeros((height, width), dtype=np.float32)

    if len(points) > 0:
        for px, py in points:
            col = min(width - 1, max(0, int(px)))
            row = min(height - 1, max(0, int(py)))
            heat[row, col] += 1
        heat = gaussian_filter(heat, sigma=sigma)

    return heat

def process_split(img_dir, gt_dir, out_dir):
    """Generate and cache a density map for every ``*.jpg`` in ``img_dir``.

    For each image ``<stem>.jpg`` the annotation file ``GT_<stem>.mat`` is read
    from ``gt_dir`` and the resulting map is written to ``<out_dir>/<stem>.npy``.
    Images whose output file already exists are skipped, so the function can be
    re-run cheaply.

    Args:
        img_dir: folder containing the images.
        gt_dir: folder containing the ``GT_<stem>.mat`` annotation files.
        out_dir: existing folder that receives the ``.npy`` density maps.
    """
    img_paths = sorted(glob.glob(os.path.join(img_dir, "*.jpg")))
    print(f"Processing {len(img_paths)} images in {img_dir}")
    for img_path in img_paths:
        basename = os.path.basename(img_path).replace(".jpg", "")
        out_path = os.path.join(out_dir, basename + ".npy")
        if os.path.exists(out_path):
            continue

        # Only the image dimensions are needed here. Reading ``size`` avoids
        # decoding the pixel data, and the ``with`` block closes the file.
        with Image.open(img_path) as img:
            W, H = img.size

        mat_path = os.path.join(gt_dir, "GT_" + basename + ".mat")
        mat = sio.loadmat(mat_path)

        # extract (x,y) points from the nested MATLAB struct
        points = np.array(mat["image_info"][0][0][0][0][0])

        density = generate_density_map((H, W), points, sigma=4)
        np.save(out_path, density)
    print("Done:", out_dir)

# Pre-compute density maps for the train split, then the test split.
for split_args in (
    (partA_train_img, partA_train_gt, out_train_density),
    (partA_test_img, partA_test_gt, out_test_density),
):
    process_split(*split_args)


In [None]:
class ShanghaiTechPartA(Dataset):
    """Dataset pairing each ``*.jpg`` in ``img_dir`` with its cached density map.

    Each item is ``(image_tensor, density_tensor, count)`` where the density
    tensor has shape ``(1, H, W)`` as stored on disk and ``count`` is its sum.
    The optional ``transform`` is applied to the image only.
    """

    def __init__(self, img_dir, density_dir, transform=None):
        self.img_dir, self.density_dir = img_dir, density_dir
        self.transform = transform

        # Sorted so that indices are stable across runs.
        jpg_pattern = os.path.join(img_dir, "*.jpg")
        self.img_paths = sorted(glob.glob(jpg_pattern))

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        path = self.img_paths[idx]
        stem = os.path.basename(path).replace(".jpg", "")

        # Image: use the configured transform, or plain ToTensor when none is set.
        rgb = Image.open(path).convert('RGB')
        to_tensor = self.transform if self.transform else transforms.ToTensor()
        img_t = to_tensor(rgb)

        # Density map saved as <stem>.npy.
        density_np = np.load(os.path.join(self.density_dir, stem + ".npy"))
        density_np = density_np.astype(np.float32)

        # torch.tensor copies the data, so the tensor owns its own storage.
        density_t = torch.tensor(density_np, dtype=torch.float32).unsqueeze(0)  # (1,H,W)

        return img_t, density_t, density_t.sum()


In [None]:
class MCNN(nn.Module):
    """Three-branch CNN producing a single-channel map.

    The three branches share the same layer layout and differ only in kernel
    size (9, 7 and 5). Each branch ends with 8 channels after two 2x2
    max-pools; the branch outputs are concatenated and fused by a 1x1
    convolution with no activation.
    """

    def __init__(self):
        super().__init__()

        self.branch1 = self._column(9)
        self.branch2 = self._column(7)
        self.branch3 = self._column(5)

        # NOTE: no ReLU here - output is the raw fused map
        self.fuse = nn.Conv2d(8 * 3, 1, kernel_size=1)

    @staticmethod
    def _column(ks):
        """Build one branch with ``ks`` x ``ks`` convolutions and same-size padding."""
        pad = ks // 2
        return nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=ks, padding=pad),
            nn.ReLU(inplace=True),
            nn.Conv2d(16, 32, kernel_size=ks, padding=pad),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 16, kernel_size=ks, padding=pad),
            nn.ReLU(inplace=True),
            nn.Conv2d(16, 8, kernel_size=ks, padding=pad),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )

    def forward(self, x):
        columns = [branch(x) for branch in (self.branch1, self.branch2, self.branch3)]
        return self.fuse(torch.cat(columns, dim=1))


In [None]:
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Device:", device)

# Image preprocessing: fixed 480x640 input, tensor conversion, per-channel normalization.
preprocess_steps = [
    transforms.Resize((480, 640)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocess_steps)

train_dataset = ShanghaiTechPartA(partA_train_img, out_train_density, transform)
test_dataset = ShanghaiTechPartA(partA_test_img, out_test_density, transform)

print(f"Train len: {len(train_dataset)}")
print(f"Test  len: {len(test_dataset)}")

# One sample per batch, loaded in the main process; only the training order is shuffled.
loader_kwargs = dict(batch_size=1, num_workers=0)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)


In [None]:
def train_one_epoch(model, loader, optimizer, device):
    """Train ``model`` for one pass over ``loader`` and return the mean MSE loss.

    When the model output has a different spatial size than the ground-truth
    density, the ground truth is bilinearly resized to the output size and then
    rescaled so that every sample keeps its original sum. Interpolation alone
    changes the sum of the map, which would make the training target disagree
    with ``densities.sum()`` - the value used as the true count at evaluation.

    Args:
        model: ``nn.Module`` mapping an image batch to a ``(B, 1, h, w)`` map.
        loader: iterable yielding ``(imgs, densities, count)`` batches, with
            ``densities`` shaped ``(B, 1, H, W)``; must expose ``loader.dataset``.
        optimizer: optimizer updating ``model``'s parameters.
        device: device the batches are moved to.

    Returns:
        Batch-size-weighted sum of batch losses divided by ``len(loader.dataset)``.
    """
    model.train()
    criterion = nn.MSELoss()
    total_loss = 0.0

    for imgs, densities, _ in loader:
        imgs = imgs.to(device)
        densities = densities.to(device)

        optimizer.zero_grad()
        outputs = model(imgs)  # (B,1,h_out,w_out)

        # resize GT density to match output spatial size
        if outputs.shape[-2:] != densities.shape[-2:]:
            densities_resized = F.interpolate(
                densities,
                size=outputs.shape[-2:],
                mode='bilinear',
                align_corners=False
            )
            # Restore each sample's sum (its count) after interpolation.
            # clamp_min guards the division for all-zero maps.
            gt_sum = densities.sum(dim=(1, 2, 3), keepdim=True)
            resized_sum = densities_resized.sum(dim=(1, 2, 3), keepdim=True)
            densities_resized = densities_resized * (gt_sum / resized_sum.clamp_min(1e-12))
        else:
            densities_resized = densities

        loss = criterion(outputs, densities_resized)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * imgs.size(0)

    return total_loss / len(loader.dataset)


@torch.no_grad()
def evaluate(model, loader, device):
    """Compute count MAE and RMSE of ``model`` over ``loader``.

    The predicted count is the sum of the model output and the true count is
    the sum of the ground-truth density, both per sample.

    Returns:
        ``(mae, rmse)`` as Python floats.
    """
    model.eval()
    abs_err_total, sq_err_total, seen = 0.0, 0.0, 0

    for imgs, densities, _ in loader:
        imgs, densities = imgs.to(device), densities.to(device)

        # Per-sample counts: integrate predicted and ground-truth maps.
        pred_counts = model(imgs).sum(dim=[1, 2, 3]).cpu()
        gt_counts = densities.sum(dim=[1, 2, 3]).cpu()

        err = pred_counts - gt_counts
        abs_err_total += err.abs().sum().item()
        sq_err_total += (err ** 2).sum().item()
        seen += imgs.size(0)

    return abs_err_total / seen, (sq_err_total / seen) ** 0.5


In [None]:
model = MCNN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

num_epochs = 50  # adjust as you like
history = {"train_loss": [], "mae": [], "rmse": []}

for epoch in range(1, num_epochs + 1):
    # One training pass, then count metrics on the test loader.
    train_loss = train_one_epoch(model, train_loader, optimizer, device)
    mae, rmse = evaluate(model, test_loader, device)

    for key, value in zip(("train_loss", "mae", "rmse"), (train_loss, mae, rmse)):
        history[key].append(value)

    print(f"Epoch {epoch:03d}: loss={train_loss:.6f}, MAE={mae:.2f}, RMSE={rmse:.2f}")

# Persist the final weights.
mcnn_ckpt_path = "/kaggle/working/mcnn_partA.pth"
torch.save(model.state_dict(), mcnn_ckpt_path)
print("Model saved to", mcnn_ckpt_path)


In [None]:
# Take the first batch from the test loader and run the model on it.
imgs, dens, _ = next(iter(test_loader))

model.eval()
with torch.no_grad():
    preds = model(imgs.to(device))

gt_count = dens.sum().item()
pred_count = preds.sum().item()

print(f"GT count   : {gt_count:.2f}")
print(f"Pred count : {pred_count:.2f}")

# Undo the per-channel normalization so the image is displayable.
channel_std = np.array([0.229, 0.224, 0.225])
channel_mean = np.array([0.485, 0.456, 0.406])
img_np = imgs[0].permute(1, 2, 0).cpu().numpy()
img_np = np.clip(img_np * channel_std + channel_mean, 0, 1)

gt_den = dens[0, 0].cpu().numpy()
pred_den = preds[0, 0].cpu().numpy()

fig, axes = plt.subplots(1, 3, figsize=(15, 4))
panels = (
    (img_np, {}, "Image"),
    (gt_den, {"cmap": 'jet'}, f"GT density (count={gt_count:.1f})"),
    (pred_den, {"cmap": 'jet'}, f"Pred density (count={pred_count:.1f})"),
)
for ax, (picture, imshow_kwargs, title) in zip(axes, panels):
    ax.imshow(picture, **imshow_kwargs)
    ax.set_title(title)
    ax.axis("off")

fig.tight_layout()
plt.show()


In [None]:
import os
import glob
import numpy as np
import matplotlib.pyplot as plt

from PIL import Image
import scipy.io as sio
from scipy.ndimage import gaussian_filter

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models


In [None]:
# ShanghaiTech base
base = "/kaggle/input/shanghaitech"


def _find_part_a_dir(root, split, leaf):
    """Return one directory matching ``<root>/**/part_A*/<split>/<leaf>``.

    ``glob`` returns matches in arbitrary order, so the matches are sorted and
    the first one is returned; this makes the choice reproducible when the
    dataset contains more than one matching folder.

    Raises:
        FileNotFoundError: if no path matches the pattern.
    """
    pattern = os.path.join(root, "**", "part_A*", split, leaf)
    matches = sorted(glob.glob(pattern, recursive=True))
    if not matches:
        raise FileNotFoundError(f"No directory matches {pattern!r}")
    return matches[0]


# auto-detect Part_A paths
partA_train_img = _find_part_a_dir(base, "train_data", "images")
partA_train_gt  = _find_part_a_dir(base, "train_data", "ground*")

partA_test_img  = _find_part_a_dir(base, "test_data", "images")
partA_test_gt   = _find_part_a_dir(base, "test_data", "ground*")

print("Train img dir :", partA_train_img)
print("Train gt dir  :", partA_train_gt)
print("Test  img dir :", partA_test_img)
print("Test  gt dir  :", partA_test_gt)
print("Train JPG count:", len(glob.glob(os.path.join(partA_train_img, '*.jpg'))))
print("Test  JPG count :", len(glob.glob(os.path.join(partA_test_img, '*.jpg'))))

# uploaded VGG16_bn weights; fail early if the file is not mounted
vgg_path = "/kaggle/input/vgg-data/vgg16_bn-6c64b313.pth"
assert os.path.exists(vgg_path), "vgg16_bn-6c64b313.pth not found at /kaggle/input/vgg-data/"
print("VGG weights path:", vgg_path)


In [None]:
# Folders that hold the cached density maps for each split.
out_train_density = "/kaggle/working/partA_train_density"
out_test_density  = "/kaggle/working/partA_test_density"
for cache_dir in [out_train_density, out_test_density]:
    os.makedirs(cache_dir, exist_ok=True)

def generate_density_map(img_shape, points, sigma=4):
    """Turn ``(x, y)`` point annotations into a smoothed float32 density map.

    Args:
        img_shape: ``(H, W)`` of the output map.
        points: sequence of ``(x, y)`` pairs; coordinates are truncated to int
            and clamped to the map bounds.
        sigma: Gaussian standard deviation used for smoothing.

    Returns:
        ``(H, W)`` float32 array; all zeros when ``points`` is empty.
    """
    n_rows, n_cols = img_shape
    canvas = np.zeros((n_rows, n_cols), dtype=np.float32)
    if len(points) == 0:
        # Nothing to place, and smoothing an all-zero map is pointless.
        return canvas

    def clamp(value, upper):
        return min(upper, max(0, int(value)))

    for pt_x, pt_y in points:
        canvas[clamp(pt_y, n_rows - 1), clamp(pt_x, n_cols - 1)] += 1
    return gaussian_filter(canvas, sigma=sigma)

def process_split(img_dir, gt_dir, out_dir):
    """Generate and cache a density map for every ``*.jpg`` in ``img_dir``.

    For each image ``<stem>.jpg`` the annotation file ``GT_<stem>.mat`` is read
    from ``gt_dir`` and the resulting map is written to ``<out_dir>/<stem>.npy``.
    Images whose output file already exists are skipped.

    Args:
        img_dir: folder containing the images.
        gt_dir: folder containing the ``GT_<stem>.mat`` annotation files.
        out_dir: existing folder that receives the ``.npy`` density maps.
    """
    img_paths = sorted(glob.glob(os.path.join(img_dir, "*.jpg")))
    print(f"Processing {len(img_paths)} images in {img_dir}")
    for img_path in img_paths:
        basename = os.path.basename(img_path).replace(".jpg", "")
        out_path = os.path.join(out_dir, basename + ".npy")
        if os.path.exists(out_path):
            continue

        # Only the image dimensions are needed here. Reading ``size`` avoids
        # decoding the pixel data, and the ``with`` block closes the file.
        with Image.open(img_path) as img:
            W, H = img.size

        mat_path = os.path.join(gt_dir, "GT_" + basename + ".mat")
        mat = sio.loadmat(mat_path)
        # (x, y) points sit several levels deep in the MATLAB struct.
        points = np.array(mat["image_info"][0][0][0][0][0])

        density = generate_density_map((H, W), points, sigma=4)
        np.save(out_path, density)
    print("Done:", out_dir)

# Build (or reuse) cached density maps: train split first, then test split.
split_jobs = [
    (partA_train_img, partA_train_gt, out_train_density),
    (partA_test_img, partA_test_gt, out_test_density),
]
for images_dir, annotations_dir, cache_dir in split_jobs:
    process_split(images_dir, annotations_dir, cache_dir)


In [None]:
class ShanghaiTechPartA(Dataset):
    """Image / density-map pairs read from an image folder and a ``.npy`` cache.

    ``__getitem__`` returns ``(image_tensor, density_tensor, count)``; the
    density tensor is ``(1, H, W)`` exactly as cached, and ``count`` is its sum.
    ``transform`` (if given) is applied to the image only.
    """

    def __init__(self, img_dir, density_dir, transform=None):
        self.img_dir = img_dir
        self.density_dir = density_dir
        self.transform = transform
        # Sorted listing keeps sample indices reproducible.
        found = glob.glob(os.path.join(img_dir, "*.jpg"))
        self.img_paths = sorted(found)

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        stem = os.path.basename(img_path).replace(".jpg", "")

        picture = Image.open(img_path).convert('RGB')
        if not self.transform:
            img_t = transforms.ToTensor()(picture)
        else:
            img_t = self.transform(picture)

        cached = np.load(os.path.join(self.density_dir, stem + ".npy"))
        density_t = torch.tensor(cached.astype(np.float32), dtype=torch.float32)
        density_t = density_t.unsqueeze(0)  # (1,H,W)

        count = density_t.sum()
        return img_t, density_t, count


In [None]:
class CSRNet(nn.Module):
    """VGG16-BN frontend followed by a dilated-convolution backend.

    Defined here because the training cell below instantiates ``CSRNet``.

    Args:
        vgg_weight_path: optional path to a VGG16-BN state dict. It is loaded
            into the frontend only when the path is given and exists.
    """

    def __init__(self, vgg_weight_path=None):
        super().__init__()

        # Build VGG16_bn; load weights from disk when a valid path is supplied.
        vgg = models.vgg16_bn(weights=None)
        if vgg_weight_path is not None and os.path.exists(vgg_weight_path):
            state = torch.load(vgg_weight_path, map_location="cpu")
            vgg.load_state_dict(state)
        features = list(vgg.features.children())

        # Frontend: the first 33 layers of vgg.features.
        self.frontend = nn.Sequential(*features[:33])

        # Backend: six 3x3 convolutions with dilation 2, each followed by ReLU.
        self.backend = nn.Sequential(
            nn.Conv2d(512, 512, 3, padding=2, dilation=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, 3, padding=2, dilation=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, 3, padding=2, dilation=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 256, 3, padding=2, dilation=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 128, 3, padding=2, dilation=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 64, 3, padding=2, dilation=2),
            nn.ReLU(inplace=True),
        )

        # 1x1 convolution down to a single-channel map (no activation).
        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)

    def forward(self, x):
        x = self.frontend(x)
        x = self.backend(x)
        x = self.output_layer(x)
        return x

In [None]:
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Device: {device}")

# Fixed-size resize, tensor conversion and per-channel normalization.
transform = transforms.Compose(
    [
        transforms.Resize((480, 640)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

train_dataset = ShanghaiTechPartA(partA_train_img, out_train_density, transform=transform)
test_dataset = ShanghaiTechPartA(partA_test_img, out_test_density, transform=transform)

print(f"Train len: {len(train_dataset)}")
print(f"Test  len: {len(test_dataset)}")

# batch_size=1 avoids size issues
common_loader_args = {"batch_size": 1, "num_workers": 0}
train_loader = DataLoader(train_dataset, shuffle=True, **common_loader_args)
test_loader = DataLoader(test_dataset, shuffle=False, **common_loader_args)


In [None]:
def train_one_epoch(model, loader, optimizer, device):
    """Train ``model`` for one pass over ``loader`` and return the mean MSE loss.

    When the model output has a different spatial size than the ground-truth
    density, the ground truth is bilinearly resized to the output size and then
    rescaled so that every sample keeps its original sum. Interpolation alone
    changes the sum of the map, which would make the training target disagree
    with ``densities.sum()`` - the value used as the true count at evaluation.

    Args:
        model: ``nn.Module`` mapping an image batch to a ``(B, 1, h, w)`` map.
        loader: iterable yielding ``(imgs, densities, count)`` batches, with
            ``densities`` shaped ``(B, 1, H, W)``; must expose ``loader.dataset``.
        optimizer: optimizer updating ``model``'s parameters.
        device: device the batches are moved to.

    Returns:
        Batch-size-weighted sum of batch losses divided by ``len(loader.dataset)``.
    """
    model.train()
    criterion = nn.MSELoss()
    total_loss = 0.0

    for imgs, densities, _ in loader:
        imgs = imgs.to(device)
        densities = densities.to(device)

        optimizer.zero_grad()
        outputs = model(imgs)

        # resize GT density to match output spatial size
        if outputs.shape[-2:] != densities.shape[-2:]:
            densities_resized = F.interpolate(
                densities,
                size=outputs.shape[-2:],
                mode='bilinear',
                align_corners=False
            )
            # Restore each sample's sum (its count) after interpolation.
            # clamp_min guards the division for all-zero maps.
            gt_sum = densities.sum(dim=(1, 2, 3), keepdim=True)
            resized_sum = densities_resized.sum(dim=(1, 2, 3), keepdim=True)
            densities_resized = densities_resized * (gt_sum / resized_sum.clamp_min(1e-12))
        else:
            densities_resized = densities

        loss = criterion(outputs, densities_resized)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * imgs.size(0)

    return total_loss / len(loader.dataset)


@torch.no_grad()
def evaluate(model, loader, device):
    """Return ``(mae, rmse)`` of per-sample counts over ``loader``.

    A sample's predicted count is the sum of the model output; its true count
    is the sum of the ground-truth density map.
    """
    model.eval()
    running = {"abs": 0.0, "sq": 0.0}
    n_samples = 0

    for imgs, densities, _ in loader:
        imgs = imgs.to(device)
        densities = densities.to(device)

        predicted = model(imgs).sum(dim=[1, 2, 3]).cpu()
        actual = densities.sum(dim=[1, 2, 3]).cpu()
        delta = predicted - actual

        running["abs"] += delta.abs().sum().item()
        running["sq"] += (delta ** 2).sum().item()
        n_samples += imgs.size(0)

    mae = running["abs"] / n_samples
    rmse = (running["sq"] / n_samples) ** 0.5
    return mae, rmse


In [None]:
# Build CSRNet with the VGG weights file and optimise all parameters with Adam.
model_csr = CSRNet(vgg_weight_path=vgg_path).to(device)
optimizer_csr = torch.optim.Adam(model_csr.parameters(), lr=1e-5)

num_epochs = 50   # number of training epochs
history_csr = {"train_loss": [], "mae": [], "rmse": []}

for epoch in range(1, num_epochs + 1):
    # One training pass, then count metrics on the test loader.
    train_loss = train_one_epoch(model_csr, train_loader, optimizer_csr, device)
    mae, rmse = evaluate(model_csr, test_loader, device)

    history_csr["train_loss"].append(train_loss)
    history_csr["mae"].append(mae)
    history_csr["rmse"].append(rmse)

    print(f"[CSRNet] Epoch {epoch:03d}: loss={train_loss:.6f}, MAE={mae:.2f}, RMSE={rmse:.2f}")

# NOTE(review): the file name says "fromscratch" although vgg_path is passed
# to the constructor above - confirm the intended name.
torch.save(model_csr.state_dict(), "/kaggle/working/csrnet_partA_fromscratch.pth")
print("Saved CSRNet to /kaggle/working/csrnet_partA_fromscratch.pth")


In [None]:
import os, glob
import numpy as np
import matplotlib.pyplot as plt

from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, models
import pandas as pd


In [None]:
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Device: {device}")

# model checkpoints
MCNN_CKPT = "/kaggle/input/csrnet-and-mcnn-models/mcnn_model.pth"
CSRNET_CKPT = "/kaggle/input/csrnet-model/csrnet_partA_finetuned.pth"

# (optional) VGG16_bn weights file handed to the CSRNet constructor
VGG_PTH = "/kaggle/input/vgg-data/vgg16_bn-6c64b313.pth"

# root folder of the real-world images
REAL_ROOT = "/kaggle/input/real-world"
print(f"Real-world root: {REAL_ROOT}")


In [None]:
# ---------------- MCNN ----------------

class MCNN(nn.Module):
    """Multi-column network: three parallel branches fused into one channel.

    Branches use kernel sizes 9, 7 and 5 with otherwise identical layouts
    (four conv+ReLU stages with a 2x2 max-pool after the second and fourth).
    Their 8-channel outputs are concatenated and reduced by a 1x1 convolution.
    """

    def __init__(self):
        super().__init__()

        def make_branch(ks):
            stages = []
            channel_plan = ((3, 16), (16, 32), (32, 16), (16, 8))
            for position, (c_in, c_out) in enumerate(channel_plan):
                stages.append(nn.Conv2d(c_in, c_out, kernel_size=ks, padding=ks // 2))
                stages.append(nn.ReLU(inplace=True))
                # Downsample after every second convolution.
                if position % 2 == 1:
                    stages.append(nn.MaxPool2d(2))
            return nn.Sequential(*stages)

        self.branch1 = make_branch(9)
        self.branch2 = make_branch(7)
        self.branch3 = make_branch(5)
        # final 1x1 conv without ReLU, so the output is not clipped at zero
        self.fuse = nn.Conv2d(8 * 3, 1, kernel_size=1)

    def forward(self, x):
        merged = torch.cat((self.branch1(x), self.branch2(x), self.branch3(x)), dim=1)
        return self.fuse(merged)


# ---------------- CSRNet ----------------

class CSRNet(nn.Module):
    """VGG16-BN frontend plus a dilated-convolution backend and a 1x1 output conv.

    Args:
        vgg_weight_path: optional path to a VGG16-BN state dict; it is loaded
            only when the path is given and exists on disk.
    """

    def __init__(self, vgg_weight_path=None):
        super().__init__()

        # VGG16_bn skeleton; weights are loaded only if a valid file is supplied.
        vgg = models.vgg16_bn(weights=None)
        if vgg_weight_path is not None and os.path.exists(vgg_weight_path):
            vgg.load_state_dict(torch.load(vgg_weight_path, map_location="cpu"))

        # Frontend: the first 33 layers of vgg.features.
        self.frontend = nn.Sequential(*list(vgg.features.children())[:33])

        # Backend: dilated 3x3 convolutions, each followed by ReLU.
        backend_layers = []
        in_ch = 512
        for out_ch in (512, 512, 512, 256, 128, 64):
            backend_layers.append(nn.Conv2d(in_ch, out_ch, 3, padding=2, dilation=2))
            backend_layers.append(nn.ReLU(inplace=True))
            in_ch = out_ch
        self.backend = nn.Sequential(*backend_layers)

        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)

    def forward(self, x):
        return self.output_layer(self.backend(self.frontend(x)))



In [None]:
def _restore_for_inference(net, ckpt_path):
    """Load ``ckpt_path`` into ``net``, switch it to eval mode and return the state dict."""
    state = torch.load(ckpt_path, map_location=device)
    net.load_state_dict(state)
    net.eval()
    return state


# MCNN: build on the target device, then restore its weights.
mcnn = MCNN().to(device)
mcnn_state = _restore_for_inference(mcnn, MCNN_CKPT)
print("Loaded MCNN from", MCNN_CKPT)

# CSRNet: build (with the VGG file passed to the constructor), then restore the trained weights.
csrnet = CSRNet(vgg_weight_path=VGG_PTH).to(device)
csr_state = _restore_for_inference(csrnet, CSRNET_CKPT)
print("Loaded CSRNet from", CSRNET_CKPT)


In [None]:
# Preprocessing for inference: 480x640 resize, tensor conversion, normalization.
infer_steps = [
    transforms.Resize((480, 640)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]
transform_infer = transforms.Compose(infer_steps)

# Recursively collect every jpg / jpeg / png under REAL_ROOT, in sorted order.
img_paths = sorted(
    path
    for ext in ("*.jpg", "*.jpeg", "*.png")
    for path in glob.glob(os.path.join(REAL_ROOT, "**", ext), recursive=True)
)

print("Total real-world images found:", len(img_paths))
if not img_paths:
    raise RuntimeError("No images found under /kaggle/input/real-world/**")


In [None]:
gt_map = {}  # basename -> gt_count

csv_path = os.path.join(REAL_ROOT, "counts.csv")
if not os.path.exists(csv_path):
    print("No counts.csv found, proceeding without GT.")
else:
    print("Found counts.csv at", csv_path)
    df_gt = pd.read_csv(csv_path)
    print("counts.csv columns:", df_gt.columns.tolist())
    print(df_gt.head())

    # Guess the filename and count columns: first candidate present wins.
    img_col_candidates = ["image", "img", "filename", "file", "name"]
    gt_col_candidates  = ["gt", "gt_count", "count", "people", "num_people"]

    def _first_present(candidates):
        for candidate in candidates:
            if candidate in df_gt.columns:
                return candidate
        return None

    img_col = _first_present(img_col_candidates)
    gt_col = _first_present(gt_col_candidates)

    if img_col is not None and gt_col is not None:
        # Key by file name only, so any directory part in the CSV is dropped.
        gt_map.update(
            (os.path.basename(str(row[img_col])), float(row[gt_col]))
            for _, row in df_gt.iterrows()
        )
        print(f"Loaded GT counts for {len(gt_map)} images from counts.csv.")
    else:
        print("⚠ Could not automatically detect GT columns. "
              "Will skip GT for now.")


In [None]:
def safety_label(count, low=50, high=150):
    """
    Map a crowd count to a coarse safety level:
      count < low         -> 'SAFE'
      low <= count < high -> 'WARNING'
      anything else       -> 'DANGER'
    Tune 'low' and 'high' to the scene being monitored.
    """
    if count < low:
        return "SAFE"
    return "WARNING" if count < high else "DANGER"


In [None]:
results = []

for idx, img_path in enumerate(img_paths):
    img = Image.open(img_path).convert("RGB")
    img_t = transform_infer(img).unsqueeze(0).to(device)

    # Forward both models without tracking gradients.
    with torch.no_grad():
        den_mcnn = mcnn(img_t)
        den_csrnet = csrnet(img_t)
    count_mcnn = float(den_mcnn.sum().item())
    count_csrnet = float(den_csrnet.sum().item())

    basename = os.path.basename(img_path)
    # Scene name = first path component below REAL_ROOT.
    scene = os.path.relpath(img_path, REAL_ROOT).split(os.sep)[0]

    gt_count = gt_map.get(basename, np.nan)
    has_gt = not np.isnan(gt_count)
    label = safety_label(count_csrnet)

    record = {
        "scene": scene,
        "image": basename,
        "full_path": img_path,
        "gt_count": gt_count,
        "mcnn_count": count_mcnn,
        "csrnet_count": count_csrnet,
        "error_mcnn": (count_mcnn - gt_count) if has_gt else np.nan,
        "error_csrnet": (count_csrnet - gt_count) if has_gt else np.nan,
        "safety_label": label,
    }
    results.append(record)

    # Only the first three images are visualized.
    if idx >= 3:
        continue

    img_np = np.array(img)
    den_np = den_csrnet[0, 0].cpu().numpy()

    fig, (ax_img, ax_den) = plt.subplots(1, 2, figsize=(11, 4))
    ax_img.imshow(img_np)
    ax_img.set_title(f"{scene} | {basename}")
    ax_img.axis("off")

    ax_den.imshow(den_np, cmap="jet")
    ax_den.set_title(f"CSRNet count = {count_csrnet:.1f} ({label})")
    ax_den.axis("off")

    fig.tight_layout()
    plt.show()

len(results)


In [None]:
# Tabulate the per-image records and write them to the working directory.
csv_out = "/kaggle/working/real_world_mcnn_csrnet_results.csv"
df = pd.DataFrame(results)
df.to_csv(csv_out, index=False)
print(f"Saved results CSV to: {csv_out}")

display(df.head())


In [None]:
mask = df["gt_count"].notna()

if not mask.any():
    print("No GT counts available; skipping MAE/RMSE computation.")
else:
    # Restrict to rows that have a ground-truth count.
    err_mcnn = df.loc[mask, "error_mcnn"]
    err_csr = df.loc[mask, "error_csrnet"]

    mae_mcnn = err_mcnn.abs().mean()
    mae_csr = err_csr.abs().mean()

    rmse_mcnn = np.sqrt((err_mcnn ** 2).mean())
    rmse_csr = np.sqrt((err_csr ** 2).mean())

    print(f"Real-world MAE  (MCNN) : {mae_mcnn:.2f}")
    print(f"Real-world RMSE (MCNN) : {rmse_mcnn:.2f}")
    print(f"Real-world MAE  (CSRNet): {mae_csr:.2f}")
    print(f"Real-world RMSE (CSRNet): {rmse_csr:.2f}")

    print("\nPer-scene CSRNet MAE:")
    per_scene_mae = df.loc[mask].groupby("scene")["error_csrnet"].apply(
        lambda errs: errs.abs().mean()
    )
    display(per_scene_mae)


In [None]:
# How many frames fall into each safety label.
label_distribution = df["safety_label"].value_counts()
print("\nSafety label distribution (CSRNet-based):")
display(label_distribution)

# Mean CSRNet count per scene.
scene_mean_count = df.groupby("scene")["csrnet_count"].mean()
print("\nAverage CSRNet count per scene:")
display(scene_mean_count)


In [None]:
# if df is not already in memory, load it
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Reload the results table from the CSV written to the working directory.
results_csv_path = "/kaggle/working/real_world_mcnn_csrnet_results.csv"
df = pd.read_csv(results_csv_path)
df.head()


In [None]:
mask = df["gt_count"].notna()

gt  = df.loc[mask, "gt_count"].values
pred_mcnn  = df.loc[mask, "mcnn_count"].values
pred_csr   = df.loc[mask, "csrnet_count"].values

if mask.any():
    plt.figure(figsize=(6,6))
    plt.scatter(gt, pred_mcnn, label="MCNN", alpha=0.6)
    plt.scatter(gt, pred_csr,  label="CSRNet", alpha=0.6, marker="x")

    max_val = max(gt.max(), pred_mcnn.max(), pred_csr.max())
    plt.plot([0, max_val], [0, max_val], linestyle="--")  # y=x reference

    plt.xlabel("Ground-truth count")
    plt.ylabel("Predicted count")
    plt.title("GT vs Predicted Counts (Real-world dataset)")
    plt.legend()
    plt.grid(True, linestyle=":")
    plt.tight_layout()
    plt.show()
else:
    # Without any GT rows the arrays are empty and .max() would raise, so
    # skip the plot instead of failing the run.
    print("No GT counts available; skipping GT vs predicted scatter plot.")


In [None]:
mask = df["gt_count"].notna()
with_gt = df.loc[mask]


def _scene_mae(error_col):
    """Mean absolute value of ``error_col`` per scene, over rows with GT."""
    return with_gt.groupby("scene")[error_col].apply(lambda errs: errs.abs().mean())


# per-scene MAE (absolute error mean)
scene_mae_mcnn = _scene_mae("error_mcnn")
scene_mae_csr = _scene_mae("error_csrnet")

scenes = scene_mae_csr.index.tolist()
x = np.arange(len(scenes))
width = 0.35

# Grouped bars: MCNN to the left of each tick, CSRNet to the right.
fig, ax = plt.subplots(figsize=(10, 4))
ax.bar(x - width/2, scene_mae_mcnn.values, width, label="MCNN")
ax.bar(x + width/2, scene_mae_csr.values, width, label="CSRNet")

ax.set_xticks(x)
ax.set_xticklabels(scenes, rotation=45, ha="right")
ax.set_ylabel("MAE (|pred - GT|)")
ax.set_title("Per-scene MAE on Real-world Dataset")
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
mask = df["gt_count"].notna()

# Per-scene averages over rows that have a ground-truth count.
by_scene = df.loc[mask].groupby("scene")
scene_gt_mean = by_scene["gt_count"].mean()
scene_pred_mean = by_scene["csrnet_count"].mean()

scenes = scene_gt_mean.index.tolist()
x = np.arange(len(scenes))
width = 0.35

fig, ax = plt.subplots(figsize=(10, 4))
for offset, series, name in (
    (-width/2, scene_gt_mean, "GT"),
    (+width/2, scene_pred_mean, "CSRNet"),
):
    ax.bar(x + offset, series.values, width, label=name)

ax.set_xticks(x)
ax.set_xticklabels(scenes, rotation=45, ha="right")
ax.set_ylabel("Average count")
ax.set_title("Per-scene Average GT vs CSRNet Count")
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# Number of frames per safety label, as a bar chart.
label_counts = df["safety_label"].value_counts()

fig, ax = plt.subplots(figsize=(4, 4))
ax.bar(label_counts.index, label_counts.values)
ax.set_xlabel("Safety label")
ax.set_ylabel("Number of frames")
ax.set_title("Safety Label Distribution (CSRNet-based)")
fig.tight_layout()
plt.show()


In [None]:
# Histogram of signed CSRNet errors over the rows that have GT.
mask = df["gt_count"].notna()
errors_csr = df.loc[mask, "error_csrnet"]

fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(errors_csr, bins=30)
ax.set_xlabel("Prediction error (CSRNet: pred - GT)")
ax.set_ylabel("Number of frames")
ax.set_title("Distribution of CSRNet Prediction Errors")
fig.tight_layout()
plt.show()
