In [1]:
import os

import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

import numpy as np
import matplotlib.pyplot as plt
import cv2
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, utils, datasets
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from sklearn.metrics import classification_report, confusion_matrix

print(torch.__version__)

1.10.1+cpu


In [38]:
data_path = "./data"
data_path_zebra = os.path.join(data_path, "zebra_dataset")
data_path_without = os.path.join(data_path_zebra, "without")
data_path_with = os.path.join(data_path_zebra, "with")

device = "cuda" if torch.cuda.is_available() else "cpu"

In [7]:
# Collected image paths and labels
images = []
labels = []

label_without = 0
label_with = 1

# Collect paths of images WITHOUT pedestrians
for image_name in os.listdir(data_path_without):
    images.append(os.path.join(data_path_without, image_name))
    labels.append(label_without)

# Collect paths of images WITH pedestrians
for image_name in os.listdir(data_path_with):
    images.append(os.path.join(data_path_with, image_name))
    labels.append(label_without)

images[:10]

['./data\\zebra_dataset\\without\\0_cam_image_array_.jpg',
 './data\\zebra_dataset\\without\\1000_cam_image_array_.jpg',
 './data\\zebra_dataset\\without\\1001_cam_image_array_.jpg',
 './data\\zebra_dataset\\without\\1002_cam_image_array_.jpg',
 './data\\zebra_dataset\\without\\1003_cam_image_array_.jpg',
 './data\\zebra_dataset\\without\\1004_cam_image_array_.jpg',
 './data\\zebra_dataset\\without\\1005_cam_image_array_.jpg',
 './data\\zebra_dataset\\without\\1006_cam_image_array_.jpg',
 './data\\zebra_dataset\\without\\1007_cam_image_array_.jpg',
 './data\\zebra_dataset\\without\\1008_cam_image_array_.jpg']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.1, shuffle=True, random_state=2022)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, shuffle=True, random_state=2022)

In [42]:
class PedestrianDataset(Dataset):
    def __init__(self, img_paths, img_labels, transform=None):
        self.img_paths = img_paths
        self.img_labels = img_labels
        self.transform = transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        image_bgr = cv2.imread(self.img_paths[idx])
        while image_bgr is None:  # Failed to load the image
            idx += 1
            image_bgr = cv2.imread(self.img_paths[idx])

        image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        if self.transform:
            image_rgb = self.transform(image_rgb)

        label = torch.Tensor([self.img_labels[idx]])

        return image_rgb, label

In [24]:
def get_mean_std(loader):
    channels_sum, channels_squared_sum, num_batches = 0, 0, 0

    for data, _ in loader:
        channels_sum += torch.mean(data, dim=[0, 2, 3])
        channels_squared_sum += torch.mean(data ** 2, dim=[0, 2, 3])
        num_batches += 1

    mean = channels_sum / num_batches
    std = (channels_squared_sum / num_batches - mean ** 2) ** 0.5

    return mean, std

In [28]:
data_transform = transforms.Compose([
    transforms.ToTensor(),
])

train_dataset = PedestrianDataset(X_train, y_train, transform=data_transform)
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)

mean, std = get_mean_std(train_dataloader)
print(f"Mean: {mean}")
print(f"Std: {std}")

Mean: tensor([-0.0003, -0.0005, -0.0003])
Std: tensor([1.0001, 1.0000, 0.9998])


In [43]:
data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.4251, 0.4787, 0.4311],
        std=[0.2203, 0.2276, 0.2366]
    ),
])


train_dataset = PedestrianDataset(X_train, y_train, transform=data_transform)
val_dataset = PedestrianDataset(X_val, y_val, transform=data_transform)
test_dataset = PedestrianDataset(X_test, y_test, transform=data_transform)

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=True)

In [44]:
class PedestrianDetector(nn.Module):
    def __init__(self):
        super(PedestrianDetector, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3)
        self.dropout1 = nn.Dropout2d(p=0.1)

        self.conv2 = nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3)
        self.dropout2 = nn.Dropout2d(p=0.1)

        self.conv3 = nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3)
        self.dropout3 = nn.Dropout2d(p=0.1)

        self.fc = nn.Linear(3744, 1)

    def forward(self, x):
        x = torch.relu(torch.max_pool2d(self.dropout1(self.conv1(x)), 2))
        x = torch.relu(torch.max_pool2d(self.dropout2(self.conv2(x)), 2))
        x = torch.relu(torch.max_pool2d(self.dropout3(self.conv3(x)), 2))
        x = torch.sigmoid(self.fc(x.view(x.shape[0], -1)))
        return x


model = PedestrianDetector()
model.train()
model.to(device)
print(model)

image_batch, label_batch = next(iter(train_dataloader))
print(image_batch.shape)
print(label_batch.shape)

model(image_batch)

PedestrianDetector(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout2d(p=0.1, inplace=False)
  (conv2): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1))
  (dropout2): Dropout2d(p=0.1, inplace=False)
  (conv3): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1))
  (dropout3): Dropout2d(p=0.1, inplace=False)
  (fc): Linear(in_features=3744, out_features=1, bias=True)
)
torch.Size([64, 3, 120, 160])
torch.Size([64, 1])


tensor([[0.5535],
        [0.5062],
        [0.5253],
        [0.5214],
        [0.5070],
        [0.5205],
        [0.5148],
        [0.4954],
        [0.5200],
        [0.5109],
        [0.5113],
        [0.5121],
        [0.5165],
        [0.5200],
        [0.5172],
        [0.5144],
        [0.5218],
        [0.5326],
        [0.5146],
        [0.4985],
        [0.5111],
        [0.5013],
        [0.5249],
        [0.5197],
        [0.5210],
        [0.5177],
        [0.5238],
        [0.5006],
        [0.5092],
        [0.5176],
        [0.4924],
        [0.5249],
        [0.5025],
        [0.5087],
        [0.4969],
        [0.5248],
        [0.5179],
        [0.5188],
        [0.5176],
        [0.5337],
        [0.5509],
        [0.5134],
        [0.4950],
        [0.5160],
        [0.5267],
        [0.4957],
        [0.5063],
        [0.5005],
        [0.5255],
        [0.5094],
        [0.5056],
        [0.5082],
        [0.5084],
        [0.5369],
        [0.5382],
        [0

In [46]:
n_epochs = 10

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.008)

for epoch in range(n_epochs):
    print(f"Epoch: {epoch}")
    for step, (image_batch, label_batch) in enumerate(train_dataloader):
        print(f"Step: {step}")

        image_batch, label_batch = image_batch.to(device), label_batch.to(device)

        optimizer.zero_grad()

        pred = model(image_batch)

        loss = criterion(pred, label_batch)

        loss.backward()
        optimizer.step()

Epoch: 0
Step: 0
Step: 1
Step: 2
Step: 3
Step: 4
Step: 5
Step: 6


KeyboardInterrupt: 

In [47]:
torch.save(model, "model.pth")