In [1]:
import os

import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torchvision.transforms as transforms

import numpy as np
import matplotlib.pyplot as plt
import cv2
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, utils, datasets
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from sklearn.metrics import classification_report, confusion_matrix

print(torch.__version__)

1.10.1+cpu


In [38]:
data_path = "./data"
data_path_zebra = os.path.join(data_path, "zebra_dataset")
data_path_without = os.path.join(data_path_zebra, "without")
data_path_with = os.path.join(data_path_zebra, "with")

device = "cuda" if torch.cuda.is_available() else "cpu"

In [55]:
# Collected image paths and labels
images = []
labels = []

label_without = 0
label_with = 1

# Collect paths of images WITHOUT pedestrians
for image_name in os.listdir(data_path_without):
    images.append(os.path.join(data_path_without, image_name))
    labels.append(label_without)

# Collect paths of images WITH pedestrians
for image_name in os.listdir(data_path_with):
    images.append(os.path.join(data_path_with, image_name))
    labels.append(label_with)

images[:10]

['./data\\zebra_dataset\\without\\0_cam_image_array_.jpg',
 './data\\zebra_dataset\\without\\1000_cam_image_array_.jpg',
 './data\\zebra_dataset\\without\\1001_cam_image_array_.jpg',
 './data\\zebra_dataset\\without\\1002_cam_image_array_.jpg',
 './data\\zebra_dataset\\without\\1003_cam_image_array_.jpg',
 './data\\zebra_dataset\\without\\1004_cam_image_array_.jpg',
 './data\\zebra_dataset\\without\\1005_cam_image_array_.jpg',
 './data\\zebra_dataset\\without\\1006_cam_image_array_.jpg',
 './data\\zebra_dataset\\without\\1007_cam_image_array_.jpg',
 './data\\zebra_dataset\\without\\1008_cam_image_array_.jpg']

In [56]:
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.1, shuffle=True, random_state=2022)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, shuffle=True, random_state=2022)

In [42]:
class PedestrianDataset(Dataset):
    def __init__(self, img_paths, img_labels, transform=None):
        self.img_paths = img_paths
        self.img_labels = img_labels
        self.transform = transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        image_bgr = cv2.imread(self.img_paths[idx])
        while image_bgr is None:  # Failed to load the image
            idx += 1
            image_bgr = cv2.imread(self.img_paths[idx])

        image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        if self.transform:
            image_rgb = self.transform(image_rgb)

        label = torch.Tensor([self.img_labels[idx]])

        return image_rgb, label

In [24]:
def get_mean_std(loader):
    channels_sum, channels_squared_sum, num_batches = 0, 0, 0

    for data, _ in loader:
        channels_sum += torch.mean(data, dim=[0, 2, 3])
        channels_squared_sum += torch.mean(data ** 2, dim=[0, 2, 3])
        num_batches += 1

    mean = channels_sum / num_batches
    std = (channels_squared_sum / num_batches - mean ** 2) ** 0.5

    return mean, std

In [28]:
data_transform = transforms.Compose([
    transforms.ToTensor(),
])

train_dataset = PedestrianDataset(X_train, y_train, transform=data_transform)
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)

mean, std = get_mean_std(train_dataloader)
print(f"Mean: {mean}")
print(f"Std: {std}")

Mean: tensor([-0.0003, -0.0005, -0.0003])
Std: tensor([1.0001, 1.0000, 0.9998])


In [57]:
data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.4251, 0.4787, 0.4311],
        std=[0.2203, 0.2276, 0.2366]
    ),
])


train_dataset = PedestrianDataset(X_train, y_train, transform=data_transform)
val_dataset = PedestrianDataset(X_val, y_val, transform=data_transform)
test_dataset = PedestrianDataset(X_test, y_test, transform=data_transform)

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=True)

In [64]:
class PedestrianDetector(nn.Module):
    def __init__(self):
        super(PedestrianDetector, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3)
        # self.dropout1 = nn.Dropout2d(p=0.1)

        self.conv2 = nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3)
        # self.dropout2 = nn.Dropout2d(p=0.1)

        self.conv3 = nn.Conv2d(in_channels=32, out_channels=16, kernel_size=3)
        # self.dropout3 = nn.Dropout2d(p=0.1)

        self.fc = nn.Linear(3744, 1)

    def forward(self, x):
        x = torch.relu(torch.max_pool2d(self.conv1(x), 2))
        x = torch.relu(torch.max_pool2d(self.conv2(x), 2))
        x = torch.relu(torch.max_pool2d(self.conv3(x), 2))
        x = torch.sigmoid(self.fc(x.view(x.shape[0], -1)))
        return x


model = PedestrianDetector()
print(model)

image_batch, label_batch = next(iter(train_dataloader))
pred_batch = model(image_batch)

print(f"Image Batch Shape: {image_batch.shape}")
print(f"Label Batch Shape: {label_batch.shape}")
print(f"Predict Batch Shape: {pred_batch.shape}")

PedestrianDetector(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc): Linear(in_features=3744, out_features=1, bias=True)
)
Image Batch Shape: torch.Size([64, 3, 120, 160])
Label Batch Shape: torch.Size([64, 1])
Predict Batch Shape: torch.Size([64, 1])


In [73]:
def get_accuracy(y_prob, y_true, threshold=0.5):
    assert y_true.size() == y_prob.size()
    y_prob = y_prob > threshold
    return (y_true == y_prob).sum().item() / y_true.size(0)

In [74]:
n_epochs = 10
log_frequency = 10

model = PedestrianDetector()
model.to(device)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

loss_history = []
accuracy_history = []

for epoch in range(n_epochs):
    running_loss = 0.0
    running_accuracy = 0.0
    model.train()
    for step, (image_batch, label_batch) in enumerate(train_dataloader):
        image_batch, label_batch = image_batch.to(device), label_batch.to(device)

        optimizer.zero_grad()

        pred = model(image_batch)

        loss = criterion(pred, label_batch)

        loss.backward()

        optimizer.step()

        current_loss = loss.item()
        current_accuracy = get_accuracy(pred, label_batch)

        running_loss += current_loss
        running_accuracy += current_accuracy
        if (step + 1) % log_frequency == 0:
            print(f'Train [{epoch + 1}/{n_epochs}, {step + 1}/{len(train_dataloader)}]: Loss {running_loss / log_frequency}, Accuracy {running_accuracy / log_frequency}')
            running_loss = 0.0
            running_accuracy = 0.0

    running_loss = 0
    model.eval()
    for step, (image_batch, label_batch) in enumerate(val_dataloader):
        image_batch, label_batch = image_batch.to(device), label_batch.to(device)

        pred = model(image_batch)

        loss = criterion(pred, label_batch)

        current_loss = loss.item()
        current_accuracy = get_accuracy(pred, label_batch)

        running_loss += current_loss
        running_accuracy += current_accuracy
        if (step + 1) % log_frequency == 0:
            print(f'Val [{epoch + 1}/{n_epochs}, {step + 1}/{len(val_dataloader)}]: Loss {running_loss / log_frequency}, Accuracy {running_accuracy / log_frequency}')
            running_loss = 0.0
            running_accuracy = 0.0

Train [1/10, 10/16]: Loss 0.6924334466457367, Accuracy 0.49375
Train [1/10, 20/16]: Loss 0.6871529877185821, Accuracy 0.5890625
Train [1/10, 30/16]: Loss 0.6823015809059143, Accuracy 0.55
Train [1/10, 40/16]: Loss 0.6700393497943878, Accuracy 0.6015625
Train [1/10, 50/16]: Loss 0.6602464914321899, Accuracy 0.6046875
Train [1/10, 60/16]: Loss 0.6385653614997864, Accuracy 0.815625
Val [1/10, 10/16]: Loss 0.6114684522151947, Accuracy 0.9359375
Train [2/10, 10/16]: Loss 0.6022695124149322, Accuracy 0.8171875
Train [2/10, 20/16]: Loss 0.5529086410999298, Accuracy 0.89375
Train [2/10, 30/16]: Loss 0.4959726482629776, Accuracy 0.9
Train [2/10, 40/16]: Loss 0.4144680380821228, Accuracy 0.89375
Train [2/10, 50/16]: Loss 0.34091065526008607, Accuracy 0.91875
Train [2/10, 60/16]: Loss 0.3351433649659157, Accuracy 0.8765625
Val [2/10, 10/16]: Loss 0.2715414762496948, Accuracy 1.0203125
Train [3/10, 10/16]: Loss 0.280699323117733, Accuracy 0.896875
Train [3/10, 20/16]: Loss 0.24406169205904008, Acc

KeyboardInterrupt: 

In [47]:
torch.save(model, "model.pth")