In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# create the dataset
class BusDataset(Dataset):
    """In-memory binary image dataset: label 1 for 'Bus', 0 for everything else.

    Every ``.jpg`` file in ``image_path`` (visited in sorted filename order, so
    the loaded subset is reproducible) is read, converted to a tensor, resized
    to 256x256 and normalised with mean/std 0.5 per channel. All images are
    kept in memory as one stacked tensor.

    Labels are taken from the CSV at ``class_csv``, which must provide the
    columns ``ImageID`` (file name without extension) and ``LabelName``. When
    an ``ImageID`` occurs on several rows, the first row decides the label.
    Images that have no row in the CSV are labelled 0.

    Args:
        class_csv: Path of the CSV file holding ``ImageID`` / ``LabelName``.
        image_path: Directory that is scanned for ``.jpg`` files.
        max_data_loaded: Loading stops once this many images have been read.
    """

    def __init__(self, class_csv, image_path,max_data_loaded=100):
        self.bus_df = pd.read_csv(class_csv)
        self.image_path = image_path
        self.labels = []
        self.image = []
        self.Image_len = 0
        self.Transform = transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Resize((256, 256)),
            torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])

        # Build the id -> label table once instead of scanning the whole
        # DataFrame again for every image file.
        label_lookup = self._build_label_lookup(
            self.bus_df['ImageID'], self.bus_df['LabelName']
        )

        # os.listdir returns entries in arbitrary order; sorting makes the
        # subset selected by max_data_loaded deterministic across runs.
        for file in sorted(os.listdir(self.image_path)):
            if len(self.image) == max_data_loaded:
                break
            if not file.endswith('.jpg'):
                continue
            # take the tensor of the image
            img = self.Transform(plt.imread(os.path.join(self.image_path, file)))
            self.image.append(img)
            # Exactly one label per image keeps self.labels aligned with
            # self.image; ids missing from the CSV fall back to 0 (not a bus).
            image_id = os.path.splitext(file)[0]
            self.labels.append(label_lookup.get(image_id, 0))

        print("Label Size: ", len(self.labels))
        print("Image Size: ", len(self.image))
        if self.image:
            self.image = torch.stack(self.image)
        else:
            # torch.stack rejects an empty list; keep the same 4-D layout.
            self.image = torch.empty((0, 3, 256, 256))
        self.Image_len = len(self.image)

    @staticmethod
    def _build_label_lookup(image_ids, label_names):
        """Map each image id (as ``str``) to 1 if its label is 'Bus', else 0.

        The first occurrence of an id wins. Any label that is not exactly the
        string 'Bus' (including NaN / missing values) maps to 0.
        """
        lookup = {}
        for image_id, label_name in zip(image_ids, label_names):
            lookup.setdefault(str(image_id), 1 if label_name == 'Bus' else 0)
        return lookup

    def __len__(self):
        """Return the number of loaded images."""
        return self.Image_len

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        return self.image[idx], self.labels[idx]

In [3]:
# Load up to 8000 labelled images into memory.
dataset = BusDataset(
    class_csv='archive/df.csv',
    image_path='archive/images/images/',
    max_data_loaded=8000,
)

  img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()


Label Size:  8000
Image Size:  8000


In [None]:
# Wrap the dataset in a loader that yields reshuffled mini-batches of 200 samples.
train_loader = DataLoader(
    dataset=dataset,
    batch_size=200,
    shuffle=True,
)

In [None]:
# Define a Downsample Block
def downsample_block(in_channels, out_channels):
    """Build one conv -> batch-norm -> LeakyReLU stage.

    The 3x3 convolution uses stride 2 with padding 1, so an even spatial size
    is halved by the block.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by the convolution.

    Returns:
        An ``nn.Sequential`` holding the three layers in execution order.
    """
    strided_conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1)
    batch_norm = nn.BatchNorm2d(out_channels)
    activation = nn.LeakyReLU(0.2)
    return nn.Sequential(strided_conv, batch_norm, activation)

In [None]:
# define the network as a Convolutional Neural Network
class BusDetector(nn.Module):
    """CNN that maps a [3, 256, 256] image to a single bus probability.

    Four (strided conv block, 2x2 max-pool) stages each shrink the spatial
    size by a factor of four, ending at [512, 1, 1]. The flattened features
    then pass through two linear layers and a sigmoid, so the output has
    shape [batch, 1] with values in (0, 1).
    """

    def __init__(self) -> None:
        super().__init__()
        pool_cfg = dict(kernel_size=2, stride=2)

        # Stage 1: [3, 256, 256] -> [64, 128, 128] -> [64, 64, 64]
        self.conv1 = downsample_block(3, 64)
        self.MaxPool1 = nn.MaxPool2d(**pool_cfg)
        # Stage 2: [64, 64, 64] -> [128, 32, 32] -> [128, 16, 16]
        self.conv2 = downsample_block(64, 128)
        self.MaxPool2 = nn.MaxPool2d(**pool_cfg)
        # Stage 3: [128, 16, 16] -> [256, 8, 8] -> [256, 4, 4]
        self.conv3 = downsample_block(128, 256)
        self.MaxPool3 = nn.MaxPool2d(**pool_cfg)
        # Stage 4: [256, 4, 4] -> [512, 2, 2] -> [512, 1, 1]
        self.conv4 = downsample_block(256, 512)
        self.MaxPool4 = nn.MaxPool2d(**pool_cfg)

        # Classifier head: flatten [512, 1, 1] -> [512] -> [128] -> [1]
        self.Flatten = nn.Flatten()
        self.fc1 = nn.Linear(512, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):
        """Return the sigmoid bus probability for each image in the batch ``x``."""
        stages = (
            (self.conv1, self.MaxPool1),
            (self.conv2, self.MaxPool2),
            (self.conv3, self.MaxPool3),
            (self.conv4, self.MaxPool4),
        )
        features = x
        for conv, pool in stages:
            features = pool(conv(features))

        flat = self.Flatten(features)      # [512, 1, 1] -> [512]
        hidden = F.relu(self.fc1(flat))    # [512] -> [128]
        return torch.sigmoid(self.fc2(hidden))  # [128] -> [1]


In [None]:
# Seed PyTorch's global RNG before any weights are created so that a fresh
# "Restart Kernel -> Run All" starts from the same initial parameters. Code that
# later draws from the global RNG (e.g. a shuffling DataLoader) becomes
# repeatable as well.
torch.manual_seed(42)
BusD = BusDetector()

In [None]:
# Adam updates every BusD parameter; binary cross-entropy scores the
# probabilities the network emits.
optimizer = optim.Adam(params=BusD.parameters(), lr=1e-3)
criterion = nn.BCELoss()

In [None]:
# Train the network for 10 epochs; the value printed per epoch is the sum of
# the per-batch loss values.
BusD.train()  # make sure BatchNorm layers run in training mode
for epoch in range(10):
    epoch_loss = 0
    for inputs, labels in train_loader:
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        # Drop only dim 1 ([B, 1] -> [B]). A bare squeeze() would also remove
        # the batch dim when a batch holds a single sample, leaving a 0-d
        # tensor that no longer matches the [1]-shaped target in BCELoss.
        outputs = BusD(inputs).squeeze(1)
        loss = criterion(outputs, labels.float())
        loss.backward()
        optimizer.step()
        # accumulate the loss
        epoch_loss += loss.item()

    print("Epoch: ", epoch, " Loss: ", epoch_loss)


Epoch:  0  Loss:  10.141899704933167
Epoch:  1  Loss:  7.999439537525177
Epoch:  2  Loss:  6.418636292219162
Epoch:  3  Loss:  5.27546226978302
Epoch:  4  Loss:  3.7931689620018005
Epoch:  5  Loss:  2.237104669213295
Epoch:  6  Loss:  1.1597260981798172
Epoch:  7  Loss:  0.44545393623411655
Epoch:  8  Loss:  0.17925817845389247
Epoch:  9  Loss:  0.10648090043105185


In [108]:
# Persist only the learned parameters (the state_dict) to disk.
torch.save(obj=BusD.state_dict(), f='BusDetector.pth')