### Set Data Path

In [5]:
from pathlib import Path

base_dir = Path("data")
train_dir = base_dir/Path("train")
validation_dir = base_dir/Path("validation")
test_dir = base_dir/Path("test")

### Image Transform Function

In [6]:
from torchvision import transforms

transform = transforms.Compose([ 
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(.5, .5, .5), std=(.5, .5, .5))
])

ModuleNotFoundError: No module named 'torchvision'

### Load Training Data (x: features, y: labels)

In [10]:
from PIL import Image

x, y = [], []
for file_name in train_dir.glob("*.jpg"):
    bounding_box_file = file_name.with_suffix('.txt')
    
    with open(bounding_box_file) as file:
        lines = file.readlines()
        if(len(lines) > 1):
            continue
        else:
            line = lines[0].strip('\n')
            (classes, cen_x, cen_y, box_w, box_h) = list(map(float, line.split(' ')))
            torch_data = torch.FloatTensor([cen_x, cen_y, box_w, box_h])
            y.append(torch_data)
            
    img = Image.open(str(file_name)).convert('RGB')
    img = transform(img)
    x.append(img)

### Put Training Data into Torch Loader

In [12]:
import torch.utils.data as Data

tensor_x = torch.stack(x)
tensor_y = torch.stack(y)
torch_dataset = Data.TensorDataset(tensor_x, tensor_y)
loader = Data.DataLoader(dataset=torch_dataset, batch_size=32, shuffle=True, num_workers=2)

### Load Pretrained RestNet18 Model

In [4]:
import torchvision

model = torchvision.models.resnet18(pretrained=True)
fc_in_size = model.fc.in_features
model.fc = nn.Linear(fc_in_size, 4)
model = model.cuda()

### Parameters

In [13]:
EPOCH = 10
LR = 1e-3

### Loss Function & Optimizer

In [14]:
loss_func = nn.SmoothL1Loss().cuda()
opt = torch.optim.Adam(model.parameters(), lr=LR)

### Training

In [17]:
for epoch in range(EPOCH):
    for step, (batch_x, batch_y) in enumerate(loader):
        batch_x = batch_x.cuda()
        batch_y = batch_y.cuda()
        output = model(batch_x)
        loss = loss_func(output, batch_y)
        opt.zero_grad()
        loss.backward()
        opt.step()
        
        if(step % 5 == 0):
            print("Epoch {} | Step {} | Loss {}".format(epoch, step, loss))

Epoch 0 | Step 0 | Loss 0.15668310225009918
Epoch 0 | Step 5 | Loss 0.11711937189102173
Epoch 0 | Step 10 | Loss 0.05173167586326599
Epoch 1 | Step 0 | Loss 0.01010880246758461
Epoch 1 | Step 5 | Loss 0.00739856343716383
Epoch 1 | Step 10 | Loss 0.008690240792930126
Epoch 2 | Step 0 | Loss 0.008737225085496902
Epoch 2 | Step 5 | Loss 0.00662753451615572
Epoch 2 | Step 10 | Loss 0.009796814993023872
Epoch 3 | Step 0 | Loss 0.005545835010707378
Epoch 3 | Step 5 | Loss 0.007509482093155384
Epoch 3 | Step 10 | Loss 0.004612468648701906
Epoch 4 | Step 0 | Loss 0.0042313477024436
Epoch 4 | Step 5 | Loss 0.006238226778805256
Epoch 4 | Step 10 | Loss 0.005332221742719412
Epoch 5 | Step 0 | Loss 0.005516073666512966
Epoch 5 | Step 5 | Loss 0.009235391393303871
Epoch 5 | Step 10 | Loss 0.004736854694783688
Epoch 6 | Step 0 | Loss 0.007654199842363596
Epoch 6 | Step 5 | Loss 0.003848954336717725
Epoch 6 | Step 10 | Loss 0.00264316750690341
Epoch 7 | Step 0 | Loss 0.0036661745980381966
Epoch 7 | S

### Show some of the Prediction

In [20]:
%matplotlib inline
import cv2
from matplotlib import pyplot as plt
import numpy as np

model = model.cpu()
for batch_x, batch_y in loader:
    predict = model(batch_x)
    for x, pred, y in zip(batch_x, predict, batch_y):
        (pos_x, pos_y, box_w, box_h) = pred
        pos_x *= 224
        pos_y *= 224
        box_w *= 224
        box_h *= 224
        
        image = transforms.ToPILImage()(x)
        img = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
        img = cv2.rectangle(img, (pos_x - box_w/2, pos_y - box_h/2), (pos_x + box_w/2, pos_y + box_h/2), (255, 0, 0), 3)
        
        plt.imshow(img)
        plt.show()
    break

----------------------------------------------------------------------------------------------------


RuntimeError: Can't call numpy() on Variable that requires grad. Use var.detach().numpy() instead.