In [1]:
%matplotlib notebook

In [21]:
import copy
import glob
import time

import cv2 as cv
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from torchvision.models.detection import ssdlite320_mobilenet_v3_large, SSDLite320_MobileNet_V3_Large_Weights
from torchvision.models import vgg19_bn, VGG19_BN_Weights
from torchvision.io import read_image
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image

In [18]:
# Run the pretrained SSDLite detector on one training image and display the
# first four returned detections drawn on top of it.
filepath = "train/PXL_20201101_154113387_jpg.rf.5986f049935f2858946577b2c7c6fa7e.jpg"
weights = SSDLite320_MobileNet_V3_Large_Weights.DEFAULT
model = ssdlite320_mobilenet_v3_large(weights=weights)
model.eval()

pic = read_image(filepath)

# The weights object carries the preprocessing that matches the model.
preprocess = weights.transforms()
batch = preprocess(pic).unsqueeze(0)

# Feed the preprocessed `batch` to the detector. This is inference only, so
# gradient tracking is switched off.
with torch.no_grad():
    predictions = model(batch)[0]

boxes = predictions["boxes"][:4]
labels = [weights.meta["categories"][i] for i in predictions["labels"][:4]]

print(labels)

box = draw_bounding_boxes(pic, boxes=boxes,
                          labels=labels,
                          colors="red",
                          width=2)
im = to_pil_image(box.detach())
im.show()

['refrigerator', 'book', 'book', 'refrigerator']
Opening in existing browser session.


In [14]:
# Classify one training image with the pretrained VGG19-BN model and print the
# most likely category together with its probability.
filepath = "train/PXL_20201101_154113387_jpg.rf.5986f049935f2858946577b2c7c6fa7e.jpg"
vggweights = VGG19_BN_Weights.DEFAULT
vggmodel = vgg19_bn(weights=vggweights)
vggmodel.eval()

img = read_image(filepath)

# Build the inference transforms bundled with the weights and apply them,
# adding a leading batch dimension of size 1.
preprocess = vggweights.transforms()
batch = preprocess(img).unsqueeze(0)

# Run the model and convert its outputs into class probabilities. No gradients
# are needed for a single forward pass.
with torch.no_grad():
    prediction = vggmodel(batch).squeeze(0).softmax(0)

# Pick the highest-probability class and look up its human-readable name.
class_id = prediction.argmax().item()
score = prediction[class_id].item()
category_name = vggweights.meta["categories"][class_id]
print(f"{category_name}: {100 * score:.1f}%")

crate: 52.0%


In [19]:
class CustomImageDataset(Dataset):
    """Dataset over two parallel, indexable collections: images and labels.

    Args:
        numpydata: indexable collection of images.
        numpylabels: indexable collection of labels; its length defines the
            length of the dataset.
        transform: optional callable applied to each image on access.
        target_transform: optional callable applied to each label on access.
    """

    def __init__(self, numpydata, numpylabels, transform=None, target_transform=None):
        self.imgs, self.img_labels = numpydata, numpylabels
        self.transform, self.target_transform = transform, target_transform

    def __len__(self):
        """Return the number of labels held by the dataset."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at ``idx`` after optional transforms."""
        target = self.img_labels[idx]
        sample = self.imgs[idx]

        # Transforms are optional; a falsy value means "leave untouched".
        sample = self.transform(sample) if self.transform else sample
        target = self.target_transform(target) if self.target_transform else target
        return sample, target

In [None]:
def _load_grayscale_stack(pattern):
    """Read every file matching ``pattern`` as a grayscale image and stack them.

    Files are visited in sorted order so the later train/validation split is
    reproducible. Files that OpenCV cannot decode (``cv.imread`` returns
    ``None``) are skipped. All images must share the same height and width.

    Returns:
        A float array with one image per entry along axis 0; a
        ``(0, 24, 24)`` array when nothing matched.
    """
    frames = []
    for filename in sorted(glob.glob(pattern)):
        pixels = cv.imread(filename, 0)
        if pixels is None:
            continue
        # NOTE(review): this maps pixel values to [1, 2] rather than [0, 1];
        # confirm that the +1 offset is intended.
        frames.append(pixels / 255 + 1)
    if not frames:
        return np.empty((0, 24, 24))
    return np.stack(frames)


# Positive class (label 1): images that contain a box.
# Negative class (label 0): images without one.
yes_x = _load_grayscale_stack('sorted_imgs/box/*.*')
no_x = _load_grayscale_stack('sorted_imgs/no_box/*.*')

# 80/20 train/validation split per class. The index must be an integer to be
# usable for slicing and list repetition.
p_split = int(len(yes_x) * 0.8)
np_split = int(len(no_x) * 0.8)

train_x = np.vstack((no_x[:np_split], yes_x[:p_split]))
valid_x = np.vstack((no_x[np_split:], yes_x[p_split:]))

train_y = [0] * np_split + [1] * p_split
valid_y = [0] * (len(no_x) - np_split) + [1] * (len(yes_x) - p_split)

# NOTE(review): samples are single-channel arrays without a channel axis; a
# network that expects 3-channel input will need a transform - confirm.
train_x = torch.tensor(train_x, dtype=torch.float32)
valid_x = torch.tensor(valid_x, dtype=torch.float32)

# Class-index targets must be integer (long) tensors for nn.CrossEntropyLoss.
train_y = torch.tensor(train_y, dtype=torch.long)
valid_y = torch.tensor(valid_y, dtype=torch.long)

data = CustomImageDataset(train_x, train_y)
valid_data = CustomImageDataset(valid_x, valid_y)

# `batch_size` comes from the configuration cell, which has to be run first.
trainloader = DataLoader(data, batch_size=batch_size, shuffle=True)
# Shuffling does not affect validation metrics, so keep a fixed order.
valloader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)

loaders = {
    "train": trainloader,
    "val": valloader,
}

In [15]:
# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------

# Architecture selector; one of: resnet, alexnet, vgg, squeezenet, densenet,
# inception.
model_name = "vgg"

# Root folder of the image data, assumed to follow the ImageFolder layout.
data_dir = "./cropped_imgs"

# How many classes the dataset contains.
num_classes = 2

# Fine-tuning mode switch:
#   True  -> feature extraction, only the reshaped layer's parameters update
#   False -> the whole model is fine-tuned
feature_extract = False

# Samples per training batch; lower it if memory is tight.
batch_size = 8

# How many epochs to train for.
num_epochs = 15

def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    """Train ``model`` and restore the weights of its best validation epoch.

    Every epoch runs a 'train' phase followed by a 'val' phase. The state dict
    with the highest validation accuracy seen so far is remembered and loaded
    back into the model before returning.

    Args:
        model: network to train; it is modified in place.
        dataloaders: mapping with 'train' and 'val' entries, each iterable
            over ``(inputs, labels)`` batches and exposing a sized
            ``.dataset`` attribute.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer whose ``zero_grad()`` / ``step()`` drive updates.
        num_epochs: number of epochs to run.
        is_inception: when True, the model is expected to return
            ``(outputs, aux_outputs)`` in training mode and the loss is the
            main loss plus 0.4 times the auxiliary loss.

    Returns:
        ``(model, val_acc_history)`` where ``val_acc_history`` holds one
        validation accuracy per epoch.
    """
    since = time.time()

    # Send batches to the device the model's parameters live on, so the
    # function does not rely on a notebook-level `device` variable.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Special case for inception: in training it also returns
                    # an auxiliary output whose loss is added with weight 0.4;
                    # outside training only the final output is used.
                    if is_inception and phase == 'train':
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    # Predicted class = index of the largest output per sample.
                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics: the loss is weighted by batch size so the epoch
                # figure below is a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [None]:
# Fine-tune the VGG model on the `loaders` dict using cross-entropy loss and
# the Adam optimizer over all of the model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vggmodel.parameters(), lr=0.001)
epochs = 25

# train_model returns the model (with its best validation weights loaded) and
# the per-epoch validation accuracies. Keep both rather than discarding them.
vggmodel, val_acc_history = train_model(vggmodel, loaders, criterion, optimizer, num_epochs=epochs)