In [23]:
from torchvision.models import vgg19_bn, VGG19_BN_Weights
from torchvision.io import read_image
from torchvision.datasets import ImageFolder
import torchvision

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import numpy as np
import glob
import cv2 as cv
import skimage.io as io
import time
import copy
from PIL import Image

In [24]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [25]:
vggweights = VGG19_BN_Weights.DEFAULT
vggmodel = vgg19_bn(weights=vggweights)
vggmodel = vggmodel.eval()
vggmodel.to(device)
print()




In [26]:
def pilPad (im):
    desired_size = 256
    old_size = im.size  # old_size[0] is in (width, height) format

    ratio = float(desired_size)/max(old_size)
    new_size = tuple([int(x*ratio) for x in old_size])

    im = im.resize(new_size, Image.Resampling.LANCZOS)
#     im = im.resize(new_size, Resampling.LANCZOS)
    # create a new image and paste the resized on it

    new_im = Image.new("RGB", (desired_size, desired_size))
    new_im.paste(im, ((desired_size-new_size[0])//2,
                        (desired_size-new_size[1])//2))
    return new_im

In [27]:
class custom_fix_image(object):
    def __init__(self):
        pass
    
    def __call__(self, img):
        img = pilPad(img)
        return img

    def __repr__(self):
        return "Padding and resizing image"
    
class convertToCuda(object):
    def __init__(self, device):
        self.device = device
    
    def __call__(self, t):
        t_device = t.to(device=self.device)
        return t_device

    def __repr__(self):
        return "Changes tensor to device"

In [28]:
train_transforms = torchvision.transforms.Compose([
    custom_fix_image(),
    torchvision.transforms.ColorJitter(hue=.05, saturation=.05),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.RandomRotation(20),
    torchvision.transforms.ToTensor(),
    convertToCuda(device=device)
])

val_transforms = torchvision.transforms.Compose([
    custom_fix_image(),
    torchvision.transforms.ToTensor(),
    convertToCuda(device=device)
])

train_folder_dataset = ImageFolder('./sorted_imgs/train/', transform=train_transforms)
valid_folder_dataset = ImageFolder('./sorted_imgs/validation/', transform=val_transforms)

In [29]:
batch_size = 1

trainloader = torch.utils.data.DataLoader(train_folder_dataset, batch_size=batch_size, shuffle=True)
valloader = torch.utils.data.DataLoader(valid_folder_dataset, batch_size=batch_size, shuffle=True)

loaders = {
    "train":trainloader,
    "val":valloader
}

In [42]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=1, is_inception=False):
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over batches from dataloaders.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device).long()
                
                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                    input(" ")

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            input("Stop")
            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [43]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vggmodel.parameters(), lr=0.001)

model, val_acc_history = train_model(vggmodel, loaders, criterion, optimizer)

Epoch 0/0
----------
 
 


KeyboardInterrupt: Interrupted by user

In [35]:
from torchvision.models.detection import ssd300_vgg16, SSD300_VGG16_Weights
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image

In [32]:
# Step 1: Initialize model with the best available weights
weights = SSD300_VGG16_Weights.DEFAULT
# model = fasterrcnn_resnet50_fpn_v2(weights=weights, box_score_thresh=0.9)
model = ssd300_vgg16(weights=weights)
# model.to('cuda')
model = model.eval()

[W NNPACK.cpp:51] Could not initialize NNPACK! Reason: Unsupported hardware.
Downloading: "https://download.pytorch.org/models/ssd300_vgg16_coco-b556d3b4.pth" to /home/alanxw/.cache/torch/hub/checkpoints/ssd300_vgg16_coco-b556d3b4.pth
100.0%


In [86]:
def save_cropped_objects (filepath, num_imgs, id):
    img = read_image(filepath)
    img = img[:3,:,:]
    preprocess = weights.transforms()
    batch = [preprocess(img)]
    prediction = model(batch)[0]
    labels = [weights.meta["categories"][i] for i in prediction["labels"]]
    im = to_pil_image(img)
    for i in range(num_imgs):
        coords = prediction["boxes"][i+1].tolist()
        #print(coords)
        coords = [int(i) for i in coords]
        
        l = coords[2]-coords[0]
        h = coords[3]-coords[1]
        d = abs(l-h)
        
        o = d//2
        r = d%2
        newbox = coords
        if l>h:
            newbox = (coords[0], coords[1]-o-r, coords[2], coords[3]+o)
        elif l<h:
            newbox = (coords[0]-o-r, coords[1], coords[2]+o, coords[3])
        im1 = im.crop(newbox)
        im1.save("cropped_imgs/robo" + str(id) + "_" + str(i) + ".jpg")

In [87]:
filepath = 'original/'
for i, filename in enumerate(glob.glob(filepath + '*.*')):
    save_cropped_objects(filename, 5, i)