In [5]:
import fiftyone as fo
import numpy as np

# Load the "train" split of Open Images V7 from the FiftyOne zoo, requesting
# only detection labels and only the "Cat" class.
# Re-enable `max_samples` below to work on a small subset while iterating.
dataset = fo.zoo.load_zoo_dataset(
    "open-images-v7",
    split="train",
    label_types=["detections"],
    classes=["Cat"],
    # max_samples=10,
)

Downloading split 'train' to '/Users/ryan.wong/fiftyone/open-images-v7/train' if necessary
Necessary images already downloaded
Existing download of split 'train' is sufficient
Loading existing dataset 'open-images-v7-train'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.optim as optim
import torchvision.transforms as transforms
import cv2

In [9]:
batch_size = 100
n_samples = len(dataset)
# Only full batches are built; the trailing n_samples % batch_size samples
# are dropped by the integer division.
n_batches = n_samples // batch_size

# Target (width, height) every image is resized to so that all images fit in
# one dense array. Per the original author's note, 267 x 326 is the size of
# the smallest images in the dataset.
x_size = 267
y_size = 326

# Image batches, indexed as (batch, sample, row, column, channel).
# NOTE(review): the default dtype is float64, i.e. roughly 209 MB per batch of
# 100 images -- consider uint8/float32 if memory becomes a problem.
batched_data = np.empty((n_batches, batch_size, y_size, x_size, 3))
# One 4-value bounding box per sample. Zero-initialised so that a sample with
# no 'Cat' detection yields an all-zero box instead of uninitialised memory.
batched_boxes = np.zeros((n_batches, batch_size, 4))

for batch_index in range(n_batches):
    batch_view = dataset[batch_index * batch_size:(batch_index + 1) * batch_size]
    for i, sample in enumerate(batch_view):
        image = cv2.imread(sample['filepath'])
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise OSError(f"cv2.imread could not read image: {sample['filepath']}")
        # cv2.resize takes dsize as (width, height); the result has shape
        # (y_size, x_size, 3), matching one slot of batched_data.
        batched_data[batch_index, i] = cv2.resize(image, (x_size, y_size))

        # Keep only the first 'Cat' detection of each sample.
        # presumably bounding_box is FiftyOne's relative [x, y, width, height]
        # format -- TODO confirm.
        for detection in sample['ground_truth']['detections']:
            if detection['label'] == 'Cat':
                batched_boxes[batch_index, i] = detection['bounding_box']
                break


In [None]:
# untested
import pickle

# Persist the batched arrays to disk. The files are opened in `with` blocks so
# the handles are flushed and closed even if pickling fails part-way.
# The 'dataset/' directory must already exist; open() does not create it.
with open('dataset/batched_data.pickle', 'wb') as data_file:
    pickle.dump(batched_data, data_file)
with open('dataset/batched_boxes.pickle', 'wb') as boxes_file:
    pickle.dump(batched_boxes, boxes_file)

In [None]:
class Net(nn.Module):
    """Small CNN: three conv/ReLU/max-pool stages followed by three linear layers.

    The first convolution takes a single input channel and the last linear
    layer emits 16 values per sample. All convolutions use 5x5 kernels without
    padding. ``fc1`` is fixed at 128 input features, so the input's spatial
    size must reduce to 32 * h * w == 128 after the conv stack (for example a
    60x60 input ends up as 32 x 2 x 2).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor: 1 -> 8 -> 16 -> 32 channels.
        self.conv1 = nn.Conv2d(1, 8, kernel_size=5)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=5)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=5)
        # Pooling layers; note that pool3 is the 3x3 one.
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=3)
        # Fully connected head (an earlier variant used 1024 input features).
        self.fc1 = nn.Linear(128, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 16)

    def forward(self, x):
        """Return the raw (un-normalised) 16-way outputs for a batch ``x``."""
        # Pooling is applied in the order pool1, pool3, pool2.
        stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool3),
            (self.conv3, self.pool2),
        )
        for conv, pool in stages:
            x = pool(F.relu(conv(x)))

        # Flatten everything except the batch dimension.
        flat = x.view(-1, self.num_flat_features(x))
        # The linear layers are chained directly, with no activation between
        # them and no softmax at the end.
        return self.fc3(self.fc2(self.fc1(flat)))

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first."""
        return x.size()[1:].numel()

# Prefer GPU index 2 when the machine actually has at least three GPUs; fall
# back to the first GPU, then to the CPU. Selecting "cuda:2" whenever any GPU
# is present would fail on machines with fewer than three devices.
if torch.cuda.is_available():
    device = torch.device("cuda:2" if torch.cuda.device_count() > 2 else "cuda:0")
else:
    device = torch.device("cpu")

net = Net().to(device)

# NOTE(review): `x` and `y` are not defined in any visible cell; they must be
# provided before this cell runs. The indexing below inserts a channel axis
# at position 1, so `x` is assumed to be (samples, height, width) -- TODO confirm.
x_train_torch = torch.from_numpy(x.astype(np.float32))[:, None, :, :]
print(x.shape)
print(x_train_torch.shape)
y_train_torch = torch.from_numpy(y.astype(np.float32))
print(y.shape)
print(y_train_torch.shape)

criterion = nn.CrossEntropyLoss() # this is for the classification
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.6)

# Move the full training set to the device once, instead of on every epoch.
# The inputs do not need gradients: only the network parameters are optimised.
inputs = x_train_torch.to(device)
labels = y_train_torch.to(device)
# CrossEntropyLoss expects integer class indices starting at 0; the labels
# are shifted down by one (assumes `y` holds 1-based class ids -- TODO confirm).
targets = labels.long() - 1

previous_loss = 0
running_loss = 5
epoch = 0
# Full-batch gradient descent for a fixed number of epochs. `previous_loss` is
# kept so a convergence criterion such as
# abs(running_loss - previous_loss) > 0.0001 can be re-enabled later.
while epoch < 3000:
    previous_loss = running_loss
    epoch += 1

    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    outputs = net(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()

    # print statistics
    running_loss = loss.item()
    with torch.no_grad():
        # Fraction of samples whose arg-max output matches the target class.
        accuracy = (outputs.argmax(dim=1) == targets).float().mean().item()
    print("loss on epoch",epoch,":",running_loss, "\taccuracy:", 100*accuracy)

print('Finished Training')