In [8]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torchvision.models import squeezenet1_1, SqueezeNet1_1_Weights
import matplotlib.pyplot as plt
from torchinfo import summary
import itertools
from PIL import Image
from torchmetrics import detection
from time import time

In [53]:
model = squeezenet1_1(weights=SqueezeNet1_1_Weights.DEFAULT) 

In [54]:
transform = torchvision.models.SqueezeNet1_1_Weights.DEFAULT.transforms()
transform

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)

In [55]:
batch_size = 4

nottestset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)

generator1 = torch.Generator().manual_seed(5)

trainset, validationset = torch.utils.data.random_split(nottestset, [0.8, 0.2], generator=generator1)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

validationloader = torch.utils.data.DataLoader(validationset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [56]:
summary(model, 
        input_size=(1, 3, 224, 224),
        verbose=0,
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"]
    )

Layer (type (var_name))                       Input Shape          Output Shape         Param #              Trainable
SqueezeNet (SqueezeNet)                       [1, 3, 224, 224]     [1, 1000]            --                   True
├─Sequential (features)                       [1, 3, 224, 224]     [1, 512, 13, 13]     --                   True
│    └─Conv2d (0)                             [1, 3, 224, 224]     [1, 64, 111, 111]    1,792                True
│    └─ReLU (1)                               [1, 64, 111, 111]    [1, 64, 111, 111]    --                   --
│    └─MaxPool2d (2)                          [1, 64, 111, 111]    [1, 64, 55, 55]      --                   --
│    └─Fire (3)                               [1, 64, 55, 55]      [1, 128, 55, 55]     --                   True
│    │    └─Conv2d (squeeze)                  [1, 64, 55, 55]      [1, 16, 55, 55]      1,040                True
│    │    └─ReLU (squeeze_activation)         [1, 16, 55, 55]      [1, 16, 55, 55]     

In [57]:
model.classifier = torch.nn.Sequential(
                        torch.nn.AdaptiveAvgPool2d(1),
                        torch.nn.Flatten(),
                        torch.nn.Linear(in_features=512, 
                            out_features=10,
                            bias=True))

In [58]:
for param in model.parameters():
    param.requires_grad = False
for param in model.classifier.parameters():
    param.requires_grad = True

In [59]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

start = time()
for epoch in range(2):

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')
end = time()
print(end - start)

[1,  2000] loss: 4.287
[1,  4000] loss: 4.148
[1,  6000] loss: 3.739
[1,  8000] loss: 3.734
[1, 10000] loss: 3.452
[2,  2000] loss: 2.940
[2,  4000] loss: 3.393
[2,  6000] loss: 3.409
[2,  8000] loss: 3.515
[2, 10000] loss: 3.354
Finished Training
1814.7992973327637


In [60]:

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the test set: {100 * correct // total} %')


Accuracy of the network on the test set: 75 %


In [49]:
for param in model.parameters():
    param.requires_grad = False
for param in model.classifier.parameters():
    param.requires_grad = True
for param in model.features[9:].parameters():
    param.requires_grad = True

In [50]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

start = time()
for epoch in range(1):

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')
end = time()
print(end - start)

[1,  2000] loss: 0.972
[1,  4000] loss: 0.587
[1,  6000] loss: 0.518
[1,  8000] loss: 0.483
[1, 10000] loss: 0.453
Finished Training
1135.1543862819672


In [51]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the test set: {100 * correct // total} %')

Accuracy of the network on the test set: 84 %


In [111]:
for param in model.parameters():
    param.requires_grad = True

In [112]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

start = time()
for epoch in range(1):

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')
end = time()
print(end - start)

[1,  2000] loss: 0.248
[1,  4000] loss: 0.253
[1,  6000] loss: 0.251
[1,  8000] loss: 0.254
[1, 10000] loss: 0.246
Finished Training
1913.9227967262268


In [113]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the test set: {100 * correct // total} %')

Accuracy of the network on the test set: 88 %


# Q2

In [25]:
from torchvision.io.image import read_image
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image

In [26]:
weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
model = fasterrcnn_resnet50_fpn_v2(weights = weights)
model.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       

In [34]:
import fiftyone as fo
import fiftyone.zoo as foz

fiftyone_data = foz.load_zoo_dataset(
    "coco-2017",
    split="validation",
    label_types=["detections"],
    classes=["cat", "dog"],
    max_samples=100,
)

Downloading split 'validation' to 'C:\Users\simon\fiftyone\coco-2017\validation' if necessary
Found annotations at 'C:\Users\simon\fiftyone\coco-2017\raw\instances_val2017.json'
Sufficient images already downloaded
Existing download of split 'validation' is sufficient
Loading existing dataset 'coco-2017-validation-100'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


In [35]:
import fiftyone.utils.coco as fouc

#adapted from https://github.com/voxel51/fiftyone-examples/blob/master/examples/pytorch_detection_training.ipynb
class FiftyOneTorchDataset(torch.utils.data.Dataset):
    """A class to construct a PyTorch dataset from a FiftyOne dataset.
    
    Args:
        fiftyone_dataset: a FiftyOne dataset or view that will be used for training or testing
        transforms (None): a list of PyTorch transforms to apply to images and targets when loading
        gt_field ("ground_truth"): the name of the field in fiftyone_dataset that contains the 
            desired labels to load
        classes (None): a list of class strings that are used to define the mapping between
            class names and indices. If None, it will use all classes present in the given fiftyone_dataset.
    """

    def __init__(
        self,
        fiftyone_dataset,
        transforms=None,
        gt_field="ground_truth",
        classes=None,
    ):
        self.samples = fiftyone_dataset
        self.transforms = transforms
        self.gt_field = gt_field

        self.img_paths = self.samples.values("filepath")

        self.classes = classes
        if not self.classes:
            self.classes = self.samples.distinct(
                "%s.detections.label" % gt_field
            )

        if self.classes[0] != "background":
            self.classes = ["background"] + self.classes

        self.labels_map_rev = {c: i for i, c in enumerate(self.classes)}

    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        sample = self.samples[img_path]
        metadata = sample.metadata
        img = Image.open(img_path).convert("RGB")

        boxes = []
        labels = []
        area = []
        iscrowd = []
        detections = sample[self.gt_field].detections
        for det in detections:
            category_id = self.labels_map_rev[det.label]
            coco_obj = fouc.COCOObject.from_label(
                det, metadata, category_id=category_id,
            )
            x, y, w, h = coco_obj.bbox
            boxes.append([x, y, x + w, y + h])
            labels.append(coco_obj.category_id)
            area.append(coco_obj.area)
            iscrowd.append(coco_obj.iscrowd)

        target = {}
        target["boxes"] = torch.as_tensor(boxes, dtype=torch.float32)
        target["labels"] = torch.as_tensor(labels, dtype=torch.int64)
        target["image_id"] = torch.as_tensor([idx])
        target["area"] = torch.as_tensor(area, dtype=torch.float32)
        target["iscrowd"] = torch.as_tensor(iscrowd, dtype=torch.int64)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        return len(self.img_paths)

    def get_classes(self):
        return self.classes

In [36]:
dataset = FiftyOneTorchDataset(fiftyone_data)
detection_transform = transforms.Compose([transforms.ToTensor()])

In [39]:
predictions = []
targets = []
i = 0

sorter = np.argsort(dataset.classes)
for data in dataset:
    image = data[0]
    target = data[1]   
    prediction = model(detection_transform(image)[None,:,:,:])[0]
    
    label_strings = np.asarray(weights.meta["categories"])[prediction['labels']]
    converted_labels = torch.tensor(sorter[np.searchsorted(dataset.classes, label_strings, sorter=sorter)])
    prediction['labels'] = converted_labels
    
    predictions += [prediction]
    targets += [target]
    
    i += 1
    if (i % 10 == 0):
        print(i)
        

metric = detection.MeanAveragePrecision()
metric.update(predictions, targets)
metric.compute()

10
20
30
40
50
60
70
80
90
100


{'map': tensor(0.5207),
 'map_50': tensor(0.6882),
 'map_75': tensor(0.6310),
 'map_small': tensor(-1.),
 'map_medium': tensor(0.3168),
 'map_large': tensor(0.5461),
 'mar_1': tensor(0.4907),
 'mar_10': tensor(0.6566),
 'mar_100': tensor(0.6566),
 'mar_small': tensor(-1.),
 'mar_medium': tensor(0.3167),
 'mar_large': tensor(0.6876),
 'map_per_class': tensor(-1.),
 'mar_100_per_class': tensor(-1.)}

In [17]:
def make_random_colors(n):
    colors = [()]*n
    for i in range(n):
        random_color = tuple(np.random.choice(range(255),size=3))
        colors[i] = random_color
    return colors

In [18]:
PIL_transform = transforms.Compose([transforms.PILToTensor()])
random_colors = make_random_colors(len(weights.meta["categories"]))

def show_prediction(img, prediction):
    img = PIL_transform(img)
    cutoff = (prediction['scores'] > 0.8).sum()
    labels = [weights.meta["categories"][i] for i in prediction["labels"]]
    box = draw_bounding_boxes(img, boxes=prediction["boxes"][:cutoff],
                              labels=labels[:cutoff],
                              colors=[random_colors[l] for l in prediction["labels"]],
                              width=4, font_size=30)
    im = to_pil_image(box.detach())
    im.show()

In [20]:
for i in range(4):
    prediction = model(detection_transform(dataset[i][0])[None,:,:,:])[0]
    show_prediction(dataset[i][0], prediction)

# Q3

In [None]:
#Note - Q3 code was executed in collab due to high memory requirements, so cell output is not visible here
from zipfile import ZipFile
#!unzip lfwcrop_color.zip
with ZipFile("lfwcrop_color.zip", 'r') as zObject:
  
    # Extracting all the members of the zip 
    # into a specific location.
    zObject.extractall(
        path="lfw_data")

In [None]:
#structure target data
from PIL import Image
import os
import glob
d = 0
for filename in glob.glob('lfw_data/lfwcrop_color/faces/*ppm'):
    img = Image.open(filename)

    name = filename[filename.index('faces/')+6:filename.index('.')]

    newfilename = "lfw_data_structured/target/" + name + "/" + name + ".jpg"
    os.makedirs(os.path.dirname(newfilename), exist_ok=True)
    img = img.save(newfilename)
    d+=1
    if (d % 1000 == 0):
        print(d)

In [None]:
#create (noisy) input data
d = 0
for filename in glob.glob('lfw_data/lfwcrop_color/faces/*ppm'):
    img = Image.open(filename)
    
    noise = np.random.normal(0.0, 255.0 * 0.1, (10, 64, 64, 3))
    noisy_images = np.clip((noise + np.array(img)).astype(np.uint8), 0, 255)
    
    name = filename[filename.index('faces/')+6:filename.index('.')]
    i = 1
    for noisy_image in noisy_images:
        new_img = Image.fromarray(noisy_image)
        newfilename = "lfw_data_structured/noisy/" + name + "/" + name + "_%d.jpg"%i
        os.makedirs(os.path.dirname(newfilename), exist_ok=True)
        new_img = new_img.save(newfilename)
        i+=1
    d+=1
    if (d % 1000 == 0):
        print(d)

In [142]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     ])
dataset_noise = torchvision.datasets.ImageFolder('lfw_data_structured/noisy', transform=transform)
dataset_target = torchvision.datasets.ImageFolder('lfw_data_structured/target', transform=transform)

In [143]:
class DualImageDataset(torch.utils.data.Dataset):
    def __init__(self, noisy, target):
        self.noisy = noisy
        self.target = target

    def __len__(self):
        return len(self.noisy)

    def __getitem__(self, index):
        noisy_image, target_image = self.noisy[index][0], self.target[self.noisy[index][1]][0]
        return noisy_image, target_image
    
dataset = DualImageDataset(dataset_noise, dataset_target)

In [160]:
batch_size = 5

generator1 = torch.Generator().manual_seed(5)

trainset, validationset, testset = torch.utils.data.random_split(dataset, [0.8, 0.1, 0.1], generator=generator1)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

validationloader = torch.utils.data.DataLoader(validationset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

In [151]:
#denoising neural network implementation
class AutoEncoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.pool = nn.MaxPool2d(2, 2, return_indices=True)
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)    
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1) 
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv4 = nn.Conv2d(128, 256, 3, padding=1)
        
        self.unpool = nn.MaxUnpool2d(2, 2)
        self.conv5 = nn.Conv2d(256, 128, 3, padding=1)
        self.conv6 = nn.Conv2d(128, 64, 3, padding=1)
        self.conv7 = nn.Conv2d(64, 32, 3, padding=1) 
        self.conv8 = nn.Conv2d(32, 3, 3, padding=1)
        

    def forward(self, x):
        x, i1 = self.pool(F.relu(self.conv1(x)))      
        x, i2 = self.pool(F.relu(self.conv2(x)))    
        x, i3 = self.pool(F.relu(self.conv3(x)))
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(self.unpool(x, i3)))
        x = F.relu(self.conv7(self.unpool(x, i2)))
        x = F.relu(self.conv8(self.unpool(x, i1)))
        return x


net = AutoEncoder()

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)
net = net.to(device)

cpu


In [None]:
criterion = torch.nn.MSELoss()
#optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

for epoch in range(1):

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, targets = data
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 1000 == 999:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 1000:.3f}')
            running_loss = 0.0

print('Finished Training')

In [None]:
sample_at = 5
k = int(np.floor(sample_at/batch_size))

my_sample = next(itertools.islice(trainloader, k, None))

In [None]:
noisy, target = my_sample[0], my_sample[1]
plt.imshow(target[1].permute(1, 2, 0))

In [None]:
plt.imshow(noisy[1].permute(1, 2, 0))

In [None]:
with torch.no_grad():
    net = net.to('cpu')
    denoised = net(noisy)
    plt.imshow(denoised[1].permute(1, 2, 0))

In [None]:
correct = 0
total = 0
i = 0
net = net.to(device)
with torch.no_grad():
    for data in testloader:
        i += 1
        inputs, targets = data
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        total += loss

print(f'Average MSE on the test set: {total / i}')