In [None]:
# Mount Google Drive at /content/drive/ so the project files stored there are
# reachable from this Colab runtime. force_remount=True is passed so the mount
# is redone even when the cell is executed again.
from google.colab import drive
drive.mount('/content/drive/', force_remount=True)

Mounted at /content/drive/


In [None]:
# Make the project folder on Drive the working directory; the shell cells below
# use paths relative to it (pascal.zip, da-fusion, stable-diffusion/,
# train_classifier.py).
import os
os.chdir('/content/drive/MyDrive/DLCV/project/da-fusion/')

In [None]:
!unzip pascal.zip
# !git clone https://github.com/brandontrabucco/da-fusion.git

In [None]:
!pip install diffusers["torch"] transformers pycocotools pandas matplotlib seaborn scipy datasets

In [None]:
!pip install -e da-fusion

In [None]:
# !git clone https://github.com/CompVis/stable-diffusion.git

In [None]:
!pip install -e stable-diffusion/

Installing collected packages: latent-diffusion
  Attempting uninstall: latent-diffusion
    Found existing installation: latent-diffusion 0.0.1
    Uninstalling latent-diffusion-0.0.1:
      Successfully uninstalled latent-diffusion-0.0.1
  Running setup.py develop for latent-diffusion
Successfully installed latent-diffusion-0.0.1


In [None]:
!python train_classifier.py --logdir pascal-baselines/textual-inversion-0.5 \
--synthetic-dir "aug/textual-inversion-0.5/{dataset}-{seed}-{examples_per_class}" \
--dataset pascal --prompt "a photo of a {name}" \
--aug textual-inversion --guidance-scale 7.5 \
--strength 0.5 --mask 0 --inverted 0 \
--num-synthetic 10 --synthetic-probability 0.5 \
--num-trials 1 --examples-per-class 4

In [None]:
!unzip /content/drive/MyDrive/DLCV/project/da-fusion/data/pascal.zip

In [None]:
import os
import torch
import numpy as np
import time
import tqdm
import shutil
import torch.nn as nn
import torchvision.models as  models
from torch.utils.data import DataLoader, ConcatDataset
import torchvision.datasets.voc as voc
import torch.optim as optim
from PIL import Image
from glob import glob
from torchvision import transforms
from torchvision.models import resnet18
import torch.utils.model_zoo as model_zoo
import xml.etree.cElementTree as ET

In [None]:
# --- Paths on Google Drive -------------------------------------------------
project_dir = '/content/drive/MyDrive/DLCV/project/da-fusion'
data_dir = f'{project_dir}/data'          # root passed to the train/val datasets
aug_dir = f'{project_dir}/pascal-aug'     # root passed to the augmented dataset
ckpt_dir = '/content/drive/MyDrive/cs444/project/checkpoints'

# --- Classes ----------------------------------------------------------------
# Position i of a label vector corresponds to object_categories[i].
object_categories = (
    'aeroplane bicycle bird boat '
    'bottle bus car cat chair '
    'cow diningtable dog horse '
    'motorbike person pottedplant '
    'sheep sofa train tvmonitor'
).split()
num_classes = len(object_categories)

# --- Training hyper-parameters ----------------------------------------------
batch_size = 32
resnet_lr = 1e-5   # learning rate of the first optimizer parameter group
fc_lr = 5e-3       # learning rate of the second optimizer parameter group
num_epochs = 35

# --- Per-channel statistics used by transforms.Normalize --------------------
mean = [0.457342265910642, 0.4387686270106377, 0.4073427106250871]
std = [0.26753769276329037, 0.2638145880487105, 0.2776826934044154]

# --- Device and reproducibility ---------------------------------------------
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

random_seed = 1902
np.random.seed(random_seed)
torch.manual_seed(random_seed)

<torch._C.Generator at 0x7ff69e76a970>

In [None]:
class PascalVOC_Dataset(voc.VOCDetection):
    """Thin subclass of ``voc.VOCDetection`` used for the Pascal VOC splits.

    It adds no behaviour of its own: construction and item access are delegated
    to the parent class, and the length is the size of ``self.images``.
    """

    def __init__(self, root, image_set='train', download=False, transform=None, target_transform=None):
        """Forward every argument unchanged to the parent constructor."""
        super().__init__(
            root,
            image_set=image_set,
            download=download,
            transform=transform,
            target_transform=target_transform,
        )

    def __getitem__(self, index):
        """Return whatever the parent class yields for ``index``."""
        sample = super().__getitem__(index)
        return sample

    def __len__(self):
        """Return the number of entries in ``self.images``."""
        image_entries = self.images
        return len(image_entries)

In [None]:
def encode_labels(target, categories=None):
    """Encode the objects of one annotation as a 1/0 vector over the categories.

    Args:
        target: annotation dictionary; ``target['annotation']['object']`` is
            either a single object mapping or a list of them. Each object must
            provide ``'name'`` and ``'difficult'``.
        categories: ordered list of class names defining the vector layout.
            Defaults to the notebook-level ``object_categories``.

    Returns:
        A 1-D tensor (built from a ``np.zeros`` array, hence float64) with one
        entry per category: 1 where at least one object of that category is
        present and not flagged as difficult, 0 elsewhere.

    Raises:
        ValueError: if an object's name is not contained in ``categories``.
    """
    if categories is None:
        categories = object_categories

    objects = target['annotation']['object']
    # A lone object arrives as a mapping rather than a list. isinstance also
    # accepts dict subclasses (e.g. OrderedDict), which an exact type comparison
    # would wrongly send down the list branch.
    if isinstance(objects, dict):
        objects = [objects]

    # Objects flagged as difficult do not contribute to the label vector.
    present = [categories.index(obj['name'])
               for obj in objects
               if int(obj['difficult']) == 0]

    encoded = np.zeros(len(categories))
    encoded[present] = 1
    return torch.from_numpy(encoded)

In [None]:
def _to_normalized_tensor():
    """Return fresh ToTensor + Normalize steps (using ``mean``/``std``) that end both pipelines."""
    return [transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]


# Training pipeline: Resize((300, 300)), then tensor conversion and normalisation.
transformations = transforms.Compose(
    [transforms.Resize((300, 300))] + _to_normalized_tensor()
)

# Validation pipeline: Resize(330) followed by CenterCrop(300), then the same tail.
transformations_valid = transforms.Compose(
    [transforms.Resize(330), transforms.CenterCrop(300)] + _to_normalized_tensor()
)

In [None]:
def _voc_split(root, split, transform):
    """Build a PascalVOC_Dataset for ``split`` under ``root`` whose targets go through encode_labels."""
    return PascalVOC_Dataset(
        root,
        image_set=split,
        download=False,
        transform=transform,
        target_transform=encode_labels,
    )


# Training data: the 'train' split from data_dir plus the 'train' split from
# aug_dir, concatenated into a single dataset.
dataset_train = _voc_split(data_dir, 'train', transformations)
dataset_aug = _voc_split(aug_dir, 'train', transformations)
dataset_combined = ConcatDataset([dataset_train, dataset_aug])
train_loader = DataLoader(
    dataset_combined,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# Validation data: the 'val' split from data_dir with the validation transforms.
dataset_valid = _voc_split(data_dir, 'val', transformations_valid)
valid_loader = DataLoader(
    dataset_valid,
    batch_size=batch_size,
    num_workers=2,
)

In [None]:
# Start from a ResNet-18 constructed with pretrained=True.
net = resnet18(pretrained=True)
# Use adaptive average pooling with output size 1 ahead of the classifier.
net.avgpool = torch.nn.AdaptiveAvgPool2d(1)
# Replace the final fully connected layer with one that has num_classes outputs,
# keeping the input width of the layer it replaces.
num_ftrs = net.fc.in_features
net.fc = torch.nn.Linear(num_ftrs, num_classes)
# Move the model to the selected device.
net = net.to(device)

In [None]:
# SGD with two parameter groups (momentum 0.9 in both): every parameter tensor
# except the last one is trained with resnet_lr, the last one with fc_lr.
# NOTE(review): the last tensor yielded by net.parameters() is presumably the
# bias of net.fc, which would leave net.fc's weight in the resnet_lr group —
# confirm that this split is intended.
all_params = list(net.parameters())
param_groups = [
    dict(params=all_params[:-1], lr=resnet_lr, momentum=0.9),
    dict(params=all_params[-1], lr=fc_lr, momentum=0.9),
]
optimizer = optim.SGD(param_groups)

# Cosine annealing of both learning rates towards 0 over 12 scheduler steps.
scheduler = optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=12,
    eta_min=0,
    last_epoch=-1,
)

# Sigmoid + binary cross-entropy per class, summed (not averaged) over the batch.
criterion = torch.nn.BCEWithLogitsLoss(reduction='sum')

In [None]:
def run_test(net, test_loader, criterion):
    """Print and return the top-1 accuracy of ``net`` over ``test_loader``.

    For every batch the index of the highest network output is compared with the
    index of the largest entry of the label vector (``torch.argmax`` along
    dim 1 for both). A sample counts as correct when the two indices match.

    Args:
        net: model to evaluate. Its train/eval mode is left untouched.
        test_loader: iterable yielding ``(images, labels)`` batches.
        criterion: unused; kept so existing calls keep working.

    Returns:
        The accuracy in percent as a float, or 0.0 when the loader yielded no
        samples (instead of raising ZeroDivisionError).
    """
    # Evaluate on the device the model lives on; a model without parameters
    # falls back to the notebook-level ``device``.
    first_param = next(net.parameters(), None)
    eval_device = first_param.device if first_param is not None else device

    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(eval_device), labels.to(eval_device)

            predictions = torch.argmax(net(images), dim=1)
            targets = torch.argmax(labels, dim=1)
            # .item() keeps the running count a plain Python int.
            correct += (predictions == targets).sum().item()
            total += targets.size(0)

    if total == 0:
        print('No test images were provided; accuracy is undefined.')
        return 0.0

    accuracy = 100 * correct / total
    print(f'Accuracy of the network on the test images: {accuracy:.2f} %')
    return accuracy

In [None]:
def train(net, criterion, optimizer, num_epochs, print_freq = 100):
    """Train ``net`` and report validation accuracy after every epoch.

    Batches come from the notebook-level ``train_loader``; tensors are moved to
    the notebook-level ``device``; the notebook-level ``scheduler`` adjusts the
    learning rate; validation uses ``run_test`` on ``valid_loader``.

    Args:
        net: model to optimise (updated in place).
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per mini-batch.
        num_epochs: number of passes over ``train_loader``.
        print_freq: a progress line is printed every ``print_freq`` mini-batches,
            after which the running statistics are reset.
    """
    for epoch in range(num_epochs):
        running_loss = 0.0
        running_correct = 0.0
        running_total = 0.0
        start_time = time.time()

        net.train()

        for i, (images, labels) in enumerate(train_loader, 0):
            images = images.to(device)
            labels = labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Top-1 bookkeeping: highest-scoring output index vs. index of the
            # largest entry of the label vector.
            predicted = torch.argmax(outputs, dim=1)
            target_idx = torch.argmax(labels, dim=1)

            # accumulate statistics for the progress line
            running_loss += loss.item()
            running_total += target_idx.size(0)
            running_correct += (predicted == target_idx).sum().item()

            # print every `print_freq` mini-batches, then restart the running window
            if i % print_freq == (print_freq - 1):
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / print_freq:.3f} acc: {100*running_correct / running_total:.2f} time: {time.time() - start_time:.2f}')
                running_loss, running_correct, running_total = 0.0, 0.0, 0.0
                start_time = time.time()

        # Advance the learning-rate schedule once per epoch. The notebook builds
        # the scheduler with T_max=12; stepping it after every mini-batch drove
        # the learning rate to its minimum after only 12 batches.
        scheduler.step()

        # Run the run_test() function after each epoch
        net.eval()
        run_test(net, valid_loader, criterion)

In [None]:
# Train for the configured number of epochs; progress lines and the per-epoch
# accuracy are printed by train() / run_test().
train(net, criterion, optimizer, num_epochs=num_epochs)

# Save only the model's state dict. Note that save_dir holds the path of the
# output file ('da-fusion.pt' inside the project folder), not a directory.
save_dir = os.path.join('/content/drive/MyDrive/DLCV/project/da-fusion/', 'da-fusion.pt')
torch.save(net.state_dict(), save_dir)

[1,   100] loss: 131.577 acc: 20.66 time: 37.88
[1,   200] loss: 111.413 acc: 37.88 time: 37.85
Accuracy of the network on the test images: 55.11 %
[2,   100] loss: 97.998 acc: 49.03 time: 38.26
[2,   200] loss: 89.574 acc: 55.75 time: 37.68
Accuracy of the network on the test images: 60.14 %
[3,   100] loss: 80.951 acc: 60.50 time: 38.49
[3,   200] loss: 77.501 acc: 61.66 time: 37.64
Accuracy of the network on the test images: 66.03 %
[4,   100] loss: 72.008 acc: 65.97 time: 36.87
[4,   200] loss: 69.068 acc: 65.34 time: 37.92
Accuracy of the network on the test images: 66.68 %
[5,   100] loss: 66.385 acc: 68.12 time: 38.97
[5,   200] loss: 64.427 acc: 67.25 time: 38.31
Accuracy of the network on the test images: 68.61 %
[6,   100] loss: 61.172 acc: 68.38 time: 37.95
[6,   200] loss: 60.174 acc: 69.47 time: 37.56
Accuracy of the network on the test images: 69.26 %
[7,   100] loss: 57.127 acc: 70.59 time: 37.49
[7,   200] loss: 56.402 acc: 70.97 time: 38.25
Accuracy of the network on t