In [1]:
import argparse
import time
import torch.backends.cudnn as cudnn
from WSOL.utils import *
from WSOL.dataset.cub200 import *
from WSOL.models.loss import *
from WSOL.models.model import *
from torchvision import transforms
import yaml
from easydict import EasyDict as edict
import random
import logging
import pprint

In [2]:
PROJECT_PATH = '/home/billymicoder/Documents/GitHub/BillyCCAM/CCAM'
WSOL_PATH = '/home/billymicoder/Documents/GitHub/BillyCCAM/CCAM/WSOL'
ROOT_CUB = '/home/billymicoder/Documents/GitHub/BillyCCAM/CCAM/data/images/CUB_200_2011/images'
ROOT_ILSVRC = '/home/billymicoder/Documents/GitHub/BillyCCAM/CCAM/data/images/ILSVRC2012'

In [3]:
# benchmark before running
cudnn.benchmark = True
os.environ["NUMEXPR_NUM_THREADS"] = "16"
flag = True

In [4]:
import cmapy
def visualize_heatmap(config, experiments, images, attmaps, cls_name, image_name, phase='train', bboxes=None,
                      gt_bboxes=None):
    _, c, h, w = images.shape

    # Images - The original images
    original_images = images.squeeze().to('cpu').detach().numpy()
    # Attmaps - The attention maps of the images
    attmaps = attmaps.squeeze().to('cpu').detach().numpy()

    for i in range(images.shape[0]):

        # create folder
        if not os.path.exists('debug/images/{}/{}/colormaps/{}'.format(experiments, phase, cls_name[i])):
            os.mkdir('debug/images/{}/{}/colormaps/{}'.format(experiments, phase, cls_name[i]))

        attmap = attmaps[i]
        attmap = attmap / np.max(attmap)
        attmap = np.uint8(attmap * 255)
        # colormap = cv2.applyColorMap(cv2.resize(attmap, (w, h)), cv2.COLORMAP_JET)
        colormap = cv2.applyColorMap(cv2.resize(attmap, (w, h)), cmapy.cmap('seismic'))

        grid = make_grid(images[i].unsqueeze(0), nrow=1, padding=0, pad_value=0,
                         normalize=True, range=None)
        # Add 0.5 after unnormalizing to [0, 255] to round to nearest integer
        image = grid.mul_(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to('cpu', torch.uint8).numpy()[..., ::-1]
        # print(image.shape, colormap.shape)
        cam = colormap + 0.5 * image
        cam = cam / np.max(cam)
        cam = np.uint8(cam * 255).copy()
        bbox_image = image.copy()

        if bboxes is not None:
            box = bboxes[i][0]

            cv2.rectangle(bbox_image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 255, 0), 2)  # BGR

            if gt_bboxes is not None:
                if isinstance(gt_bboxes, list):
                    for j in range(gt_bboxes[i].shape[0]):
                        gtbox = gt_bboxes[i][j]
                        cv2.rectangle(bbox_image, (int(gtbox[1]), int(gtbox[2])), (int(gtbox[3]), int(gtbox[4])),
                                      (255, 0, 0), 2)
                else:
                    gtbox = gt_bboxes[i]
                    cv2.rectangle(bbox_image, (int(gtbox[1]), int(gtbox[2])), (int(gtbox[3]), int(gtbox[4])),
                                  (255, 0, 0),
                                  2)

        cv2.imwrite(f'debug/images/{experiments}/{phase}/colormaps/{cls_name[i]}/{image_name[i]}_raw.jpg', bbox_image)
        cv2.imwrite(f'debug/images/{experiments}/{phase}/colormaps/{cls_name[i]}/{image_name[i]}_heatmap.jpg', cam)



In [5]:
def extract(config, train_loader, model, threshold):

    # set up the averagemeters
    batch_time = AverageMeter()

    # switch to evaluate mode
    model.eval()
    global flag
    # record the time
    end = time.time()

    # extracting
    with torch.no_grad():
        for i, (input, target, cls_name, img_name) in enumerate(train_loader):

            # data to gpu
            input = input.cuda()

            # inference the model
            fg_feats, bg_feats, ccam = model(input)

            if flag:
                ccam = 1 - ccam

            pred_boxes = []  # x0,y0, x1, y1
            
            for j in range(input.size(0)):
                estimated_boxes_at_each_thr, _ = compute_bboxes_from_scoremaps(
                    ccam[j, 0, :, :].detach().cpu().numpy().astype(np.float32), [threshold], input.size(-1) / ccam.size(-1),
                    multi_contour_eval=False)
                pred_boxes.append(estimated_boxes_at_each_thr[0])

            # measure elapsed time
            torch.cuda.synchronize()

            batch_time.update(time.time() - end)
            end = time.time()

            # save predicted bboxes
            save_bbox_as_json(config, config.EXPERIMENT, i, 0, pred_boxes, cls_name, img_name)

            # print the current testing status
            if i % config.PRINT_FREQ == 0:
                print('[{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      .format(i, len(train_loader), batch_time=batch_time), flush=True)

                visualize_heatmap(config, config.EXPERIMENT, input.clone().detach(), ccam, cls_name, img_name, phase='train', bboxes=pred_boxes)


In [6]:

def test(config, test_loader, model, criterion, epoch):

    # set up the averagemeters
    batch_time = AverageMeter()
    losses = AverageMeter()
    losses_bg_bg = AverageMeter()
    losses_bg_fg = AverageMeter()
    losses_fg_fg = AverageMeter()
    threshold = [(i + 1) / config.NUM_THRESHOLD for i in range(config.NUM_THRESHOLD - 1)]
    print('current threshold list: {}'.format(threshold))

    # switch to evaluate mode
    model.eval()
    global flag
    # record the time
    end = time.time()

    total = 0
    Corcorrect = torch.Tensor([[0] for i in range(len(threshold))])

    # testing
    with torch.no_grad():
        for i, (input, target, bboxes, cls_name, img_name) in enumerate(test_loader):

            # data to gpu
            input = input.cuda()

            # inference the model
            fg_feats, bg_feats, ccam = model(input)

            if flag:
                ccam = 1 - ccam

            pred_boxes_t = [[] for j in range(len(threshold))]  # x0,y0, x1, y1
            for j in range(input.size(0)):

                estimated_boxes_at_each_thr, _ = compute_bboxes_from_scoremaps(
                    ccam[j, 0, :, :].detach().cpu().numpy().astype(np.float32), threshold, input.size(-1)/ccam.size(-1), multi_contour_eval=False)

                for k in range(len(threshold)):
                    pred_boxes_t[k].append(estimated_boxes_at_each_thr[k])

            loss1 = criterion[0](bg_feats)            # bg contrast bg
            loss2 = criterion[1](bg_feats, fg_feats)  # fg contrast fg
            loss3 = criterion[2](fg_feats)            # fg contrast fg
            loss = loss1 + loss2 + loss3

            # acc1 = accuracy(main_out.data, target)[0]
            losses.update(loss.data.item(), input.size(0))
            losses_bg_bg.update(loss1.data.item(), input.size(0))
            losses_bg_fg.update(loss2.data.item(), input.size(0))
            losses_fg_fg.update(loss3.data.item(), input.size(0))

            # measure elapsed time
            torch.cuda.synchronize()

            total += input.size(0)
            for j in range(len(threshold)):
                pred_boxes = pred_boxes_t[j]
                pred_boxes = torch.from_numpy(np.array([pred_boxes[k][0] for k in range(len(pred_boxes))])).float()
                gt_boxes = bboxes[:, 1:].float()

                # calculate
                inter = intersect(pred_boxes, gt_boxes)
                area_a = (pred_boxes[:, 2] - pred_boxes[:, 0]) * (pred_boxes[:, 3] - pred_boxes[:, 1])
                area_b = (gt_boxes[:, 2] - gt_boxes[:, 0]) * (gt_boxes[:, 3] - gt_boxes[:, 1])
                union = area_a + area_b - inter
                IOU = inter / union
                IOU = torch.where(IOU <= 0.5, IOU, torch.ones(IOU.shape[0]))
                IOU = torch.where(IOU > 0.5, IOU, torch.zeros(IOU.shape[0]))

                Corcorrect[j] += IOU.sum()

            batch_time.update(time.time() - end)
            end = time.time()

            # print the current testing status
            if i % config.PRINT_FREQ == 0:
                print('Test: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'BG-C-BG {loss_bgbg.val:.4f} ({loss_bgbg.avg:.4f})\t'
                     'BG-C-FG {loss_bgfg.val:.4f} ({loss_bgfg.avg:.4f})\t'
                     'FG-C-FG {loss_fg_fg.val:.4f} ({loss_fg_fg.avg:.4f})'.format(
                    epoch, i, len(test_loader), batch_time=batch_time,
                    loss=losses, loss_bgbg=losses_bg_bg, loss_bgfg=losses_bg_fg, loss_fg_fg=losses_fg_fg), flush=True)

                # image debug
                visualize_heatmap(config, config.EXPERIMENT, input.clone().detach(), ccam, cls_name, img_name, phase='test', bboxes=pred_boxes_t[config.NUM_THRESHOLD // 2], gt_bboxes=bboxes)

    current_best_CorLoc = 0
    current_best_CorLoc_threshold = 0
    for i in range(len(threshold)):
        if (Corcorrect[i].item() / total) * 100 > current_best_CorLoc:
            current_best_CorLoc = (Corcorrect[i].item() / total) * 100
            current_best_CorLoc_threshold = threshold[i]

    print('Current => Correct: {:.2f}, threshold: {}'.format(current_best_CorLoc, current_best_CorLoc_threshold))

    return current_best_CorLoc, current_best_CorLoc_threshold


In [7]:

def train(config, train_loader, model, criterion, optimizer, epoch, scheduler):

    # set up the averagemeters
    batch_time = AverageMeter()
    losses = AverageMeter()
    losses_bg_bg = AverageMeter()
    losses_bg_fg = AverageMeter()
    losses_fg_fg = AverageMeter()
    global flag
    # switch to train mode
    model.train()
    # record time
    end = time.time()

    # training step
    for i, (input, target, cls_name, img_name) in enumerate(train_loader):

        # data to gpu
        input = input.cuda()

        optimizer.zero_grad()
        fg_feats, bg_feats, ccam = model(input)


        loss1 = criterion[0](bg_feats)
        loss2 = criterion[1](bg_feats, fg_feats)
        loss3 = criterion[2](fg_feats)
        loss = loss1 + loss2 + loss3

        loss.backward()
        optimizer.step()

        losses.update(loss.data.item(), input.size(0))
        losses_bg_bg.update(loss1.data.item(), input.size(0))
        losses_bg_fg.update(loss2.data.item(), input.size(0))
        losses_fg_fg.update(loss3.data.item(), input.size(0))

        if epoch == 0 and i == (len(train_loader)-1):
            flag = check_positive(ccam)
            print(f"Is Negative: {flag}")
        if flag:
            ccam = 1 - ccam

        # measure elapsed time
        torch.cuda.synchronize()
        batch_time.update(time.time() - end)
        end = time.time()

        # print the current status
        if i % config.PRINT_FREQ == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                 'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                 'BG-C-BG {loss_bgbg.val:.4f} ({loss_bgbg.avg:.4f})\t'
                 'BG-C-FG {loss_bgfg.val:.4f} ({loss_bgfg.avg:.4f})\t'
                 'FG-C-FG {loss_fg_fg.val:.4f} ({loss_fg_fg.avg:.4f})'.format(
                epoch, i, len(train_loader), batch_time=batch_time,
                loss=losses, loss_bgbg=losses_bg_bg, loss_bgfg=losses_bg_fg, loss_fg_fg=losses_fg_fg), flush=True)

            # image debug
            visualize_heatmap(config, config.EXPERIMENT, input.clone().detach(), ccam, cls_name, img_name)

    # print the learning rate
    lr = scheduler.get_last_lr()[0]
    print("Epoch {:d} finished with lr={:f}".format(epoch + 1, lr))


In [16]:

# def main(args, config, param_group, train_loader, model, optimizer):

In [8]:
from WSOL.optimizer import PolyOptimizer

"""
OMP_NUM_THREADS=16 
CUDA_VISIBLE_DEVICES=0 
python train_CCAM_CUB.py 
--experiment CCAM_CUB_IP 
--lr 0.0001 
--batch_size 16 
--pretrained supervised 
--alpha 0.05

parser = argparse.ArgumentParser(description="train CCAM on CUB dataset")
parser.add_argument('--cfg', help='experiment configuration filename', type=str,
                    default='config/CCAM_CUB.yaml')
parser.add_argument('--batch_size', type=int, default=16)
parser.add_argument('--lr', type=float, default=0.0001)
parser.add_argument('--alpha', type=float, default=0.05)
# Add a parameter that sets whether the images should be cropped or not
parser.add_argument('--crop', type=bool, default=False, help="apply image crop or not")
parser.add_argument('--experiment', type=str, required=True, help='record different experiments')
parser.add_argument('--pretrained', type=str, required=True, help='adopt different pretrained parameters, [supervised, mocov2, detco]')

"""

# join 'config/CCAM_CUB.yaml' to WSOL_PATH
joint_path = os.path.join(WSOL_PATH, 'config/CCAM_CUB.yaml')

with open(joint_path, 'r') as f:
    config = yaml.load(f, Loader=yaml.FullLoader)

    # config = yaml.load(f) #changed to full_load()
    # config = yaml.full_load(f)
    config = edict(config)
    
config.EXPERIMENT = "CCAM_CUB_IP"
config.LR = 0.0001
# Add a parameter that sets whether the images should be cropped or not
config.CROP = False
config.BATCH_SIZE = 16
config.PRETRAINED = "supervised"
config.ALPHA = 0.05

# config, args = parse_arg()

logger = logging.getLogger()
logger.setLevel(logging.INFO)
console = logging.StreamHandler()
logging.getLogger('').addHandler(console)
logger.info(pprint.pformat(config))

if config.SEED != -1:
    torch.manual_seed(config.SEED)
    torch.cuda.manual_seed(config.SEED)
    np.random.seed(config.SEED)
    random.seed(config.SEED)

{'ALPHA': 0.05,
 'BATCH_SIZE': 16,
 'CROP': False,
 'CROP_DIR': 'cropped/',
 'DATA': 'CUB_200_2011',
 'DEBUG': 'debug',
 'DEPTH': 50,
 'EPOCHS': 20,
 'EVALUTATE': False,
 'EXPERIMENT': 'CCAM_CUB_IP',
 'FDIM': '2048+1024',
 'LOG_DIR': 'log/',
 'LR': 0.0001,
 'MOMENTUM': 0.9,
 'NUM_THRESHOLD': 20,
 'PRETRAINED': 'supervised',
 'PRINT_FREQ': 25,
 'RESUME': '',
 'ROOT': '/home/billymicoder/Documents/GitHub/BillyCCAM/CCAM/data/images/CUB_200_2011/',
 'SEED': 1,
 'WEIGHT_DECAY': 0.0001,
 'WORKERS': 4}


In [9]:


# print("=> creating log folder...")
# creat_folder(config, args)

# log
# sys.stdout = Logger('{}/{}_log.txt'.format(config.LOG_DIR, config.EXPERIMENT))

# Check if the crop parameter is true then set the crop directory
if config.CROP:
    print("=> using crop augmentation...")

    sys.stdout = Logger('{}/{}/'.format(config.CROP_DIR, config.EXPERIMENT))

# create model
print("=> creating model...")
model = get_model(pretrained=config.PRETRAINED).cuda()
param_groups = model.get_parameter_groups()
# model_info(model)

=> creating model...
Loading supervised pretrained parameters!


In [10]:
criterion = [SimMaxLoss(metric='cos', alpha=config.ALPHA).cuda(), SimMinLoss(metric='cos').cuda(),
                SimMaxLoss(metric='cos', alpha=config.ALPHA).cuda()]

# data augmentation
train_transforms = transforms.Compose([
    transforms.Resize(size=(256, 256)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
])

# we follow PSOL to adopt 448x448 as input to generate pseudo bounding boxes
test_transforms = transforms.Compose([
    transforms.Resize(size=(480, 480)),
    transforms.CenterCrop(size=(448, 448)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
])

In [11]:
# wrap to dataset
train_data = CUB200(root=config.ROOT, input_size=256, crop_size=224, train=True, transform=train_transforms)
test_data = CUB200(root=config.ROOT, input_size=480, crop_size=448, train=False, transform=test_transforms)
print('load {} train images!'.format(len(train_data)))
print('load {} test images!'.format(len(test_data)))

# wrap to dataloader
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=config.BATCH_SIZE, shuffle=True,
    num_workers=config.WORKERS, pin_memory=False)
test_loader = torch.utils.data.DataLoader(
    test_data, batch_size=config.BATCH_SIZE, shuffle=False,
    num_workers=config.WORKERS, pin_memory=True)


load 5994 train images!
load 5794 test images!


In [10]:
# define optimizer
max_step = len(train_data) // config.BATCH_SIZE * config.EPOCHS
optimizer = PolyOptimizer([
    {'params': param_groups[0], 'lr': config.LR, 'weight_decay': config.WEIGHT_DECAY},
    {'params': param_groups[1], 'lr': 2 * config.LR, 'weight_decay': 0},
    {'params': param_groups[2], 'lr': 10 * config.LR, 'weight_decay': config.WEIGHT_DECAY},
    {'params': param_groups[3], 'lr': 20 * config.LR, 'weight_decay': 0}
], lr=config.LR, weight_decay=config.WEIGHT_DECAY, max_step=max_step)

num_iters = len(train_loader)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, num_iters * config.EPOCHS)

In [None]:
start_epoch = 0
global_best_threshold = 0
# training part
for epoch in range(start_epoch, config.EPOCHS):
    # training
    train(config, train_loader, model, criterion, optimizer, epoch, scheduler)

    # testing
    best_CorLoc, best_threshold = test(config, test_loader, model, criterion, epoch)

    torch.save(
        {"state_dict": model.state_dict(),
            "epoch": epoch + 1,
            "CorLoc": best_CorLoc,
            "Threshold": best_threshold,
            "Flag": flag,
            }, '{}/checkpoints/{}/current_epoch.pth'.format(config.DEBUG, config.EXPERIMENT))

    global_best_threshold = best_threshold

    print('Training finished...')
    print('--------------------')

    print('Extracting class-agnostic bboxes using best threshold...')
    print('--------------------------------------------------------')

    
train_transforms = transforms.Compose([
    transforms.Resize(size=(480, 480)),
    transforms.CenterCrop(size=(448, 448)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
])
train_data = CUB200(root=config.ROOT, input_size=480, crop_size=448, train=True, transform=train_transforms)
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=config.BATCH_SIZE, shuffle=True,
    num_workers=config.WORKERS, pin_memory=False)

extract(config, train_loader, model, global_best_threshold)
print('Finished.')



In [13]:
# import tsne and matplotlib
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# Define a function to visualize the features
def visualize_features(features, labels, num_classes, title):
    '''
    Visualize the features using t-SNE.
    '''
    # Get the color map
    cmap = plt.get_cmap('tab20')

    # Define the figure
    figure = plt.figure(figsize=(8, 8))
    axes = plt.axes()

    # Get the list of all the classes
    classes = [i for i in range(num_classes)]

    # Iterate over all the classes
    for i, c in enumerate(classes):
        # Get the indexes of the images of the current class
        indexes = np.where(labels == c)[0]

        # Extract the features of the images of the current class
        x = [features[index] for index in indexes]

        # convert the features to a numpy array
        x = np.array(x)

        # Apply t-SNE to the features
        x_embedded = TSNE(n_components=2).fit_transform(x)

        # Scatter plot the points
        axes.scatter(x_embedded[:, 0], x_embedded[:, 1], label=c, cmap=cmap)

    # Set the title
    axes.set_title(title)

    # Set the legend
    axes.legend(loc='best')

    # Show the plot
    plt.show()

In [14]:
import gc
gc.collect()

0

In [15]:

from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm import tqdm

# Define a transform to convert the PIL images to tensors
transform = transforms.ToTensor()


def load_images(root_folder):
    # Load all the images in CUB_200_2011/images/ with the labels as the folder names
    # and store them in a list of images
    cub_images = []
    cub_labels = []
    
    # counter = 0

    # Iterate through all the folders in the CUB_200_2011/images/ folder
    for folder in tqdm(os.listdir(root_folder)):
        # Get the path of the folder
        folder_path = os.path.join(root_folder, folder)
        # Get the list of all the images in the folder
        image_list = os.listdir(folder_path)
        # Iterate through all the images in the folder
        for img in image_list:
            # Get the path of the image
            img_path = os.path.join(folder_path, img)
            # Load the image and append it to the list of images
            # cub_images.append(transform(Image.open(img_path)))
            cub_images.append(Image.open(img_path))
            # Append the label
            cub_labels.append(folder)

        # Increment the counter
        # counter += 1

        # Break if the counter is 100
        # if counter == 100:
        #     break

    return cub_images, cub_labels

# Load all the images in CUB_200_2011/images/ with the labels as the folder names
# and store them in a list of images
cub_images, cub_labels = load_images(ROOT_CUB)

# Create a dataset from the list of images and labels
dataset = list(zip(cub_images, cub_labels))

# Create a dataloader
dataloader = DataLoader(dataset, batch_size=64, shuffle=True, num_workers=4)
# train_loader = torch.utils.data.DataLoader(train_data, batch_size=config.BATCH_SIZE, shuffle=True)

100%|██████████| 200/200 [00:04<00:00, 43.90it/s]


In [16]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
from torchvision import utils as vutils
import torch.nn.functional as F

from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize(size=(480, 480)),
    transforms.ToTensor()
])

In [17]:
print(type(cub_images[0]))
print(type(cub_images))

<class 'PIL.JpegImagePlugin.JpegImageFile'>
<class 'list'>


In [19]:
from torchvision.datapoints import Image
from torchvision.transforms.functional import to_tensor
from tqdm import tqdm

# converteach pil image to tensor and store the in a list called cub_images_tensor
cub_images_tensor = []
for i in tqdm(range(len(cub_images))):
    # cub_images_tensor.append(torch.as_tensor(cub_images[i]))
    cub_images_tensor.append(Image(cub_images[i]))



100%|██████████| 11788/11788 [00:16<00:00, 712.67it/s] 


In [23]:
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
le.fit(cub_labels)

cub_labels_encoded = le.transform(cub_labels)

# Convert the labels into a tensor
cub_labels_tensor = torch.tensor(cub_labels_encoded)



In [27]:
# create a dataset from the list of images and labels tensors
dataset = list(zip(cub_images_tensor, cub_labels_tensor))

# Create a dataloader
dataloader = DataLoader(dataset, shuffle=True, num_workers=4)

In [28]:

images, labels = next(iter(dataloader))


In [33]:
# Move the images and labels to the device
images = images.to(device)
labels = labels.to(device)

# Get the features for the images
features = model(images)

# Get the predictions for the images
predictions = F.softmax(features, dim=1)

# Get the class with the highest probability
predictions = torch.argmax(predictions, dim=1)

# Get the accuracy
accuracy = torch.sum(predictions == labels).item() / len(labels)

# Print the accuracy
print('Accuracy: {:.2f}%'.format(accuracy * 100))


RuntimeError: Input type (torch.cuda.ByteTensor) and weight type (torch.cuda.FloatTensor) should be the same

In [None]:

# visualize the features
visualize_features(features.cpu().detach().numpy(), labels.cpu().detach().numpy(), 200, 't-SNE visualization of the features')

In [32]:
# resize the tensors to the same width
# images_resized = F.interpolate(images, mode='nearest')
# labels_resized = F.interpolate(labels, mode='nearest')

# Concatenate the images and labels along the width dimension
# images_labels = torch.cat((images, labels), dim=3)

# Visualize the images
plt.figure(figsize=(16, 16))
plt.axis("off")
plt.title("Training Images")
plt.imshow(np.transpose(vutils.make_grid(labels[:64], padding=2, normalize=True).cpu(), (1, 2, 0)))
plt.show()




RuntimeError: result type Float can't be cast to the desired output type long int

In [18]:
type(dataloader)

torch.utils.data.dataloader.DataLoader

In [22]:
from tqdm import tqdm
def extract_features(model, images):
    '''
    Extract the features of the images using the pretrained ResNet50 model.
    The features are extracted from the last convolutional layer.
    '''
    # Set the model to evaluation mode
    model.eval()

    # Define a list to store the features
    features = []

    # Iterate over all the images
    for img in tqdm(images):
        # Convert the image to a torch.Tensor and normalize the image
        # The image size should be (3, 224, 224)
        img = transforms.ToTensor()(img)
        img = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225])(img)
        
        # Add a batch dimension to the image
        # The image size should be (1, 3, 224, 224)
        img = img.unsqueeze(0)

        
        # Move the image to the device
        img = img.to(device)

        # Extract the features
        # The feature size should be (1, 2048, 7, 7)
        # inputs, labels = inputs.to(device), labels.to(device)
        feature = model(img)

        # Remove the batch dimension and append the feature to the list
        # The feature size should be (2048, 7, 7)
        # features.append(feature.squeeze(0))
        features.append(feature)

    return features

# Extract the features
features = extract_features(model, images)


  1%|          | 30/5893 [02:34<8:23:58,  5.16s/it] 


KeyboardInterrupt: 

In [24]:
# clear cuda cache
torch.cuda.empty_cache()

In [23]:

# visualise the features
visualize_features(features, labels)

NameError: name 'features' is not defined