In [1]:
import torch
import torchvision
import numpy as np
from torchsummary import summary
from torch.utils.data import DataLoader
from torchvision import transforms
import sys
from tqdm import tqdm
import numpy as np
from albumentations.pytorch import ToTensorV2
import albumentations as A

sys.path.append("/workspaces/ood/")

%load_ext autoreload
%autoreload 2
device = torch.device("cuda")
torch.cuda.is_available()

True

In [2]:
# wide resnet
height = 256
width = 256
transform = A.Compose(
    [
        A.Resize(height=height, width=width, always_apply=True),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]
)

In [3]:
cifar_data_train = torchvision.datasets.CIFAR10(
    "../data/cifar10", download=False  # , transform=transform
)

In [4]:
wide_resnet50 = torchvision.models.wide_resnet50_2(pretrained=False).to(device)
wide_resnet50.load_state_dict(
    torch.load(
        "/workspaces/ood/data/models/torch/hub/checkpoints/wide_resnet50_2-95faca4d.pth"
    )
)

for name, param in wide_resnet50.named_parameters():
    param.requires_grad = False

In [5]:
from fastflow.extract_features import create_feature_dataset

model_name = "wide_resnet50"
dataset_name = "cifar10train"
layers = ["layer2", "layer3", "layer4"]
out_dims = [[512, 32, 32], [1024, 16, 16], [2048, 8, 8]]
num_images_per_class = 1000
out_name = (
    "../data/feature_maps/"
    + "_".join([dataset_name] + [model_name] + layers)
    + f"_{num_images_per_class}_img_per_class.npz"
)

In [6]:
create_feature_dataset(
    model=wide_resnet50,
    layers=layers,
    out_dims=out_dims,
    dataset=cifar_data_train,
    num_images_per_class=num_images_per_class,
    out_name=out_name,
    transform=transform,
    device=device,
)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
 21%|██▏       | 10716/50000 [02:34<09:24, 69.56it/s] 


all classes are computed
saving to ../data/feature_maps/cifar10train_wide_resnet50_layer2_layer3_layer4_1000_img_per_class.npz...


In [7]:
from fastflow.extract_features import create_feature_dataset

cifar_data_test = torchvision.datasets.CIFAR10(
    "../data/cifar10", download=False, train=False
)

model_name = "wide_resnet50"
dataset_name = "cifar10test"
layers = ["layer2", "layer3", "layer4"]
out_dims = [[512, 32, 32], [1024, 16, 16], [2048, 8, 8]]
num_images_per_class = 100
out_name = (
    "../data/feature_maps/"
    + "_".join([dataset_name] + [model_name] + layers)
    + f"_{num_images_per_class}_img_per_class.npz"
)

create_feature_dataset(
    model=wide_resnet50,
    layers=layers,
    out_dims=out_dims,
    dataset=cifar_data_test,
    transform=transform,
    num_images_per_class=num_images_per_class,
    out_name=out_name,
    device=device,
)

 12%|█▏        | 1215/10000 [00:14<01:48, 81.09it/s] 


all classes are computed
saving to ../data/feature_maps/cifar10test_wide_resnet50_layer2_layer3_layer4_100_img_per_class.npz...


In [8]:
from fastflow.extract_features import create_feature_dataset

svhn_data_test = torchvision.datasets.SVHN("../data/svhn", download=False, split="test")

model_name = "wide_resnet50"
dataset_name = "svhntest"
layers = ["layer2", "layer3", "layer4"]
out_dims = [[512, 32, 32], [1024, 16, 16], [2048, 8, 8]]
num_images_per_class = 100
out_name = (
    "../data/feature_maps/"
    + "_".join([dataset_name] + [model_name] + layers)
    + f"_{num_images_per_class}_img_per_class.npz"
)

create_feature_dataset(
    model=wide_resnet50,
    layers=layers,
    out_dims=out_dims,
    transform=transform,
    dataset=cifar_data_test,
    num_images_per_class=num_images_per_class,
    out_name=out_name,
    device=device,
)

 12%|█▏        | 1215/10000 [00:14<01:47, 81.92it/s] 


all classes are computed
saving to ../data/feature_maps/svhntest_wide_resnet50_layer2_layer3_layer4_100_img_per_class.npz...


## Split files

In [9]:
from fastflow.extract_features import split_layers

model_name = "wide_resnet50"
dataset_name = "cifar10train"
layers = ["layer2", "layer3", "layer4"]
out_dims = [[512, 32, 32], [1024, 16, 16], [2048, 8, 8]]
num_images_per_class = 1000
out_name = (
    "../data/feature_maps/"
    + "_".join([dataset_name] + [model_name] + layers)
    + f"_{num_images_per_class}_img_per_class.npz"
)
split_layers(out_name, layers)

  0%|          | 0/3 [03:05<?, ?it/s]


BadZipFile: Bad CRC-32 for file 'layer2.npy'

In [9]:
test = np.load(
    "/workspaces/ood/data/feature_maps/cifar10test_wide_resnet50_layer2_100_img_per_class.npy"
)

In [None]:
# from feature_extractor import FeatureExtractor
# encoder = FeatureExtractor(wide_resnet50, layers)
# encoder.eval()

# preds = {layers[i] : np.zeros([num_images_per_class * 10]+out_dims[i], dtype=np.float32) for i in range(len(layers))}
# label_counts = {i:0 for i in range(10)}
# label_not_finish = [True for _ in range(10)]
# i = 0
# for image, label in tqdm(cifar_data_train):
#     if i > 140:
#         break
#     if not any(label_not_finish):
#         break
#     if label_counts[label] >= num_images_per_class:
#         label_not_finish[label] = False
#         continue

#     features = encoder(torch.unsqueeze(image.to(device), dim=0))
#     for layer in features.keys():
#         preds[layer][i] = features[layer].detach().cpu().numpy()[0]
#     i+=1
#     label_counts[label]+=1