In [12]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms
from tqdm import tqdm
from torchvision.models import resnet18
import numpy as np

In [2]:
# Run configuration shared by every cell below.
args = dict(
    learning_rate=5e-2,
    batch_size=16,
    num_worker=4,
    random_seed=8771795,
    augmentation=False,
    num_epoch=20,
    cuda='3',
)

In [3]:
# Restrict the process to the configured GPU before CUDA is queried, so that
# 'cuda:0' below refers to that physical device.
os.environ["CUDA_VISIBLE_DEVICES"] = args['cuda']
use_gpu = torch.cuda.is_available()
device = torch.device('cuda:0' if use_gpu else 'cpu')

# Seed torch's global RNG for reproducibility.
torch.manual_seed(args['random_seed'])

# Deterministic preprocessing: resize to 224x224, replicate the grayscale
# image to 3 channels, convert to a tensor and normalize with mean 0.5 / std 0.5.
# NOTE(review): the backbone is loaded with pretrained weights further down;
# confirm that 0.5/0.5 normalization (rather than the statistics the weights
# were trained with) is intentional.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# The train/validation pipeline only differs when augmentation is enabled.
# NOTE(review): the validation subset is split from the same dataset object,
# so it receives these random augmentations too when the flag is on — confirm
# that this is wanted.
if args['augmentation']:
    train_valid_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.Grayscale(num_output_channels=3),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        transforms.RandomErasing(),
        transforms.Normalize((0.5,), (0.5,)),
    ])
else:
    train_valid_transform = test_transform

# Load dataset
# The former `require_download = os.path.exists('./dataset')` flag was removed:
# it was logically inverted (True when the data directory already exists) and
# was never passed to the dataset constructors. `download=True` is kept
# unconditionally; per the torchvision documentation an already downloaded
# dataset is not downloaded again.
train_valid_dataset = torchvision.datasets.FashionMNIST(
    './dataset', train=True, transform=train_valid_transform, download=True)
test_dataset = torchvision.datasets.FashionMNIST(
    './dataset', train=False, transform=test_transform, download=True)

# Carve a validation subset out of the training data. The global RNG is
# re-seeded right before the split so the partition does not depend on how
# much randomness earlier statements consumed.
torch.manual_seed(args['random_seed'])
split_sizes = [54000, 6000]  # train / validation sample counts
train_dataset, valid_dataset = torch.utils.data.random_split(train_valid_dataset, split_sizes)

# Build one DataLoader per split; only the training loader shuffles.
loader_kwargs = dict(batch_size=args['batch_size'], num_workers=args['num_worker'])
train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **loader_kwargs)
valid_loader = torch.utils.data.DataLoader(valid_dataset, shuffle=False, **loader_kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset, shuffle=False, **loader_kwargs)

In [4]:
# Instantiate torchvision's ResNet-18 with pretrained weights; it is used below
# purely as a fixed feature extractor.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour
# of a `weights=` argument — check the installed version before changing this.
model = resnet18(pretrained=True)

In [5]:
class Identity(nn.Module):
    """Pass-through module: `forward` hands back its input unchanged.

    It has no parameters and is used to replace a layer of an existing
    network so that the activations feeding that layer become the output.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return `x` as-is (the same object, not a copy)."""
        return x

In [6]:
# Swap the final fully connected layer for a pass-through so that model(x)
# returns the features that previously fed that layer instead of class scores.
model.fc = Identity()

In [7]:
# Put the network in evaluation mode before any forward pass.
model.eval()

# Tally the number of scalar parameters left in the network.
param_count = 0
for param in model.parameters():
    param_count += param.numel()
print('total parameters:', param_count)

total parameters: 11176512


In [8]:
# Make sure the output directory for the saved feature arrays exists.
# exist_ok=True replaces the separate exists()/mkdir() pair, which could race
# with another process creating the directory, and keeps the cell re-runnable.
os.makedirs('./features', exist_ok=True)

In [9]:
# Move the network to the device selected above (GPU when available, else CPU).
model = model.to(device)

In [10]:
# Per-batch accumulators for every split. The validation and test cells below
# append to the lists created here.
train_feats, valid_feats, test_feats = [], [], []
train_labels, valid_labels, test_labels = [], [], []

# Evaluation mode is (re)asserted here so this cell does not depend on an
# earlier cell having been run; calling it again is harmless.
model.eval()

# Feature extraction never back-propagates, so run the forward passes under
# no_grad: no autograd graph is recorded for the batches.
with torch.no_grad():
    # Wrapping the loader itself (rather than enumerate(loader)) lets tqdm
    # see its length and show a proper progress bar with a total.
    for x, y in tqdm(train_loader):
        features = model(x.to(device))
        train_feats.append(features.cpu().numpy())
        # Labels are only stored, never used on the device, so they are
        # converted directly without a round trip through it.
        train_labels.append(y.numpy())

3375it [00:46, 72.41it/s]


In [13]:
# Stack the per-batch feature arrays along the sample axis.
# Only done while the name still holds the list of batches: re-running
# np.concatenate on the already stacked 2-D array would join its rows into one
# flat 1-D vector, silently corrupting the features.
if isinstance(train_feats, list):
    train_feats = np.concatenate(train_feats, axis=0)

In [14]:
# Sanity check: display the shape of the stacked training features.
train_feats.shape

(54000, 512)

In [15]:
# Stack the per-batch label arrays into a single 1-D array.
# Guarded so that re-running the cell is a no-op once the name already holds
# the stacked array (np.concatenate on a 1-D array would raise).
if isinstance(train_labels, list):
    train_labels = np.concatenate(train_labels, axis=0)
train_labels.shape

(54000,)

In [17]:
# Persist the training features and their labels as .npy files.
for out_path, out_array in (
    ('features/resnet18_train_feat.npy', train_feats),
    ('features/resnet18_train_label.npy', train_labels),
):
    np.save(out_path, out_array)

In [18]:
# Drop the references to the training arrays now that they are saved to disk,
# so their memory can be reclaimed. After this cell the two names no longer
# exist; re-run the extraction cell before touching them again.
del train_labels, train_feats

In [19]:
# Start from fresh accumulators so that re-running this cell neither appends
# duplicate batches nor fails because the names were already rebound to
# stacked arrays by the cells below.
valid_feats, valid_labels = [], []

# Evaluation mode is loop-invariant, so it is set once up front instead of on
# every iteration.
model.eval()

# No gradients are needed for feature extraction; no_grad avoids recording an
# autograd graph for each batch.
with torch.no_grad():
    # Wrapping the loader directly gives tqdm the total number of batches.
    for x, y in tqdm(valid_loader):
        features = model(x.to(device))
        valid_feats.append(features.cpu().numpy())
        # Labels are only stored, so they skip the round trip to the device.
        valid_labels.append(y.numpy())

375it [00:05, 71.10it/s]


In [22]:
# Stack the per-batch feature arrays along the sample axis.
# Only done while the name still holds the list of batches: re-running
# np.concatenate on the already stacked 2-D array would join its rows into one
# flat 1-D vector, silently corrupting the features.
if isinstance(valid_feats, list):
    valid_feats = np.concatenate(valid_feats, axis=0)
valid_feats.shape

(6000, 512)

In [23]:
# Stack the per-batch label arrays into a single 1-D array.
# Guarded so that re-running the cell is a no-op once the name already holds
# the stacked array (np.concatenate on a 1-D array would raise).
if isinstance(valid_labels, list):
    valid_labels = np.concatenate(valid_labels, axis=0)
valid_labels.shape

(6000,)

In [24]:
# Persist the validation features and their labels as .npy files.
for out_path, out_array in (
    ('features/resnet18_valid_feat.npy', valid_feats),
    ('features/resnet18_valid_label.npy', valid_labels),
):
    np.save(out_path, out_array)

In [25]:
# Start from fresh accumulators so that re-running this cell neither appends
# duplicate batches nor fails because the names were already rebound to
# stacked arrays by the cells below.
test_feats, test_labels = [], []

# Evaluation mode is loop-invariant, so it is set once up front instead of on
# every iteration.
model.eval()

# No gradients are needed for feature extraction; no_grad avoids recording an
# autograd graph for each batch.
with torch.no_grad():
    # Wrapping the loader directly gives tqdm the total number of batches.
    for x, y in tqdm(test_loader):
        features = model(x.to(device))
        test_feats.append(features.cpu().numpy())
        # Labels are only stored, so they skip the round trip to the device.
        test_labels.append(y.numpy())

625it [00:08, 74.24it/s]


In [26]:
# Stack the per-batch feature arrays along the sample axis.
# Only done while the name still holds the list of batches: re-running
# np.concatenate on the already stacked 2-D array would join its rows into one
# flat 1-D vector, silently corrupting the features.
if isinstance(test_feats, list):
    test_feats = np.concatenate(test_feats, axis=0)
test_feats.shape

(10000, 512)

In [27]:
# Stack the per-batch label arrays into a single 1-D array.
# Guarded so that re-running the cell is a no-op once the name already holds
# the stacked array (np.concatenate on a 1-D array would raise).
if isinstance(test_labels, list):
    test_labels = np.concatenate(test_labels, axis=0)
test_labels.shape

(10000,)

In [28]:
# Persist the test features and their labels as .npy files.
for out_path, out_array in (
    ('features/resnet18_test_feat.npy', test_feats),
    ('features/resnet18_test_label.npy', test_labels),
):
    np.save(out_path, out_array)