### Feature extraction of STL-10 dataset using pretrained ResNet-50

In [None]:
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.models import resnet50
import scipy.io as sio



# Extract ResNet-50 features for the STL-10 train/test splits and cache them
# in a single .mat file.

# Preprocessing: resize, centre-crop to 224x224, then normalise with the
# channel mean/std that torchvision's ImageNet-pretrained models expect.
myTransforms = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])


# load stl10 (shuffle=False so the saved rows keep the dataset's own order)
train_dataset = torchvision.datasets.STL10(root='dataset/stl/', split='train', download=True, transform=myTransforms)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=False, num_workers=2)

test_dataset = torchvision.datasets.STL10(root='dataset/stl/', split='test', download=True, transform=myTransforms)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)


# NOTE(review): pretrained=True loads torchvision's legacy default checkpoint
# (IMAGENET1K_V1 in current torchvision), while resnet50-11ad3fa6.pth referenced
# below is the IMAGENET1K_V2 checkpoint. To use V2 (e.g. from a pre-downloaded
# file), enable the load_state_dict line -- confirm which weights are intended.
myModel = resnet50(pretrained=True)
# myModel.load_state_dict(torch.load('pretrained/resnet50-11ad3fa6.pth')) # == ResNet50_Weights.IMAGENET1K_V2
# Replace the classification head so the forward pass returns the pooled
# features instead of class logits.
myModel.fc = nn.Identity()
myDevice = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
myModel = myModel.to(myDevice)
myModel.eval()


def _extract_features(model, loader, device):
    """Run ``model`` over every batch of ``loader`` without gradients.

    Args:
        model: Module mapping an image batch to a per-image feature batch.
        loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the images are moved to before the forward pass.

    Returns:
        Tuple ``(features, labels)`` of CPU tensors concatenated over all
        batches; ``features`` is 2-D, one row per image.
    """
    feature_batches = []
    label_batches = []
    with torch.no_grad():
        for imgs, labels in loader:
            outputs = model(imgs.to(device))
            # flatten(1) keeps the batch axis even for a one-image batch
            # (a bare squeeze() would drop it); .cpu() releases device memory
            # per batch instead of holding every feature on the GPU.
            feature_batches.append(outputs.flatten(1).cpu())
            label_batches.append(labels)
    return torch.cat(feature_batches), torch.cat(label_batches)


print('extract train set ...')
train_features, train_labels = _extract_features(myModel, train_loader, myDevice)

print('extract test set ...')
test_features, test_labels = _extract_features(myModel, test_loader, myDevice)

print(train_features.shape)
print(train_labels.shape)
print(test_features.shape)
print(test_labels.shape)

# savemat needs NumPy arrays; everything is already on the CPU at this point.
train_features = train_features.numpy()
test_features = test_features.numpy()
train_labels = train_labels.numpy()
test_labels = test_labels.numpy()

data_dict = {
    'train_set': train_features,
    'train_label': train_labels,
    'test_set': test_features,
    'test_label': test_labels,
}
# save in *.mat with data dimension 2048
# (savemat stores the 1-D label vectors as 2-D row vectors by default, so
# readers need to flatten them after loadmat)
sio.savemat('dataset/stl/stl10-extract-features.mat', data_dict)


In [None]:
import os

import scipy.io as scio
from torch.utils.data import Dataset

class STL10Dataset(Dataset):
    """Dataset over ResNet-50 features pre-extracted from STL-10.

    Reads ``stl10-extract-features.mat`` from ``data_path`` and exposes either
    its ``train_set``/``train_label`` arrays or its ``test_set``/``test_label``
    arrays.

    Args:
        data_path: Directory containing ``stl10-extract-features.mat``
            (file name not included). A trailing path separator is optional.
        train: Truthy selects the training split, falsy the test split.

    Each item is a tuple ``(x, y)``:
        x: float32 tensor holding one row of the feature matrix (2048 values
            for the ResNet-50 features this file is meant to store).
        y: Image-category label stored for that row, returned as read from
            the .mat file (a NumPy scalar).
    """

    def __init__(self, data_path: str, train: bool = True) -> None:
        # os.path.join works whether or not data_path ends with a separator;
        # plain string concatenation silently built a wrong path without one.
        self.content = scio.loadmat(os.path.join(data_path, 'stl10-extract-features.mat'))

        # Truthiness test so that values such as 1 or numpy.bool_(True) also
        # select the training split.
        if train:
            self.data = self.content['train_set']
            labels = self.content['train_label']
        else:
            self.data = self.content['test_set']
            labels = self.content['test_label']
        # loadmat returns labels as a 2-D array; reshape(-1) always gives a
        # 1-D vector, whereas squeeze() collapses a single-sample split to a
        # 0-d array that has no len().
        self.labels = labels.reshape(-1)

    def __len__(self) -> int:
        return len(self.labels)

    def __getitem__(self, idx):
        # Fetch the feature row and its label at idx.
        x = self.data[idx]
        y = self.labels[idx]
        # Convert the features to a float32 tensor (no normalisation is applied).
        x = torch.tensor(x, dtype=torch.float32)
        return x, y
    
# Build the training and test views over the cached feature file, in that order.
stl10_train_set, stl10_test_set = (
    STL10Dataset(data_path='dataset/stl/', train=use_train_split)
    for use_train_split in (True, False)
)