In [1]:
import glob 
import os 
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import numpy as np

import pandas as pd 
from torch.utils.data import Dataset, DataLoader
from torchvision import  utils

from PIL import Image

In [2]:
cwd = os.getcwd()
use_cuda = torch.cuda.is_available()
torch.manual_seed(123)
device = torch.device("cuda" if use_cuda else "cpu")

feature_size=2048
cnn_feature_extractor=torchvision.models.resnet50(pretrained=True) #resnet50 fc is for 1000 calsses
modules = list(cnn_feature_extractor.children())[:-1] # delete the last fc layer.
cnn_feature_extractor = nn.Sequential(*modules).to(device)

# set requires_grad to false
for param in cnn_feature_extractor.parameters():
    param.requires_grad = False
#print(cnn_feature_extractor)

In [3]:
target_domain='sketch' # change here 
#domain_name=['sketch','quickdraw','infograph','real']
domain_name=['real']
#domain_name.remove(target_domain)

class_name=[file  for file in os.listdir(domain_name[0]) if file[-3:] !='csv' ]

csv_name_train={name: pd.read_csv(cwd+'/'+name+'/'+name+ '_train.csv',index_col=0) for name in domain_name}
csv_name_test={target_domain: pd.read_csv(cwd+'/'+target_domain+'/'+target_domain+ '_test.csv',index_col=0)}




In [4]:
train_cvs=pd.concat(csv_name_train[key] for key in domain_name)
test_csv=csv_name_test[target_domain]


In [5]:
color_transform=transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
            ])

class finalset(Dataset):
    def __init__(self, train_cvs,train, transform=None):
        """ Intialize the MNIST dataset """
        self.images = None
        self.labels = None
        self.csv=train_cvs
        self.filenames = list(train_cvs.index)
        self.train=train
        self.transform = transform
        self.len = len(self.filenames)                      
    def __getitem__(self, index):

        """ Get a sample from the dataset """
        
        image_fn=self.filenames[index]
        image = Image.open(image_fn)
    
        if image.mode != 'RGB':
            image = np.expand_dims(image, axis=2)
            image=np.concatenate((image,image,image),axis=2)

        if self.transform is not None:
            image = self.transform(image)
        
        if self.train is True:
            label=self.csv.loc[image_fn,'label']
            return image,label,image_fn
        if self.train is False:
            return image, -1, image_fn
    
    def __len__(self):
        return self.len  

train_data  =finalset(train_cvs,train=True,transform=color_transform)
trainloader = DataLoader(train_data, batch_size=64,shuffle=True) 

test_data  =finalset(test_csv,train=True,transform=color_transform)
testloader = DataLoader(test_data, batch_size=64,shuffle=True) 

In [17]:
batch_num=20
def extract_feature(dalaloader):
    train_list=[]
    for batch_idx, data in enumerate(trainloader):
        print(batch_idx)
        if batch_idx >= batch_num:
        #o get only 512 samples
                break
        input1, label1, filenames = data
        input1= input1.cuda()
        feature=cnn_feature_extractor(input1)
        train_list.append(feature)
    return train_list,label1,filenames
train_list=extract_feature(trainloader)
test_list=extract_feature(testloader)


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20


In [14]:
import pickle
with open("./train_features_resnet50.pkl", "wb") as f:
    pickle.dump(train_list, f)
with open("./valid_features_resnet50.pkl", "wb") as f:
    pickle.dump(test_list, f)

In [8]:
# import pickle
# with open("./train_features_resnet50.pkl", "rb") as f:
#     train_list=pickle.load( f)
# with open("./valid_features_resnet50.pkl", "rb") as f:
#     test_list=pickle.load( f)

In [19]:
def feature_to_array():
    img_feautures = []
    for seq_feature in img_list:
        img_feautures.append(seq_feature.cpu().numpy())


    img_features = np.array(img_feautures)
    img_features=np.reshape(img_features,(20*64,-1))
    return(img_features)
train_featue=feature_to_array(train_list)
test_feature=feature_to_array(test_list)

[tensor([[[[0.0056]],

         [[1.0340]],

         [[0.6207]],

         ...,

         [[0.1004]],

         [[0.1289]],

         [[0.0769]]],


        [[[0.1502]],

         [[0.3957]],

         [[0.5806]],

         ...,

         [[0.4907]],

         [[0.9848]],

         [[0.9342]]],


        [[[0.6719]],

         [[0.1203]],

         [[0.7249]],

         ...,

         [[0.1111]],

         [[0.2760]],

         [[0.4008]]],


        ...,


        [[[1.2414]],

         [[0.7586]],

         [[1.5587]],

         ...,

         [[0.2289]],

         [[0.2429]],

         [[1.4113]]],


        [[[0.0084]],

         [[1.8643]],

         [[0.5816]],

         ...,

         [[0.0958]],

         [[0.0270]],

         [[0.0132]]],


        [[[0.1280]],

         [[0.0763]],

         [[0.2170]],

         ...,

         [[0.5059]],

         [[0.8160]],

         [[0.0866]]]], device='cuda:0'), tensor([[[[0.2717]],

         [[0.4168]],

         [[0.6184]],

       

AttributeError: 'list' object has no attribute 'cpu'

In [None]:
img_features=np.concatenate([train_feature,test_feature])

In [None]:
print(img_features.shape)

In [None]:
from sklearn.manifold import TSNE
CNN_features_2d = TSNE(n_components=2, perplexity=30).fit_transform(img_features)


In [None]:
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
CNN_features_2d = pca.fit_transform(img_features)
#principalDf = pd.DataFrame(data = principalComponents
#             , columns = ['principal component 1', 'principal component 2'])

In [None]:
valid_y=np.concatenate([np.zeros(20*64),np.ones(20*64)])

In [None]:
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
cm = plt.cm.get_cmap("tab20", 11)
plt.figure(figsize=(10,5))
plt.scatter(CNN_features_2d[:,0], CNN_features_2d[:,1],s=20 , c=valid_y,cmap=cm,alpha=0.7)
plt.colorbar(ticks=range(11))
plt.clim(-0.5, 10.5)
plt.savefig("CNN_tsne.png")
plt.show()