<a href="https://colab.research.google.com/github/infomon/understanding_cnn/blob/master/Bottleneck_features.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
!pip install numba
!pip install umap-learn



# Extract Bottleneck Features for Train Set

In [0]:
import keras
from keras.datasets import cifar10
import numpy as np
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input
import scipy
from scipy import misc
import os
import cv2


# --- configuration -----------------------------------------------------------
nof_images = 25000  # number of leading training images to run through the network

# Side length (pixels) every image is resized to before entering the network;
# it must agree with the `input_shape` the model is built with below.
input_side = 139

# Cache file for the extracted features. One name is used for the existence
# check, the load, and the save so the three can never drift apart.
train_features_file = 'inception_features_train_1.npz'

# --- data --------------------------------------------------------------------
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
y_train = np.squeeze(y_train)  # drop singleton axes so labels form a flat vector
print('data loaded')

# --- model -------------------------------------------------------------------
# InceptionResNetV2 without its final classification layers (include_top=False).
# NOTE: `preprocess_input` is imported from both inception_v3 and
# inception_resnet_v2 in the import block; the later import wins, which is the
# one matching this model.
model = InceptionResNetV2(weights='imagenet', include_top=False,
                          input_shape=(input_side, input_side, 3))
print('model loaded')

# --- bottleneck features (train) ---------------------------------------------
if os.path.exists(train_features_file):
    print('bottleneck features detected (train)')
    # Use a context manager so the underlying .npz archive is closed again.
    with np.load(train_features_file) as cached:
        features = cached['features']
    print('bottleneck features loaded (train)')
else:
    print('bottleneck features file not detected (train)')
    print('calculating now ...')
    # NOTE: this rebinds `x_train` to its first `nof_images` entries, but only
    # on this (cache-miss) branch; `y_train` is left at full length.
    x_train = x_train[0:nof_images]
    # Resize every image to the network's input size.
    big_x_train = np.array(
        [cv2.resize(image, (input_side, input_side)) for image in x_train]
    ).astype('float32')
    print("finished")
    inception_input_train = preprocess_input(big_x_train)
    print('train data preprocessed')
    # extract, process, and save bottleneck features
    features = model.predict(inception_input_train)
    features = np.squeeze(features)
    np.savez(train_features_file, features=features)
    # Only report "saved" when something was actually written.
    print('bottleneck features saved (train)')

Using TensorFlow backend.


data loaded
Instructions for updating:
Colocations handled automatically by placer.


# Plot 2-D Embedding (UMAP, used in place of t-SNE) of Bottleneck Features for Train Set

In [0]:
import umap

In [0]:
#from tsne import bh_sne
import matplotlib.pyplot as plt
%matplotlib inline

# Flatten the bottleneck features and reduce them to a low-dimensional
# embedding. NOTE: the variables and files are still called "tsne", but the
# embedding is computed with UMAP (the bh_sne call was replaced).

# A single path is used for the existence check, the load, and the save.
# Previously the cell checked/loaded 'tsne_features.npz' but saved to
# 'tsne_features_1', so the cache written here was never found again.
embedding_file = 'tsne_features.npz'

# Fixed seed so that re-running the cell yields the same embedding.
umap_seed = 42

if os.path.exists(embedding_file):
    print('tsne features detected (train)')
    # Context manager closes the .npz archive once the array is read.
    with np.load(embedding_file) as cached:
        tsne_features = cached['tsne_features']
else:
    print('tsne features not detected (train)')
    print('calculating now ...')
    # One row per image, all remaining feature axes flattened into one.
    flat_features = np.reshape(
        features, (features.shape[0], np.prod(features.shape[1:]))
    )
    reducer = umap.UMAP(n_neighbors=5,
                        min_dist=0.3,
                        metric='correlation',
                        random_state=umap_seed)
    tsne_features = reducer.fit_transform(flat_features)
    np.savez(embedding_file, tsne_features=tsne_features)
print('tsne features obtained')



In [0]:
# Class names, indexed by the integer class id stored in `y_train`.
labels = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
          'ship', 'truck']

# Load the two embedding parts and stack them.
# NOTE(review): 'tsne_features_2.npz' is not written by any cell visible in
# this notebook; presumably it comes from a separate run over the remaining
# training images -- confirm.
with np.load("tsne_features.npz") as data:
    tsne_features_1 = data['tsne_features']

with np.load("tsne_features_2.npz") as data:
    tsne_features_2 = data['tsne_features']

tsne_features = np.concatenate((tsne_features_1, tsne_features_2))

# Labels for the embedded points. Assumes row i of the stacked embedding
# belongs to training image i -- TODO confirm against how the parts were made.
point_labels = y_train[:len(tsne_features)]
if len(point_labels) != len(tsne_features):
    raise ValueError(
        'embedding has %d points but only %d labels are available'
        % (len(tsne_features), len(point_labels))
    )

# Plot one scatter series per class so each class gets a legend entry.
# Colours match the original mapping: jet colormap evaluated at class_id / 10.
fig, ax = plt.subplots()
for class_id, class_name in enumerate(labels):
    in_class = point_labels == class_id
    ax.scatter(tsne_features[in_class, 0], tsne_features[in_class, 1],
               color=plt.cm.jet(class_id / 10), s=10, edgecolors='none',
               label=class_name)
ax.set_title('2-D embedding of bottleneck features (train)')
ax.set_xlabel('embedding dimension 1')
ax.set_ylabel('embedding dimension 2')
ax.legend(markerscale=2, loc='best', fontsize='small')
plt.show()