In [1]:
# Simple notebook that extracts image features with the MobileNetV2 model,
# taking the output of the layer just before the classification layer.
# The features are saved as NumPy arrays for LSTM modelling.

In [2]:
import warnings
# Suppress every warning routed through Python's warnings module for the rest
# of the session; note that this also hides library deprecation warnings.
warnings.filterwarnings('ignore')

In [3]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
from keras.applications import MobileNetV2
from keras.applications.mobilenet_v2 import preprocess_input
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator

In [4]:
# Turn on memory growth for every visible GPU. On a CPU-only machine the
# device list is empty, so the loop does nothing instead of failing on gpus[0].
gpus = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))
for gpu in gpus:
    # TensorFlow requires the memory-growth setting to match across all GPUs,
    # so apply it to each device rather than only the first one.
    tf.config.experimental.set_memory_growth(gpu, True)

Num GPUs Available:  1


In [5]:
# Dataset variant tag and the directories derived from it.
fps = '2FPS'
# Directory the .npy path/label arrays are read from.
numpy_dir = f'../../data/DAiSEE/{fps}/data/'
# Directory the extracted feature arrays are written to.
features_dir = f'{os.getcwd()}/features'
# Split names; not referenced by the other cells visible in this notebook.
usage = ['Train', 'Test', 'Validation']

In [6]:
# Make sure the features directory exists. exist_ok=True avoids the
# check-then-create race and keeps the cell safe to re-run.
os.makedirs(features_dir, exist_ok=True)

In [7]:
def get_array(usage, numpy_dir):
    """Load the input and label arrays stored for one data split.

    Args:
        usage: Split name; it is lower-cased before being placed in the
            file names ``x_<usage>.npy`` and ``y_lab_<usage>.npy``.
        numpy_dir: Directory (str or path-like) containing both files.

    Returns:
        Tuple ``(x, y)`` of the two loaded NumPy arrays.

    Note:
        The ``x`` file is read with ``allow_pickle=True``, which unpickles
        object arrays; only load files that come from a trusted source.
    """
    split = usage.lower()
    base = Path(numpy_dir)
    inputs = np.load(base / f'x_{split}.npy', allow_pickle=True)
    labels = np.load(base / f'y_lab_{split}.npy')
    return inputs, labels

In [8]:
def get_dataframe(usage, numpy_dir):
    """Build a two-column frame of file entries and labels for one split.

    Args:
        usage: Split name, passed through to ``get_array``.
        numpy_dir: Directory holding the split's ``.npy`` files.

    Returns:
        pandas.DataFrame with columns ``file`` and ``label``, both cast to
        ``str``, one row per entry of the loaded arrays.

    Raises:
        ValueError: If the file and label arrays differ in length.
    """
    x_files, y = get_array(usage, numpy_dir)

    # concat(axis=1) would silently pad the shorter side with NaN, and the
    # astype(str) below would turn that into literal 'nan' entries.
    if len(x_files) != len(y):
        raise ValueError(
            f"'{usage}' split has {len(x_files)} files but {len(y)} labels"
        )

    # Both frames are freshly built, so their default indexes already align.
    df = pd.concat(
        [pd.DataFrame(x_files, columns=['file']),
         pd.DataFrame(y, columns=['label'])],
        axis=1,
    )
    df['file'] = df['file'].astype(str)
    df['label'] = df['label'].astype(str)

    return df

In [9]:
# Load the file/label frame for each split, in train, test, val order.
df_train, df_test, df_val = (
    get_dataframe(split, numpy_dir) for split in ('train', 'test', 'val')
)

In [10]:
# A negative n makes head() return every row except the last 10, so this
# displays (truncated) almost the whole frame rather than the first 10 rows.
df_train.head(-10)

Unnamed: 0,file,label
0,../../data/DAiSEE/2FPS/data/train/b0/110001100...,0
1,../../data/DAiSEE/2FPS/data/train/b0/110001100...,0
2,../../data/DAiSEE/2FPS/data/train/b0/110001100...,0
3,../../data/DAiSEE/2FPS/data/train/b0/110001100...,0
4,../../data/DAiSEE/2FPS/data/train/b0/110001100...,0
...,...,...
107145,../../data/DAiSEE/2FPS/data/train/b3/459999021...,3
107146,../../data/DAiSEE/2FPS/data/train/b3/459999021...,3
107147,../../data/DAiSEE/2FPS/data/train/b3/459999021...,3
107148,../../data/DAiSEE/2FPS/data/train/b3/459999021...,3


In [11]:
# Check the row count of each split: every one should be divisible by 20
# (presumably the number of frames per clip - TODO confirm).
for split_df in (df_train, df_test, df_val):
    n_rows = split_df.shape[0]
    print(split_df.shape, 'divided by 20=', n_rows/20)
    # Enforce the invariant instead of relying on someone reading the printout.
    assert n_rows % 20 == 0, f'{n_rows} rows is not a multiple of 20'

(107160, 2) divided by 20= 5358.0
(35680, 2) divided by 20= 1784.0
(28580, 2) divided by 20= 1429.0


In [12]:
# MobileNetV2's ImageNet weights expect pixels scaled to [-1, 1]. A plain
# 1/255 rescale feeds the network [0, 1] inputs instead, so use the model's
# own preprocess_input. Features saved with the old scaling must be
# regenerated before being mixed with new ones.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)


def _make_generator(df):
    """Return an unshuffled batch iterator over the images listed in ``df``.

    Args:
        df: DataFrame with a ``file`` column of image paths and a ``label``
            column of class labels.

    Returns:
        The iterator produced by ``datagen.flow_from_dataframe``. With
        ``shuffle=False`` the batches follow the row order of ``df``, which
        keeps predictions aligned with the iterator's ``classes`` attribute.
    """
    return datagen.flow_from_dataframe(
        dataframe=df,
        x_col='file',
        y_col='label',
        target_size=(224, 224),
        batch_size=32,
        shuffle=False,
        class_mode='categorical')


train_generator = _make_generator(df_train)
val_generator = _make_generator(df_val)
test_generator = _make_generator(df_test)


Found 107160 validated image filenames belonging to 4 classes.
Found 28580 validated image filenames belonging to 4 classes.
Found 35680 validated image filenames belonging to 4 classes.


In [13]:
# Load MobileNetV2 with ImageNet weights, classification head included.
base_model = MobileNetV2(include_top=True, weights='imagenet')

In [14]:
# Print the layer-by-layer architecture of the loaded network.
base_model.summary()

Model: "mobilenetv2_1.00_224"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 225, 225, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 32) 864         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 32) 128         Conv1[0][0]                      
_______________________________________________________________________________

In [15]:
# Feature extractor: same input as the base network, output taken from the
# layer just before the final classification layer (the global average
# pooling). The layer is selected by position because its auto-generated
# name ('global_average_pooling2d') gets a numeric suffix whenever the base
# model is rebuilt in the same kernel, which made get_layer() fail on re-run.
model = Model(
    inputs=base_model.input,
    outputs=base_model.layers[-2].output
)

In [18]:
# Extract features for the training split and save them with their labels.
# Model.predict accepts generators directly; predict_generator is deprecated.
features = model.predict(train_generator)
# The generator is unshuffled, so .classes lines up row-for-row with features.
y_train = train_generator.classes
np.save(features_dir + '/train_features.npy', features)
np.save(features_dir + '/train_labels.npy', y_train)
print(len(features))

Instructions for updating:
Please use Model.predict, which supports generators.
107160


In [20]:
# Extract features for the validation split and save them with their labels.
# Model.predict accepts generators directly; predict_generator is deprecated.
val_features = model.predict(val_generator)
# The generator is unshuffled, so .classes lines up row-for-row with features.
y_val = val_generator.classes
np.save(features_dir + '/val_features.npy', val_features)
np.save(features_dir + '/val_labels.npy', y_val)
print(len(val_features))

28580


In [21]:
# Extract features for the test split and save them with their labels.
# Model.predict accepts generators directly; predict_generator is deprecated.
test_features = model.predict(test_generator)
# The generator is unshuffled, so .classes lines up row-for-row with features.
y_test = test_generator.classes
np.save(features_dir + '/test_features.npy', test_features)
np.save(features_dir + '/test_labels.npy', y_test)
print(len(test_features))

35680
