In [1]:
% load_ext autoreload
% autoreload 2

In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model

from vision.pre_processing.load_frames_from_videos import FramesExtractor

## Testing VGG16 Fine Tuning

In [47]:
# Input shape (height, width, channels) passed to VGG16 as `input_shape`
# and reused to build the dummy sample image.
IMG_SIZE = (224, 224, 3)

In [48]:
# VGG16 convolutional base used as the feature extractor: ImageNet weights,
# no fully-connected classifier on top, and global average pooling so each
# image is reduced to a single feature vector.
feature_extractor = tf.keras.applications.VGG16(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=IMG_SIZE,
    input_tensor=None,
)

In [49]:
# Print the layer-by-layer architecture (output shapes and parameter counts).
feature_extractor.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [50]:
import numpy as np

In [51]:
# Dummy all-ones image with a leading batch axis of size 1, i.e. a batch
# containing exactly one IMG_SIZE-shaped image.
sample = np.ones(shape=(1, *IMG_SIZE))

In [52]:
# Confirm the batch axis was added: expected (1, 224, 224, 3).
sample.shape

(1, 224, 224, 3)

In [53]:
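# Disabled: VGG16 input preprocessing is not applied to the synthetic sample.
# TODO(review): real frames presumably need
# tf.keras.applications.vgg16.preprocess_input before inference - confirm.
# sample=preprocess_input(sample)
# sample.shape
# sample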

In [54]:
# Run the one-image batch through the feature extractor to get its pooled features.
out = feature_extractor.predict(sample)

2022-07-23 16:32:40.929113: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


In [55]:
# One 512-dimensional feature vector per input image: expected (1, 512).
out.shape

(1, 512)

In [56]:
# Display the object repr to see which Keras model class VGG16 returns.
feature_extractor

<keras.engine.functional.Functional at 0x15e837c70>

In [57]:
# Layers 0-14 are the input layer plus convolution blocks 1-4: keep their
# pretrained weights fixed so that only block 5 onwards remains trainable.
for frozen_layer in feature_extractor.layers[:15]:
    frozen_layer.trainable = False

# Sanity check: list every layer as "<index> <name> <trainable flag>".
for idx, lyr in enumerate(feature_extractor.layers):
    print(f"{idx} {lyr.name} {lyr.trainable}")

0 input_3 False
1 block1_conv1 False
2 block1_conv2 False
3 block1_pool False
4 block2_conv1 False
5 block2_conv2 False
6 block2_pool False
7 block3_conv1 False
8 block3_conv2 False
9 block3_conv3 False
10 block3_pool False
11 block4_conv1 False
12 block4_conv2 False
13 block4_conv3 False
14 block4_pool False
15 block5_conv1 True
16 block5_conv2 True
17 block5_conv3 True
18 block5_pool True
19 global_average_pooling2d_2 True


In [58]:
# Classification head stacked on the pooled VGG16 features. The five layers
# are created and applied in this exact order; a later cell strips the head
# again by dropping the last five layers of the model.
x = feature_extractor.output
for head_layer in (
    Flatten(),
    Dense(units=512, activation='relu'),
    Dropout(0.5),
    Dense(units=256, activation='relu'),
    Dense(units=3, activation='softmax'),  # 3-way class probabilities
):
    x = head_layer(x)

# End-to-end model: VGG16 input -> backbone -> new head.
transfer_model = Model(inputs=feature_extractor.input, outputs=x)

In [59]:
# Smoke-test the freshly built transfer model on the single all-ones image.
# `sample` already carries the batch axis and is defined earlier, so this
# cell no longer depends on `X`, which is only created in a later cell and
# made this line fail with a NameError on Restart & Run All.
transfer_model.predict(sample)



2022-07-23 16:32:41.249846: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


array([[0.18690722, 0.31540653, 0.49768624]], dtype=float32)

In [60]:
# Synthetic training inputs: 1000 all-ones images of shape (224, 224, 3).
# np.ones defaults to float64, so this array occupies roughly 1.2 GB of RAM.
X = np.ones(shape=(1000, 224, 224, 3))
X.shape

(1000, 224, 224, 3)

In [61]:
# Synthetic integer class labels for the 1000 dummy images (300 / 300 / 400
# samples per class). The model ends in a 3-unit softmax trained with
# sparse_categorical_crossentropy, which requires class ids in [0, 3), so the
# classes are numbered 0, 1, 2. The previous 1, 2, 3 put label 3 out of range.
y = np.array([0] * 300 + [1] * 300 + [2] * 400)
y.shape

(1000,)

In [62]:
# Integer class labels + softmax output -> sparse categorical cross-entropy.
# `metrics` is given as a list, which is the form Model.compile documents;
# a bare string is not accepted by every Keras version. The metric keeps
# the name 'acc' so logged metric names are unchanged.
transfer_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)

In [63]:
# Fine-tune on the synthetic data for 10 epochs (batch size left at the Keras default).
transfer_model.fit(x=X, y=y, epochs=10)

Epoch 1/10


2022-07-23 16:32:43.112613: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10

KeyboardInterrupt: 

In [68]:
# Total number of layers in the transfer model (backbone layers plus the added head).
# NOTE: despite its name, `last` holds a count, not the index of the last layer.
last = len(transfer_model.layers)
last

25

In [70]:
# Check the container type of `Model.layers` before slicing it in the next cell.
type(transfer_model.layers)

list

In [90]:
from tensorflow.keras.models import Sequential

# Rebuild a feature extractor from the fine-tuned model by dropping its last
# five layers, i.e. the Flatten / Dense / Dropout / Dense / Dense head that
# was stacked on top of the VGG16 backbone. The remaining layers are reused
# as-is, so they keep their current weights.
feature_extractor_tuned = Sequential(layers=transfer_model.layers[:-5])

In [91]:
# Inspect the layers that remain in the truncated model.
feature_extractor_tuned.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0         
                                                                 
 block3_conv1 (Conv2D)       (None, 56, 56, 256)      

In [87]:
# Extract features for the first synthetic image; slicing with [:1] keeps
# the leading batch axis, giving a batch of exactly one image.
feature_extractor_tuned.predict(X[:1])

2022-07-23 16:50:43.798224: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




array([[0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 5.0526115e+08,
        0.0000000e+00, 5.0299830e+08, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 5.0351366e+08,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        5.0261837e+08, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        4.9332826e+08, 0.0000000e+00, 5.0386250e+08, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 5.0380179e+08, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e

## Actual fine tuning pipeline

In [93]:
# Start the real pipeline from a fresh ImageNet-pretrained VGG16 base
# (no classifier on top, global average pooling). This rebinds
# `feature_extractor` to a newly constructed model.
feature_extractor = tf.keras.applications.VGG16(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=IMG_SIZE,
    input_tensor=None,
)

# Print the architecture of the new base model.
feature_extractor.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [None]:
# Extract frames and their ground truth with the project's FramesExtractor
# (its internals are not visible from this notebook).
fe = FramesExtractor(
    videos_folder="../../dataset/data/test_videos",
    labels="../../dataset/labels.xlsx",
    ground_truth_folder="../vision_dataset/ground_truth",
)
extracted_frames, ground_truth = fe.extract_frames()

# Shape report: a single frame, the full frame collection, and the labels.
for caption, dims in (
    ('one frame shape: ', extracted_frames[0].shape),
    ('extracted_frames shape: ', extracted_frames.shape),
    ('ground truth shape: ', ground_truth.shape),
):
    print(caption, dims)

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/4920 [00:00<?, ?it/s]

2022-07-23 19:01:19.158463: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-07-23 19:01:19.158778: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1


  0%|          | 0/4920 [00:00<?, ?it/s]

  0%|          | 0/4920 [00:00<?, ?it/s]

  0%|          | 0/4920 [00:00<?, ?it/s]

  0%|          | 0/4920 [00:00<?, ?it/s]

  0%|          | 0/4920 [00:00<?, ?it/s]

  0%|          | 0/4920 [00:00<?, ?it/s]

  0%|          | 0/4920 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/3960 [00:00<?, ?it/s]

  0%|          | 0/3960 [00:00<?, ?it/s]

In [6]:
# Preview the first rows of the per-frame ground-truth labels.
ground_truth.head()

Unnamed: 0,micro_labels,macro_labels,ar_labels,frame_name
0,lie_still,lying_down,actor_repositioning,actor_1_bed_full_ph_cam_1_0000
1,lie_still,lying_down,actor_repositioning,actor_1_bed_full_ph_cam_1_0001
2,lie_still,lying_down,actor_repositioning,actor_1_bed_full_ph_cam_1_0002
3,lie_still,lying_down,actor_repositioning,actor_1_bed_full_ph_cam_1_0003
4,lie_still,lying_down,actor_repositioning,actor_1_bed_full_ph_cam_1_0004
