**TODO:** Before running this notebook, change the runtime type to GPU (in Colab: Runtime → Change runtime type).

In [None]:
!pip -q install tensorflow-addons timm einops mmcv

In [None]:
import tensorflow as tf
import numpy as np
import tensorflow_addons as tfa
import sys
import tensorflow_hub as hub


In [None]:
! git clone https://github.com/shoaib6174/GSOC-22-Video-Swin-Transformers
sys.path.append("/content/GSOC-22-Video-Swin-Transformers")

fatal: destination path 'GSOC-22-Video-Swin-Transformers' already exists and is not an empty directory.


In [None]:
from VideoSwinTransformer import  I3DHead_tf , CosineAnnealingWithWarmupSchedule

In [None]:
def get_model(num_classes, backbone ,shape_of_input=(3,32,224,224)):
    """Build a classifier: `backbone` followed by an `I3DHead_tf` head.

    Args:
        num_classes: Number of classes, forwarded to `I3DHead_tf`.
        backbone: Callable layer/model applied to the input tensor; it is
            invoked with `training=True`.
        shape_of_input: Either a per-sample shape of rank 4 such as
            (3, 32, 224, 224), or a shape with a leading sample/batch
            dimension such as (10, 3, 32, 224, 224). A rank-4 shape is used
            as-is; for any other rank the leading dimension is dropped.

    Returns:
        A `tf.keras.Model` mapping the input tensor to the head's output.
    """
    # A rank-4 shape (like the default) is already the per-sample shape; slicing it
    # would strip its first axis. Shapes of any other rank are treated as carrying a
    # leading sample/batch dimension, which `tf.keras.Input` must not receive.
    if len(shape_of_input) == 4:
        per_sample_shape = shape_of_input
    else:
        per_sample_shape = shape_of_input[1:]
    inputs = tf.keras.Input(per_sample_shape)

    x = backbone(inputs, training= True)
    # 768 is passed as the head's second argument -- presumably the backbone's
    # output feature width (TODO confirm against I3DHead_tf).
    outputs = I3DHead_tf(num_classes, 768, training=True)(x)
    return tf.keras.Model(inputs, outputs)

## Single-GPU training

In [None]:
# Settings for the single-GPU run on randomly generated data.
batch_size = 1      # used to derive the per-epoch step counts
num_frames = 32
resolution = 224    # used for both spatial dimensions
channels = 3
sample_count = 10
num_classes = 5

# Full dataset shape: (samples, channels, frames, height, width).
# `channels` is used here instead of a repeated literal so the two cannot drift apart.
shape_of_input = (sample_count, channels, num_frames, resolution, resolution)

In [None]:
# Random stand-in data: normally distributed "videos" plus one-hot labels built from
# uniformly drawn integer class ids.
videos = tf.random.normal(shape_of_input)
labels = tf.one_hot(
    tf.random.uniform(shape=[sample_count], minval=0, maxval=num_classes, dtype='int64'),
    num_classes,
)

# Show the sample count and the per-sample / label shapes as a quick sanity check.
len(videos), videos[0].shape, labels.shape

(10, TensorShape([3, 32, 224, 224]), TensorShape([10, 5]))

In [None]:
num_epochs = 3
warmup_epoch = 2

# Whole batches per epoch; this is the value handed to fit() as steps_per_epoch.
train_steps = sample_count // batch_size
test_steps = sample_count // batch_size

# Derive the schedule lengths from the same per-epoch step count so that the
# learning-rate schedule spans exactly the steps that are run, even when
# sample_count is not a multiple of batch_size.
total_train_steps = train_steps * num_epochs
total_steps = total_train_steps

# Number of warmup batches.
warmup_steps = warmup_epoch * train_steps

In [None]:
# loading the tf2 model with converted pre-trained weights
path_to_model_weights = "https://gsoc4108768259.blob.core.windows.net/azureml-blobstore-cf8fa289-ef6f-4db3-b097-1d65257e5a14/swin_tiny_patch244_window877_kinetics400_1k_tf.tar.gz"
backbone = hub.KerasLayer(path_to_model_weights)

In [None]:
# Assemble the classifier (input -> backbone -> head) for the single-GPU run.
model = get_model(num_classes,  backbone,shape_of_input=shape_of_input)

## The backbone uses a learning rate 0.1x that of the classifier head (0.001 vs 0.01).
## MultiOptimizer is used so each part gets its own optimizer and schedule.
lr_backbone = CosineAnnealingWithWarmupSchedule(learning_rate_base=.001,
                                    total_steps=total_steps,
                                    warmup_learning_rate=0.0,
                                    warmup_steps=warmup_steps,
                                    hold_base_rate_steps=0)
lr_classifier = CosineAnnealingWithWarmupSchedule(learning_rate_base=.01,
                                        total_steps=total_steps,
                                        warmup_learning_rate=0.0,
                                        warmup_steps=warmup_steps,
                                        hold_base_rate_steps=0)

optimizer_backbone = tfa.optimizers.AdamW(weight_decay= 0.05,learning_rate=lr_backbone, beta_1= 0.9, beta_2=0.999, epsilon=1e-8)
optimizer_classifier = tfa.optimizers.AdamW(weight_decay= 0.05,learning_rate=lr_classifier, beta_1= 0.9, beta_2=0.999, epsilon=1e-8)

# In the model built by get_model, layers[0] is the input layer, layers[1] the backbone
# and layers[2] the classification head.
optimizers_and_layers = [(optimizer_backbone, model.layers[1]), (optimizer_classifier, model.layers[2])]
optimizer = tfa.optimizers.MultiOptimizer(optimizers_and_layers)

# The labels are one-hot and the loss is configured for logits, with label smoothing.
loss_obj = tf.keras.losses.CategoricalCrossentropy(from_logits=True,label_smoothing=0.1,
                                    reduction=tf.keras.losses.Reduction.SUM)

# "top_k_categorical_accuracy" uses k=5 by default, which is trivially 1.0 when there are
# only 5 classes, so top-1 accuracy is reported alongside it to give a meaningful signal.
metrics=["categorical_accuracy", "top_k_categorical_accuracy"]

model.compile(loss=loss_obj, optimizer=optimizer, metrics=metrics)

5 768 {'type': 'CrossEntropyLoss'} avg 0.5 0.01 {'training': True}


In [None]:
# Train on the in-memory tensors.
# batch_size is passed explicitly: for tensor inputs Keras otherwise uses its default
# of 32, which ignores the configured batch size that the step counts were derived from.
# No validation_data is supplied, so validation_steps / validation_freq are kept only
# for when a validation set is added.
results = model.fit(videos,labels,
    batch_size=batch_size,
    epochs=num_epochs,
    steps_per_epoch=train_steps,
    validation_steps=test_steps,
    callbacks=[],
    validation_freq=1,
    verbose=1)

Epoch 1/3
lr = 0.0005
lr = 0.0005
Epoch 2/3
Epoch 3/3


## Distributed Training

In [None]:
# Settings for the distributed run on randomly generated data.
batch_size = 2      # used to derive the per-epoch step counts
num_frames = 32
resolution = 224    # used for both spatial dimensions
channels = 3
sample_count = 10
num_classes = 5

# Full dataset shape: (samples, channels, frames, height, width).
# `channels` is used here instead of a repeated literal so the two cannot drift apart.
shape_of_input = (sample_count, channels, num_frames, resolution, resolution)

In [None]:
# Synthetic inputs for the distributed run: Gaussian noise "videos" and one-hot labels
# derived from uniformly sampled integer class ids.
videos = tf.random.normal(shape_of_input)
labels = tf.one_hot(
    tf.random.uniform(shape=[sample_count], minval=0, maxval=num_classes, dtype='int64'),
    num_classes,
)

# Display the number of samples together with the per-sample and label shapes.
len(videos), videos[0].shape, labels.shape

(10, TensorShape([3, 32, 224, 224]), TensorShape([10, 5]))

In [None]:
num_epochs = 3
warmup_epoch = 2

# Whole batches per epoch; this is the value handed to fit() as steps_per_epoch.
train_steps = sample_count // batch_size
test_steps = sample_count // batch_size

# Derive the schedule lengths from the same per-epoch step count so that the
# learning-rate schedule spans exactly the steps that are run, even when
# sample_count is not a multiple of batch_size.
total_train_steps = train_steps * num_epochs
total_steps = total_train_steps

# Number of warmup batches.
warmup_steps = warmup_epoch * train_steps

In [None]:
# Distribution strategy whose scope is used below to build and compile the distributed model.
# No devices are passed explicitly, so device selection is left to MirroredStrategy's defaults.
strategy = tf.distribute.MirroredStrategy()




In [None]:
# Everything that creates variables (backbone, model, optimizers) is built inside the
# strategy scope.
with strategy.scope():

    # loading the tf2 model with converted pre-trained weights
    path_to_model_weights = "https://gsoc4108768259.blob.core.windows.net/azureml-blobstore-cf8fa289-ef6f-4db3-b097-1d65257e5a14/swin_tiny_patch244_window877_kinetics400_1k_tf.tar.gz"
    backbone = hub.KerasLayer(path_to_model_weights)

    model_dist = get_model(num_classes,  backbone,shape_of_input=shape_of_input)

    ## To implement different learning rate for backbone and classifier MultiOptimizer is being used
    lr_backbone = CosineAnnealingWithWarmupSchedule(learning_rate_base=.001,
                                        total_steps=total_steps,
                                        warmup_learning_rate=0.0,
                                        warmup_steps=warmup_steps,
                                        hold_base_rate_steps=0)
    lr_classifier = CosineAnnealingWithWarmupSchedule(learning_rate_base=.01,
                                            total_steps=total_steps,
                                            warmup_learning_rate=0.0,
                                            warmup_steps=warmup_steps,
                                            hold_base_rate_steps=0)

    optimizer_backbone = tfa.optimizers.AdamW(weight_decay= 0.05,learning_rate=lr_backbone, beta_1= 0.9, beta_2=0.999, epsilon=1e-8)
    optimizer_classifier = tfa.optimizers.AdamW(weight_decay= 0.05,learning_rate=lr_classifier, beta_1= 0.9, beta_2=0.999, epsilon=1e-8)

    # The optimizers must be bound to the layers of the model being trained here
    # (`model_dist`), not to the layers of a model built in another section.
    # layers[1] is the backbone and layers[2] the classification head.
    optimizers_and_layers = [(optimizer_backbone, model_dist.layers[1]), (optimizer_classifier, model_dist.layers[2])]
    optimizer = tfa.optimizers.MultiOptimizer(optimizers_and_layers)

    # The labels are one-hot and the loss is configured for logits, with label smoothing.
    loss_obj = tf.keras.losses.CategoricalCrossentropy(from_logits=True,label_smoothing=0.1,
                                        reduction=tf.keras.losses.Reduction.SUM)

    # "top_k_categorical_accuracy" uses k=5 by default, which is trivially 1.0 when there
    # are only 5 classes, so top-1 accuracy is reported alongside it.
    metrics=["categorical_accuracy", "top_k_categorical_accuracy"]

    model_dist.compile(loss=loss_obj, optimizer=optimizer, metrics=metrics)

5 768 {'type': 'CrossEntropyLoss'} avg 0.5 0.01 {'training': True}


In [None]:
# Train the distributed model on the in-memory tensors.
# batch_size is passed explicitly: for tensor inputs Keras otherwise uses its default
# of 32, which ignores the configured batch size that the step counts were derived from.
# NOTE(review): under a distribution strategy this is presumably the global batch size
# split across replicas -- confirm against the tf.distribute / Keras fit documentation.
# No validation_data is supplied, so validation_steps / validation_freq are kept only
# for when a validation set is added.
results = model_dist.fit(videos,labels,
    batch_size=batch_size,
    epochs=num_epochs,
    steps_per_epoch=train_steps,
    validation_steps=test_steps,
    callbacks=[],
    validation_freq=1,
    verbose=1)

Epoch 1/3
Epoch 2/3
Epoch 3/3
