In [85]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
import glob
import re
import math
from time import time
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.callbacks import TensorBoard, CSVLogger#, LearningRateScheduler
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
#from sklearn.datasets import make_multilabel_classification
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Flatten
# import tensorflow_hub as hub
from sklearn.metrics import mean_absolute_error,mean_squared_error
from tensorflow.keras.models import load_model
import tensorflow.keras.backend as K
from model8 import model
import argparse
import gc
import random

In [86]:
# try:
#   tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
#   print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
# except ValueError:
#   raise BaseException('ERROR: Not connected to a TPU runtime; please see the previous cell in this notebook for instructions!')
# tf.config.experimental_connect_to_cluster(tpu)
# tf.tpu.experimental.initialize_tpu_system(tpu)
# tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

# Fail fast when no GPU is visible: tf.test.gpu_device_name() returns the name
# of a GPU device if one is available, and anything other than device 0 aborts
# the notebook here instead of letting training run without one.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [87]:
# Despite the "_dir" suffix this is a path to a single .h5 file; it is passed
# as the first argument to model(...) when the network is built.
# Presumably pretrained C3D Sports-1M weights, going by the file name - TODO confirm.
weight_dir = '/content/drive/MyDrive/weights_C3D_sports1M_tf.h5'

In [88]:
def calculate_mean_std(x, channels_first=False, verbose=0):
    """
    Compute the mean and standard deviation of every channel

    Parameters
    ----------
    x : array
        4-D collection of images (frames) or 5-D collection of videos,
        with the channels on the last axis
    channels_first : bool, optional
        Must stay False (anything else fails the assertion), by default False
    verbose : int, optional
        Non-zero prints the statistics of each channel, 0 is silent, by default 0

    Returns
    -------
    array of shape [2, num_channels]
        Row 0 holds the per-channel means, row 1 the per-channel stds
    """
    assert x.ndim in [5, 4]
    assert channels_first == False

    channel_means, channel_stds = [], []
    for channel in range(x.shape[-1]):
        # The rank is restricted to 4 or 5 above, so "..." stands for all
        # leading axes and this is the same view for images and videos alike.
        plane = x[..., channel]
        channel_mean = plane.mean()
        channel_std = plane.std()

        if verbose:
            print("Channel %s mean before: %s" % (channel, channel_mean))
            print("Channel %s std before: %s" % (channel, channel_std))

        channel_means.append(channel_mean)
        channel_stds.append(channel_std)

    return np.stack((channel_means, channel_stds))


def preprocess_input(x, mean_std, divide_std=False, channels_first=False, verbose=0):
    """
    Channel-wise subtraction of mean from the input and optional division by std

    The input is never modified: the function always works on a float32 copy.

    Parameters
    ----------
    x : array
        Input array of images (4-D) or videos (5-D), channels on the last axis
    mean_std : array
        Array of shape [2, num_channels] with per-channel mean (row 0) and std (row 1)
    divide_std : bool, optional
        Add division by std or not, by default False.
        No guard is applied for a zero std; such a channel becomes inf/nan.
    channels_first : bool, optional
        Leave False, otherwise not implemented, by default False
    verbose : int, optional
        1-prints out details, 0-silent mode, by default 0

    Returns
    -------
    array
        New float32 array with the preprocessing steps applied
    """
    # np.array always copies. np.asarray would hand back the caller's own
    # buffer whenever x is already float32, and the in-place arithmetic below
    # would then silently overwrite the caller's data.
    x = np.array(x, dtype=np.float32)
    ndim = x.ndim
    assert ndim in [5,4]
    assert channels_first == False
    num_channels = x.shape[-1]

    for c in range(0, num_channels):
        # View onto channel c; "..." covers the 3 or 4 leading axes, so images
        # and videos share one code path. In-place ops write through to x.
        channel = x[..., c]
        channel -= mean_std[0][c]
        if divide_std:
            channel /= mean_std[1][c]
        if verbose:
            print("Channel %s mean after preprocessing: %s" % (c, channel.mean()))
            print("Channel %s std after preprocessing: %s" % (c, channel.std()))
    return x


In [89]:
# Collect every .avi clip of the training and validation splits.
# glob.glob makes no ordering promise, so the order of these lists (and of
# everything derived from them) can differ between environments.
files = glob.glob('/content/drive/MyDrive/training_arr/*.avi')
files_val = glob.glob('/content/drive/MyDrive/validation_arr/*.avi')
# Sanity check of the discovered paths; raises IndexError if either folder
# yields fewer than six clips.
print(str(files[5]))
print(str(files_val[5]))

# Filled by the next cell with one label / one path per clip.
training_labels = []
training_files = []
val_labels = []
val_files = []

# NOTE(review): spelled 'recieve' here, while the validation frame is later
# filtered on 'receive'. This list is not read anywhere in the visible cells -
# confirm whether it is still needed and which spelling is intended.
unseen = ['recieve']

/content/drive/MyDrive/training_arr/teacher9763_clipped.avi
/content/drive/MyDrive/validation_arr/argue3147_clipped.avi


In [90]:
def _label_from_path(path):
    """
    Return the word label encoded at the start of a clip's file name.

    The label is the first run of letters that is directly followed by a
    digit, e.g. 'teacher9763_clipped.avi' -> 'teacher'. Only the file name is
    inspected, so letters followed by a digit in a directory name cannot be
    mistaken for the label.

    Raises
    ------
    IndexError
        If the file name contains no letters-then-digit pattern.
    """
    return re.findall(r'[A-Za-z]+[0-9]', os.path.basename(str(path)))[0][:-1]


# The lists are rebuilt from scratch rather than appended to, so re-running
# this cell does not duplicate every clip.
training_files = [str(file) for file in files]
training_labels = [_label_from_path(file) for file in training_files]

val_files = [str(file) for file in files_val]
val_labels = [_label_from_path(file) for file in val_files]


In [91]:
# One row per clip: file path plus the word label parsed from its name.
training_data = pd.DataFrame({'filename':training_files,'training_labels':training_labels})
print(training_data)
val_data = pd.DataFrame({'filename':val_files,'val_labels':val_labels})
# Drop the 'receive' clips from validation (presumably because that class has
# no training clips - LabelEncoder.transform raises on labels it was not
# fitted on). .copy() gives val_data its own data, so adding the
# 'encoded_labels' column below does not raise SettingWithCopyWarning.
val_data = val_data[val_data['val_labels'] != 'receive'].copy()
print(val_data)

label_encoder = LabelEncoder()

# Fit on the training labels only; validation reuses the same mapping so the
# integer ids agree between the two splits.
training_data['encoded_labels'] = label_encoder.fit_transform(training_data['training_labels'])
val_data['encoded_labels'] = label_encoder.transform(val_data['val_labels'])
print(training_data)
print(val_data)
training_data.to_csv('training_words.csv')
val_data.to_csv('val_words.csv')

                                               filename training_labels
0     /content/drive/MyDrive/training_arr/teacher974...         teacher
1     /content/drive/MyDrive/training_arr/teacher976...         teacher
2     /content/drive/MyDrive/training_arr/teacher933...         teacher
3     /content/drive/MyDrive/training_arr/teacher974...         teacher
4     /content/drive/MyDrive/training_arr/television...      television
...                                                 ...             ...
5328  /content/drive/MyDrive/training_arr/beautiful2...       beautiful
5329  /content/drive/MyDrive/training_arr/beautiful2...       beautiful
5330  /content/drive/MyDrive/training_arr/beautiful8...       beautiful
5331  /content/drive/MyDrive/training_arr/beautiful7...       beautiful
5332  /content/drive/MyDrive/training_arr/beautiful7...       beautiful

[5333 rows x 2 columns]
                                               filename val_labels
0     /content/drive/MyDrive/validation_arr/

In [92]:
# Only appended to by the commented-out eager loading loop in the next cell;
# the active pipeline loads clips through gen_video_prep / val_preprocess.
videos_data = []


In [93]:
# i=1

# for file in training_data['filename']:
    
    
#     resize=(112, 112)
#     if i%100 == 0:
#       print(i)
#       print(str(file))

#     cap = cv2.VideoCapture(str(file))
#     ret = True
#     frames=[]

#     while ret == True:
#         ret,frame = cap.read()
#         if ret == True:
#             frame = cv2.resize(frame,resize)
#             frames.append(frame)
        
#     video = np.stack(frames,axis=0)
#     frames,length,width,channels = video.shape


#     video = video[list(np.linspace(0,frames-1,16,dtype=int))]
    
#     mean_std = calculate_mean_std(video, channels_first=False, verbose=0)
#     video = preprocess_input(video, mean_std, divide_std=False, channels_first=False, verbose=0)
#     videos_data.append(video)
#     cap.release()
#     i += 1
    

# cv2.destroyAllWindows()

In [94]:
def gen_video_prep(file_loc, size):
    """
    Load one video and turn it into a fixed-length, mean-centred training clip.

    Every frame is resized to `size` and, independently of the other frames,
    mirrored horizontally with probability 0.2. Sixteen frames are then
    selected: a random subset kept in temporal order when the video has at
    least 16 frames, otherwise evenly spaced indices (which repeat frames).
    Finally the clip's own per-channel mean is subtracted.

    Parameters
    ----------
    file_loc : str or path-like
        Location of the video file
    size : tuple
        Target frame size handed to cv2.resize

    Returns
    -------
    array
        float32 array holding the 16 selected, preprocessed frames

    Raises
    ------
    ValueError
        If no frame could be read from `file_loc`
    """
    frame_total = 16  # clip length; the network is built for 16-frame inputs

    cap = cv2.VideoCapture(str(file_loc))
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, size)
            # NOTE(review): the flip is decided per frame, not per clip, so a
            # single clip can mix mirrored and unmirrored frames - confirm
            # this is the intended augmentation.
            if np.random.randint(0, 100) < 20:
                frame = np.flip(frame, axis=1)
            frames.append(frame)
    finally:
        # Release the capture even when reading or resizing raises.
        cap.release()

    cv2.destroyAllWindows()

    if not frames:
        raise ValueError('No frames could be read from %s' % file_loc)

    video = np.stack(frames, axis=0)
    num_frames = video.shape[0]

    if num_frames >= frame_total:
        # Random subset of frame indices, restored to temporal order.
        frame_list = sorted(random.sample(range(num_frames), frame_total))
    else:
        # Too short: spread the available frames over the clip length.
        frame_list = list(np.linspace(0, num_frames - 1, frame_total, dtype=int))

    video = video[frame_list]

    mean_std = calculate_mean_std(video, channels_first=False, verbose=0)

    return preprocess_input(video, mean_std, divide_std=False, channels_first=False, verbose=0)

In [95]:
def val_preprocess(files, size):
    """
    Load and preprocess a list of videos eagerly (used for the validation set).

    Each video is resized frame by frame to `size`, reduced to 16 frames
    (a random subset kept in temporal order when at least 16 frames exist,
    otherwise evenly spaced indices that repeat frames) and centred with its
    own per-channel mean. No flipping is applied.

    NOTE(review): the frame subset is drawn at random, so the validation
    clips differ between runs unless the `random` module is seeded - confirm
    this is acceptable.

    Parameters
    ----------
    files : iterable
        Locations of the video files
    size : tuple
        Target frame size handed to cv2.resize

    Returns
    -------
    list
        One float32 array of 16 preprocessed frames per input file, in input order

    Raises
    ------
    ValueError
        If no frame could be read from one of the files
    """
    frame_total = 16  # clip length; the network is built for 16-frame inputs
    vids = []

    for i, file in enumerate(files, start=1):
        # Progress report every 100 clips.
        if i % 100 == 0:
            print(i)
            print(str(file))

        cap = cv2.VideoCapture(str(file))
        frames = []
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                frames.append(cv2.resize(frame, size))
        finally:
            # Release the capture even when reading or resizing raises.
            cap.release()

        if not frames:
            raise ValueError('No frames could be read from %s' % file)

        video = np.stack(frames, axis=0)
        num_frames = video.shape[0]

        if num_frames >= frame_total:
            # Random subset of frame indices, restored to temporal order.
            frame_list = sorted(random.sample(range(num_frames), frame_total))
        else:
            # Too short: spread the available frames over the clip length.
            frame_list = list(np.linspace(0, num_frames - 1, frame_total, dtype=int))

        video = video[frame_list]

        mean_std = calculate_mean_std(video, channels_first=False, verbose=0)
        video = preprocess_input(video, mean_std, divide_std=False, channels_first=False, verbose=0)
        vids.append(video)

    cv2.destroyAllWindows()

    return vids

In [96]:
# Smoke test of the loader on the first training clip. The result is also
# reused as the sample input of the feature-map visualisation at the end of
# the notebook.
test_vid = gen_video_prep(training_data['filename'].iloc[0], (112, 112))

In [97]:
test_vid.shape

(16, 112, 112, 3)

In [98]:
# for step in range(num_steps):
#     # Pick an offset within the training data, which has been randomized.
#     # Note: we could use better randomization across epochs.
#     offset = (step * batch_size) % (train_labels.shape[0] - batch_size)

#     # Generate a minibatch.
#     batch_data = train_dataset[offset:(offset + batch_size), :]
#     batch_labels = train_labels[offset:(offset + batch_size), :]

#     # Prepare a dictionary telling the session where to feed the minibatch.
#     # The key of the dictionary is the placeholder node of the graph to be fed,
#     # and the value is the numpy array to feed to it.
#     feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}

class My_Custom_Generator(tf.keras.utils.Sequence) :
    """
    Keras Sequence that loads and preprocesses video batches on demand.

    Batch `idx` consists of the consecutive entries
    ``[idx * batch_size, (idx + 1) * batch_size)`` of the file and label
    collections; each file is decoded with gen_video_prep at 112x112.

    NOTE(review): because batches are consecutive slices, file lists that are
    grouped by class yield batches dominated by a single class. Consider
    shuffling the (file, label) pairs before building the generator.

    Parameters
    ----------
    video_filenames : sequence
        Locations of the video files
    labels : sequence or array
        Targets aligned with `video_filenames`
    batch_size : int
        Number of videos per batch (the last batch may be smaller)
    """

    def __init__(self, video_filenames, labels, batch_size) :
        # Let the Keras base class initialise its own state as well.
        super().__init__()
        self.video_filenames = video_filenames
        self.labels = labels
        self.batch_size = batch_size

    def __len__(self) :
        """Number of batches per epoch, counting a final partial batch."""
        # Plain int instead of .astype(np.int): the np.int alias was removed
        # from NumPy in 1.24 and raises AttributeError there.
        return int(np.ceil(len(self.video_filenames) / float(self.batch_size)))

    def __getitem__(self, idx) :
        """Return the tuple (videos, labels) for batch number `idx`."""
        first = idx * self.batch_size
        last = (idx + 1) * self.batch_size
        batch_x = self.video_filenames[first:last]
        batch_y = self.labels[first:last]

        videos = np.array([gen_video_prep(file_name, (112, 112)) for file_name in batch_x])
        return videos, np.array(batch_y)

In [99]:
# with tpu_strategy.scope(): # creating the model in the TPUStrategy scope means we will train the model on the TPU
#   conv_model = model(weight_dir, trainable = True, freeze_layer = 0)
#   conv_model = conv_model.retrainable_model((3, 3, 3), (16, 112, 112, 3))
#   conv_model.summary()
#   conv_model.compile(optimizer='adam',
#                 loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
#                 metrics=['sparse_categorical_accuracy'])



In [100]:
# conv_model = conv_model.retrainable_model((3, 3, 3), (16, 112, 112, 3))
# conv_model.summary()



In [101]:
# `model` comes from the local model8 module. It is constructed with the
# weights path, and retrainable_model is then asked for a network taking
# (16, 112, 112, 3) inputs; the (3, 3, 3) argument is presumably the
# convolution kernel size - TODO confirm against model8.
# Note that conv_model is first the wrapper object and then rebound to the
# network it returns.
conv_model = model(weight_dir, trainable = True, freeze_layer = 0)
conv_model = conv_model.retrainable_model((3, 3, 3), (16, 112, 112, 3))
conv_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Conv1 (Conv3D)               (None, 16, 112, 112, 64)  5248      
_________________________________________________________________
Pool1 (MaxPooling3D)         (None, 16, 56, 56, 64)    0         
_________________________________________________________________
Conv2 (Conv3D)               (None, 16, 56, 56, 128)   221312    
_________________________________________________________________
Pool2 (MaxPooling3D)         (None, 8, 28, 28, 128)    0         
_________________________________________________________________
Conv3a (Conv3D)              (None, 8, 28, 28, 256)    884992    
_________________________________________________________________
Conv3b (Conv3D)              (None, 8, 28, 28, 256)    1769728   
_________________________________________________________________
Pool3 (MaxPooling3D)         (None, 4, 14, 14, 256)    0

In [102]:
# Integer class ids of the training clips, in the same row order as training_data.
y = np.asarray(training_data['encoded_labels'].values)


In [103]:
# One-hot encode both splits with the same, explicit number of classes.
# Left to itself, to_categorical sizes its output from the largest label it
# sees, so a validation split missing the highest class id would come out
# narrower than the training targets.
num_classes = len(label_encoder.classes_)

# Encoding straight from the data frame (rather than y = to_categorical(y))
# keeps this cell safe to re-run.
print(training_data['encoded_labels'].values.shape)
y = to_categorical(training_data['encoded_labels'].values, num_classes=num_classes)
print(y.shape)
y_val = to_categorical(val_data['encoded_labels'].values, num_classes=num_classes)
print(y_val.shape)



(5333,)
(5333, 298)
(1193, 298)


In [104]:
# Training clips are decoded lazily, 32 per batch.
gen = My_Custom_Generator(training_data['filename'].tolist(), y, 32)
# Validation clips are decoded once up front. val_preprocess returns a Python
# list with one array per clip; Keras would read such a list as one separate
# model input per clip, so the clips are stacked into a single array with a
# leading sample axis.
val_imgs = np.stack(val_preprocess(val_data['filename'].tolist(), (112,112)))

100
/content/drive/MyDrive/validation_arr/arrive246_clipped.avi
200
/content/drive/MyDrive/validation_arr/buy3391_clipped.avi
300
/content/drive/MyDrive/validation_arr/deaf356_clipped.avi
400
/content/drive/MyDrive/validation_arr/from179_clipped.avi
500
/content/drive/MyDrive/validation_arr/important1774_clipped.avi
600
/content/drive/MyDrive/validation_arr/monkey4123_clipped.avi
700
/content/drive/MyDrive/validation_arr/orange1110_clipped.avi
800
/content/drive/MyDrive/validation_arr/room1444_clipped.avi
900
/content/drive/MyDrive/validation_arr/sometimes2442_clipped.avi
1000
/content/drive/MyDrive/validation_arr/summer1858_clipped.avi
1100
/content/drive/MyDrive/validation_arr/umbrella313_clipped.avi


In [105]:
# tf.config.run_functions_eagerly(True)

In [106]:
# Output locations used by the training callbacks.
model_path = '/content/drive/MyDrive/model'
log_path = '/content/drive/MyDrive/model_log' 
# Checkpoint file name template; {epoch} and {accuracy} are format fields
# (the checkpoint callback receives this template joined with model_path).
model_name = 'weights.best.{epoch:03d}-{accuracy:.4f}.hdf5'
tb_path = '/content/drive/MyDrive/tb_logs'

In [107]:
# def scheduler(epoch, lr):

#     if epoch < 50:
#         return lr
#     elif epoch == 50:
#         return lr*0.2
#     elif epoch == 75:
#         return lr*0.5
#     elif epoch == 100:
#         return lr*0.2
#     elif epoch == 125:
#         return lr*0.5
#     elif epoch == 150:
#         return lr*0.2

In [108]:
# Keep only the checkpoints that improve validation accuracy.
checkpoint_cb = ModelCheckpoint(
    filepath=os.path.join(model_path, model_name),
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
)

# Append the per-epoch metrics to a CSV file.
csv_logger_cb = CSVLogger(
    filename=os.path.join(log_path, 'log.csv'),
    separator=',',
    append=True,
)

# Stop once the validation loss has not improved for 10 epochs.
early_stopping_cb = EarlyStopping(
    monitor='val_loss',
    patience=10,
)

# TensorBoard summaries, with histograms every 10 epochs.
tensorboard_cb = TensorBoard(
    log_dir=tb_path,
    histogram_freq=10,
    write_graph=True,
    write_images=True,
    write_steps_per_second=False,
    update_freq='epoch',
    profile_batch=0,
    embeddings_freq=0,
    embeddings_metadata=None,
)

# A LearningRateScheduler(schedule=scheduler) entry is currently disabled.
callbacks = [checkpoint_cb, csv_logger_cb, early_stopping_cb, tensorboard_cb]



In [109]:
#K.set_value(conv_model.optimizer.lr, 5e-4) # 5e-2, 1e-2, 5e-3, 1e-3, 5e-4, 1e-4

In [None]:
# Train and report the wall-clock duration in seconds.
start = time()
conv_model.fit(
    gen,
    epochs=400,
    # The Sequence already produces batches, so fit's batch_size must not be
    # given for it. validation_batch_size keeps the in-memory validation
    # arrays at the same 32 samples per batch as before.
    validation_batch_size=32,
    callbacks=callbacks,
    # The validation features must be one array with a leading sample axis;
    # a plain list of clips would be read as one model input per clip.
    # np.asarray is a no-op when val_imgs is already an array.
    validation_data=(np.asarray(val_imgs), y_val),
)
print(time()-start)

Epoch 1/400
 11/167 [>.............................] - ETA: 31:00 - loss: 23.0287 - accuracy: 0.0000e+00

In [None]:
#new_model = load_model('/content/drive/MyDrive/model/weights.best.011-0.5702.hdf5')


In [None]:
#new_model.fit(gen,epochs=400,batch_size=32,callbacks=callbacks)

In [None]:
#new_model.summary()

In [None]:
from matplotlib import pyplot

# NOTE(review): `new_model` is only assigned by the load_model call that is
# commented out in an earlier cell; on a fresh kernel this cell raises
# NameError until a checkpoint has been loaded into `new_model`.

# Kernel and bias of the model's first layer (index 0).
filters, biases = new_model.layers[0].get_weights()

# normalize filter values to 0-1 so we can visualize them
f_min, f_max = filters.min(), filters.max()
filters = (filters - f_min) / (f_max - f_min)

# Keras stores a Conv3D kernel as
# (depth, height, width, incoming channels, outgoing channels), so the sizes
# are read from the kernel instead of being hard-coded to 3.
Depth = filters.shape[0]
incoming_channels = filters.shape[3]

# plot first few filters
# n_filters = outgoing channels
outgoing_channels = 2
n_filters, ix = outgoing_channels, 1
for i in range(n_filters):
    # get the filter: (depth, height, width, incoming channels)
    f = filters[:, :, :, :, i]
    # plot each incoming channel separately
    for j in range(incoming_channels):
        # one tile per depth (temporal) position of the kernel
        for k in range(Depth):
            # specify subplot and turn off axis
            ax = pyplot.subplot(outgoing_channels * Depth, incoming_channels, ix)
            ax.set_xticks([])
            ax.set_yticks([])
            # Depth is the FIRST kernel axis: f[k, :, :, j] is the
            # height x width slice at depth k for incoming channel j.
            pyplot.imshow(f[k, :, :, j], cmap='gray')
            ix += 1

# show the figure
pyplot.show()

In [None]:

# NOTE(review): `new_model` is only assigned by the load_model call that is
# commented out in an earlier cell; it must be loaded before this cell runs.

#print(new_model.layers)
# Model exposing the outputs of the first two layers for one input clip.
successive_outputs = [layer.output for layer in new_model.layers[:2]]
print(new_model.input)
visualization_model = tf.keras.models.Model(new_model.input, successive_outputs )
x = test_vid.reshape(1,16,112,112,3)
successive_feature_maps = visualization_model.predict(x)
# zip() below stops at the shorter list, so only the first two names are used.
layer_names = [layer.name for layer in new_model.layers]

for layer_name, feature_map in zip(layer_names, successive_feature_maps):

    # Only 5-D outputs (sample, frame, height, width, feature) are drawn.
    if len(feature_map.shape) != 5:
        continue

    n_features = feature_map.shape[-1]
    height, width = feature_map.shape[2], feature_map.shape[3]

    for frame in range(1):
        display_grid = np.zeros((height, width * n_features))
        for i in range(n_features):
            # Copy so that the in-place scaling leaves feature_map untouched.
            tile = feature_map[0, frame, :, :, i].copy()
            tile -= tile.mean()
            tile_std = tile.std()
            # A constant map (e.g. an inactive filter) has zero std; skipping
            # the division renders it as flat mid-grey instead of NaNs.
            if tile_std > 0:
                tile /= tile_std
            tile *= 64
            tile += 128
            # Tile each filter into a horizontal grid
            display_grid[:, i * width : (i + 1) * width] = np.clip(tile, 0, 255).astype('uint8')

        scale = 20. / n_features
        plt.figure(figsize=(scale * n_features, scale))
        plt.title (layer_name)
        plt.grid(False)
        plt.imshow(display_grid, aspect='auto', cmap='magma')
        # The layer name is part of the file name; with only the frame number
        # every layer was written to the same file and overwrote the last one.
        plt.savefig(f'/content/drive/MyDrive/feature_maps/{layer_name}_maps{frame}.png',dpi=1500)