In [1]:
import tensorflow_hub as hub
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
import glob
import re
import math
from time import time
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.datasets import make_multilabel_classification
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Flatten
# import tensorflow_hub as hub
from sklearn.metrics import mean_absolute_error,mean_squared_error
from tensorflow.keras.models import load_model

# Fail fast unless TensorFlow reports the first GPU device.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [2]:
def calculate_mean_std(x, channels_first=False, verbose=0):
    """
    Compute the mean and standard deviation of each channel.

    Parameters
    ----------
    x : array
        4-D array (collection of images/frames) or 5-D array (collection
        of videos); the channel is taken from the last axis
    channels_first : bool, optional
        Must stay False (only channels-last is supported), by default False
    verbose : int, optional
        1-prints the per-channel statistics, 0-silent mode, by default 0

    Returns
    -------
    array of shape [2, num_channels]
        Row 0 holds the per-channel means, row 1 the per-channel stds
    """
    rank = x.ndim
    assert rank in [5,4]
    assert channels_first == False

    channel_means, channel_stds = [], []
    for c in range(x.shape[-1]):
        # Ellipsis indexing selects channel c for both the 4-D and 5-D case.
        plane = x[..., c]
        mean, std = plane.mean(), plane.std()
        if verbose:
            print("Channel %s mean before: %s" % (c, mean))
            print("Channel %s std before: %s" % (c, std))
        channel_means.append(mean)
        channel_stds.append(std)

    return np.stack((channel_means, channel_stds))


def preprocess_input(x, mean_std, divide_std=False, channels_first=False, verbose=0):
    """
    Channel-wise subtraction of mean from the input and optional division by std

    The input is never modified: the computation runs on a float32 copy.

    Parameters
    ----------
    x : array
        Input array of images (frames) or videos, channels on the last axis
    mean_std : array
        Array of shape [2, num_channels] with per-channel mean and std
    divide_std : bool, optional
        Add division by std or not, by default False
    channels_first : bool, optional
        Leave False, otherwise not implemented, by default False
    verbose : int, optional
        1-prints out details, 0-silent mode, by default 0

    Returns
    -------
    array
        New float32 array holding the preprocessed input
    """
    # np.array always copies; np.asarray would alias an input that is already
    # float32, and the in-place operations below would then overwrite the
    # caller's data.
    x = np.array(x, dtype=np.float32)
    ndim = x.ndim
    assert ndim in [5,4]
    assert channels_first == False
    num_channels = x.shape[-1]

    for c in range(0, num_channels):
        # Ellipsis indexing selects channel c for both videos (5-D) and images (4-D).
        x[..., c] -= mean_std[0][c]
        if divide_std:
            x[..., c] /= mean_std[1][c]
        if verbose:
            print("Channel %s mean after preprocessing: %s" % (c, x[..., c].mean()))
            print("Channel %s std after preprocessing: %s" % (c, x[..., c].std()))
    return x


In [3]:
files = glob.glob('/content/drive/MyDrive/training_arr/*.avi')
if not files:
    # An empty match would otherwise surface as an opaque IndexError below.
    raise FileNotFoundError(
        'No .avi files matched /content/drive/MyDrive/training_arr/*.avi')
# Sanity check: show one sample path (index 5 when available, else the last file).
print(str(files[min(5, len(files) - 1)]))

training_labels = []
training_files = []

/content/drive/MyDrive/training_arr/television10367_clipped.avi


In [4]:
# Rebuild both lists from scratch so re-running this cell does not append
# duplicate rows to lists created in an earlier cell.
training_files = [str(file) for file in files]
training_labels = []
for path in training_files:
    # Match against the file name only, so directory names containing
    # letters followed by a digit cannot be mistaken for the label.
    basename = re.split(r'[\\/]', path)[-1]
    matches = re.findall('[A-Za-z]+[0-9]', basename)
    if not matches:
        raise ValueError('Cannot derive a label from file name: {}'.format(path))
    # The pattern captures the first digit as well; strip it to keep the word.
    training_labels.append(matches[0][:-1])

In [5]:
training_data = pd.DataFrame({'filename':training_files,'training_labels':training_labels})
print(training_data)

# Keep the fitted encoder around: it is the only record of which integer id
# belongs to which label string, needed to decode predictions later.
fitted_label_encoder = LabelEncoder()
# `label_encoder` keeps its original meaning: the array of integer class ids.
label_encoder = fitted_label_encoder.fit_transform(training_data['training_labels'])
training_data['encoded_labels'] = label_encoder
print(training_data)
training_data.to_csv('training_words.csv')

                                               filename training_labels
0     /content/drive/MyDrive/training_arr/teacher778...         teacher
1     /content/drive/MyDrive/training_arr/teacher974...         teacher
2     /content/drive/MyDrive/training_arr/teacher976...         teacher
3     /content/drive/MyDrive/training_arr/teacher933...         teacher
4     /content/drive/MyDrive/training_arr/teacher974...         teacher
...                                                 ...             ...
5328  /content/drive/MyDrive/training_arr/beautiful2...       beautiful
5329  /content/drive/MyDrive/training_arr/beautiful2...       beautiful
5330  /content/drive/MyDrive/training_arr/beautiful2...       beautiful
5331  /content/drive/MyDrive/training_arr/beautiful8...       beautiful
5332  /content/drive/MyDrive/training_arr/beautiful7...       beautiful

[5333 rows x 2 columns]
                                               filename training_labels
0     /content/drive/MyDrive/training_a

In [6]:
videos_data = []

def gen_video_prep(file_loc, size, num_frames=32, to_rgb=True):
    """
    Load a video, resize its frames and sample a fixed number of them.

    Parameters
    ----------
    file_loc : str or path-like
        Location of the video file; converted with str() for OpenCV
    size : tuple
        Target frame size passed straight to cv2.resize
    num_frames : int, optional
        Number of frames to return, picked at evenly spaced positions over
        the whole clip (frames repeat when the clip is shorter), by default 32
    to_rgb : bool, optional
        Reverse the channel axis so OpenCV's BGR frames become RGB, which is
        what the I3D TF-Hub model used in this notebook is documented to
        expect. Pass False to keep the raw OpenCV channel order, by default True

    Returns
    -------
    array
        Float array with num_frames frames stacked on axis 0, scaled to [0, 1]

    Raises
    ------
    ValueError
        If no frame could be read from the file
    """
    cap = cv2.VideoCapture(str(file_loc))
    resized_frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Keep frames as resized uint8 here; scaling to float happens only
            # for the sampled frames, which keeps the buffer 8x smaller.
            resized_frames.append(cv2.resize(frame, size))
    finally:
        # Release the capture handle even when decoding/resizing fails.
        cap.release()

    if not resized_frames:
        raise ValueError('No frames could be read from video: {}'.format(file_loc))

    sample_idx = np.linspace(0, len(resized_frames) - 1, num_frames, dtype=int)
    video = np.stack([resized_frames[i] for i in sample_idx], axis=0)

    if to_rgb:
        video = video[..., ::-1]

    #mean_std = calculate_mean_std(video, channels_first=False, verbose=0)

    #video = preprocess_input(video, mean_std, divide_std=False, channels_first=False, verbose=0)

    # No cv2.destroyAllWindows(): this function never opens a window.
    return video / 255.0

In [7]:
class My_Custom_Generator(tf.keras.utils.Sequence) :
    """
    Keras Sequence that loads videos from disk one batch at a time.

    Parameters
    ----------
    video_filenames : sequence of str
        Paths of the video files, in the order they are served
    labels : array-like
        Targets aligned with video_filenames (sliced with the same indices)
    batch_size : int
        Number of videos per batch; the last batch may be smaller
    """

    def __init__(self, video_filenames, labels, batch_size) :
        # Let the Keras base class set up its own state before adding ours.
        super().__init__()
        self.video_filenames = video_filenames
        self.labels = labels
        self.batch_size = batch_size

    def __len__(self) :
        """Number of batches per epoch (ceil of samples / batch_size)."""
        # Built-in int replaces the `np.int` alias, which no longer exists in
        # current NumPy releases.
        return int(np.ceil(len(self.video_filenames) / float(self.batch_size)))

    def __getitem__(self, idx) :
        """Return (videos, labels) for batch number idx."""
        start = idx * self.batch_size
        stop = (idx + 1) * self.batch_size
        batch_x = self.video_filenames[start:stop]
        batch_y = self.labels[start:stop]

        # Every file in the batch is decoded with 224x224 frames.
        videos = np.array([
            gen_video_prep(file_name, (224, 224)) for file_name in batch_x])
        return videos, np.array(batch_y)

In [8]:
# One-hot encode the integer class ids stored in the 'encoded_labels' column.
y = to_categorical(training_data['encoded_labels'].to_numpy())

In [9]:
# Batch generator over all training videos and their one-hot labels.
gen = My_Custom_Generator(
    video_filenames=training_data['filename'].tolist(),
    labels=y,
    batch_size=32,
)

In [10]:
# TF-Hub layer kept frozen (trainable=False); the declared input is
# 32 frames of 224x224 with 3 channels.
img_feature_layer = hub.KerasLayer(
    'https://tfhub.dev/deepmind/i3d-kinetics-600/1',
    input_shape=(32, 224, 224, 3),
    trainable=False,
)

In [11]:
# Size the softmax layer from the one-hot targets so the output width always
# matches the number of label columns in `y` instead of a hard-coded count.
num_classes = y.shape[1]

model = tf.keras.Sequential([
    img_feature_layer,
    tf.keras.layers.Dense(512, activation="relu"),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(num_classes, activation="softmax"),
])

In [12]:
# Adam optimizer with categorical cross-entropy (targets are one-hot), tracking accuracy.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [13]:
# Print the layer-by-layer architecture and parameter counts of `model`.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 600)               12909544  
_________________________________________________________________
dense (Dense)                (None, 512)               307712    
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 298)               152874    
Total params: 13,370,130
Trainable params: 460,586
Non-trainable params: 12,909,544
_________________________________________________________________


In [None]:
# `gen` already yields complete batches (its batch size is set when it is
# constructed), so batch_size is not passed to fit().
model.fit(
        gen,
        epochs=200
        )

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200