In [29]:
import tensorflow as tf
from tensorflow.keras import layers, models
# NOTE(review): this switches tf.function-wrapped code to eager execution for the
# whole session, which presumably was enabled while debugging — confirm it is
# still wanted, because it also applies to the training run further down.
tf.config.run_functions_eagerly(True)
from tensorflow.keras.optimizers import Adam
# Show which GPUs TensorFlow can see before any model is built.
print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [31]:
import numpy as np
import cv2
from tensorflow.keras.utils import Sequence
import tensorflow as tf

class VideoDataGenerator(Sequence):
    """Keras ``Sequence`` that decodes video files into fixed-length clips, batch by batch.

    Each sample is the first ``n_frames`` frames of a video, resized to ``dim``,
    scaled to ``[0, 1]`` as float32 and zero-padded at the end when the video is
    shorter than ``n_frames``. Labels are returned one-hot encoded.

    Args:
        video_paths: Sequence of paths readable by ``cv2.VideoCapture``.
        labels: Integer class ids, aligned with ``video_paths``.
        batch_size: Maximum number of clips per batch (the last batch may be smaller).
        dim: ``(height, width)`` of every frame in the returned array.
        n_frames: Number of frames per clip.
        n_channels: Channels per frame. Frames are stored as OpenCV decodes them,
            so this must match the decoded channel count.
        n_classes: Width of the one-hot label vectors.
        shuffle: Reshuffle the sample order on construction and after every epoch.
        augment: When true, every clip in a batch is passed through ``augment_video``.
            NOTE(review): ``augment_video`` is not defined in this class; it must
            exist at module level before ``augment=True`` is used.
    """

    def __init__(self, video_paths, labels, batch_size=4, dim=(100, 100), n_frames=64, n_channels=3, n_classes=85, shuffle=True, augment=False):
        # NOTE(review): newer Keras releases expect Sequence subclasses to call the
        # base initialiser; on older versions this is a no-op — confirm against the
        # installed version.
        super().__init__()
        self.video_paths = video_paths
        self.labels = labels
        self.batch_size = batch_size
        self.dim = dim
        self.n_frames = n_frames
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.indexes = np.arange(len(self.video_paths))
        self.augment = augment
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch, counting a final partial batch."""
        return int(np.ceil(len(self.video_paths) / self.batch_size))

    def __getitem__(self, index):
        """Return batch ``index`` as ``(X, y)``.

        ``X`` has shape ``(n, n_frames, dim[0], dim[1], n_channels)`` and ``y`` has
        shape ``(n, n_classes)``, where ``n`` is ``batch_size`` except possibly for
        the last batch of the epoch.

        Raises:
            IndexError: If the generator holds no videos at all.
        """
        batch_indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        if len(batch_indexes) == 0:
            # Out-of-range request: wrap around to a valid batch as the original
            # fallback intended, but decide before decoding any video and never
            # recurse on an empty dataset.
            if len(self.indexes) == 0:
                raise IndexError("VideoDataGenerator has no videos to serve")
            return self.__getitem__((index + 1) % len(self))

        batch_video_paths = [self.video_paths[k] for k in batch_indexes]
        batch_labels = [self.labels[k] for k in batch_indexes]

        X, y = self.__data_generation(batch_video_paths, batch_labels)
        if self.augment:
            X = tf.map_fn(augment_video, X, dtype=tf.float32)

        return X, y

    def __data_generation(self, batch_video_paths, batch_labels):
        """Decode the given videos and one-hot encode their labels."""
        # Size the buffers from the actual batch, not from batch_size: __len__ rounds
        # up, so the last batch can be short and must not carry uninitialised rows.
        n_samples = len(batch_video_paths)
        X = np.empty((n_samples, self.n_frames, *self.dim, self.n_channels), dtype=np.float32)
        y = np.asarray(batch_labels, dtype=int)

        for i, video_path in enumerate(batch_video_paths):
            X[i,] = self.load_video(video_path)

        return X, tf.keras.utils.to_categorical(y, num_classes=self.n_classes)

    def load_video(self, path):
        """Read up to ``n_frames`` frames from ``path`` as a float32 array in ``[0, 1]``.

        Videos that are too short, or that cannot be opened or decoded, are padded
        with all-zero frames, so the result always contains ``n_frames`` frames.
        """
        # cv2.resize takes its target size as (width, height), while ``dim`` is used
        # as (height, width) for the array buffers.
        target_size = (self.dim[1], self.dim[0])

        cap = cv2.VideoCapture(path)
        frames = []
        try:
            while cap.isOpened() and len(frames) < self.n_frames:
                ret, frame = cap.read()
                if not ret:
                    break
                frame = cv2.resize(frame, target_size)
                frame = frame.astype(np.float32) / 255.0  # Convert to float32 and normalize
                frames.append(frame)
        finally:
            # Always give the decoder handle back, even if resizing raised.
            cap.release()

        missing = self.n_frames - len(frames)
        if missing > 0:
            blank = np.zeros((*self.dim, self.n_channels), dtype=np.float32)
            frames.extend([blank] * missing)

        return np.array(frames)

    def on_epoch_end(self):
        """Reshuffle the sample order in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indexes)


In [32]:
import os
import glob

def get_video_paths_and_labels(base_dir):
    """Collect ``.avi`` files from a ``base_dir/<class name>/<video>`` directory tree.

    Class sub-directories are visited in sorted order and numbered 0, 1, 2, ... in
    that order; the videos inside each class are listed in sorted order as well, so
    the result does not depend on the order the file system reports entries in.

    Args:
        base_dir: Directory containing one sub-directory per class.

    Returns:
        A ``(video_paths, labels, label_map)`` tuple: the video paths, the integer
        label of each path, and a dict mapping class name to label. All three are
        empty when ``base_dir`` is not a directory; in that case an error message
        is printed instead of raising.
    """
    video_paths = []
    labels = []
    label_map = {}

    # isdir rather than exists: a path that points at a regular file is reported
    # here instead of crashing inside os.listdir below.
    if not os.path.isdir(base_dir):
        print(f"Error: Directory {base_dir} does not exist!")
        return [], [], {}

    for label_name in sorted(os.listdir(base_dir)):
        label_dir = os.path.join(base_dir, label_name)

        if not os.path.isdir(label_dir):
            print(f"Warning: {label_dir} is not a directory")
            continue

        # Every class directory gets the next free id, even if it holds no videos.
        current_label = len(label_map)
        label_map[label_name] = current_label

        for video_name in sorted(os.listdir(label_dir)):
            if video_name.endswith('.avi'):
                video_paths.append(os.path.join(label_dir, video_name))
                labels.append(current_label)

    return video_paths, labels, label_map

In [33]:
def load_datasets(train_dir='D:\\aadesh\\dataset\\ucf101\\train',
                  validation_dir='D:\\aadesh\\dataset\\ucf101\\val',
                  test_dir='D:\\aadesh\\dataset\\ucf101\\test'):
    """Scan the train, validation and test directories and return their videos and labels.

    Args:
        train_dir: Directory with one sub-directory per class for training.
        validation_dir: Same layout, validation split.
        test_dir: Same layout, test split.
            The defaults are the locations this notebook was originally written
            against; pass other paths to run it on a different machine.

    Returns:
        ``((train_paths, train_labels), (val_paths, val_labels),
        (test_paths, test_labels), label_map)`` where ``label_map`` maps class
        name to integer label and is shared by all three splits.

    Raises:
        AssertionError: If the three splits do not yield identical label maps.
    """
    train_video_paths, train_labels, train_label_map = get_video_paths_and_labels(train_dir)
    val_video_paths, val_labels, val_label_map = get_video_paths_and_labels(validation_dir)
    test_video_paths, test_labels, test_label_map = get_video_paths_and_labels(test_dir)

    # Ensure label maps are consistent across all splits; otherwise the same integer
    # would stand for different classes depending on the split.
    assert train_label_map == val_label_map == test_label_map, (
        "Class folders differ between splits: "
        f"train has {len(train_label_map)}, "
        f"validation has {len(val_label_map)}, "
        f"test has {len(test_label_map)} classes"
    )

    return (train_video_paths, train_labels), (val_video_paths, val_labels), (test_video_paths, test_labels), train_label_map

# Scan all three splits once; the resulting names are notebook globals that the
# later cells (hyperparameters, generators) read.
(train_video_paths, train_labels), (val_video_paths, val_labels), (test_video_paths, test_labels), label_map = load_datasets()

# Report the class-name -> label mapping and the size of each split.
print(f"Training classes: {label_map}")
print(f"Number of training videos: {len(train_video_paths)}")
print(f"Number of validation videos: {len(val_video_paths)}")
print(f"Number of test videos: {len(test_video_paths)}")

Training classes: {'ApplyEyeMakeup': 0, 'ApplyLipstick': 1, 'Archery': 2, 'BabyCrawling': 3, 'BalanceBeam': 4, 'BandMarching': 5, 'BaseballPitch': 6, 'Basketball': 7, 'BasketballDunk': 8, 'BenchPress': 9, 'Biking': 10, 'Billiards': 11, 'BlowDryHair': 12, 'BlowingCandles': 13, 'BodyWeightSquats': 14, 'Bowling': 15, 'BoxingPunchingBag': 16, 'BoxingSpeedBag': 17, 'BreastStroke': 18, 'BrushingTeeth': 19, 'CleanAndJerk': 20, 'CliffDiving': 21, 'CricketBowling': 22, 'CricketShot': 23, 'CuttingInKitchen': 24, 'Diving': 25, 'Drumming': 26, 'Fencing': 27, 'FieldHockeyPenalty': 28, 'FloorGymnastics': 29, 'FrisbeeCatch': 30, 'FrontCrawl': 31, 'GolfSwing': 32, 'Haircut': 33, 'HammerThrow': 34, 'Hammering': 35, 'HandstandPushups': 36, 'HandstandWalking': 37, 'HeadMassage': 38, 'HighJump': 39, 'HorseRace': 40, 'HorseRiding': 41, 'HulaHoop': 42, 'IceDancing': 43, 'JavelinThrow': 44, 'JugglingBalls': 45, 'JumpRope': 46, 'JumpingJack': 47, 'Kayaking': 48, 'Knitting': 49, 'LongJump': 50, 'Lunges': 51, '

In [34]:
# Size the output layer from the label map rather than from the labels that happen
# to occur in the training split: label ids are assigned per class directory, so a
# class folder without training clips would otherwise leave n_classes smaller than
# the largest label id and break one-hot encoding.
n_classes = len(label_map)
batch_size = 4      # clips per batch
dim = (64, 64)      # frame size fed to the network
n_frames = 64       # frames per clip
n_channels = 3      # channels per frame
n_classes

101

In [35]:
# The two generators share every setting except the split they read and the
# shuffle flag (on for training, off for validation), so both are built from one
# template. Construction order is unchanged: training first, then validation.
train_data_gen, val_data_gen = (
    VideoDataGenerator(
        video_paths=split_paths,
        labels=split_labels,
        batch_size=batch_size,
        dim=dim,
        n_frames=n_frames,
        n_channels=n_channels,
        n_classes=n_classes,
        shuffle=split_shuffle,
    )
    for split_paths, split_labels, split_shuffle in (
        (train_video_paths, train_labels, True),
        (val_video_paths, val_labels, False),
    )
)

In [36]:
import tensorflow as tf
from tensorflow.keras import layers, models

def build_i3d_inception(input_shape=(64, 64, 64, 3), n_classes=n_classes):
    """Build an uncompiled 3D-convolutional, Inception-style video classifier.

    Layout: a strided Conv3D stem with batch normalisation and max pooling, two
    residual Inception blocks, one (2, 2, 2) max pooling step, global average
    pooling, a 256-unit dense layer with dropout, and a softmax output.

    Args:
        input_shape: Shape of one sample passed to ``layers.Input`` (batch axis
            excluded). The stem and pooling layers use a stride of 1 on the first
            axis and 2 on the next two.
        n_classes: Number of units in the softmax output layer. The default is the
            value the module-level ``n_classes`` had when this function was
            defined; later changes to that variable do not affect it.

    Returns:
        A ``models.Model`` mapping ``input_shape`` inputs to ``n_classes``
        probabilities. The caller is responsible for compiling it.
    """
    inputs = layers.Input(shape=input_shape)

    # Stem: convolution and pooling each use stride 2 on the last two non-channel
    # axes and stride 1 on the first one.
    x = layers.Conv3D(64, (3, 7, 7), strides=(1, 2, 2), padding='same', activation='relu')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same')(x)

    # Residual Inception block built from Conv3D layers.
    def inception_block(input_tensor, filters):
        """Apply four parallel branches, concatenate them and add a skip connection.

        ``filters`` holds six filter counts, in order: the 1x1x1 branch, the
        reduction and main convolution of the first 3x3x3 branch, the reduction
        and main convolution of the second 3x3x3 branch, and the projection that
        follows the pooling branch.
        """
        # Branch 1: pointwise convolution.
        branch1x1 = layers.Conv3D(filters[0], (1, 1, 1), padding='same', activation='relu')(input_tensor)
        branch1x1 = layers.BatchNormalization()(branch1x1)

        # Branch 2: pointwise reduction followed by a 3x3x3 convolution.
        branch3x3 = layers.Conv3D(filters[1], (1, 1, 1), padding='same', activation='relu')(input_tensor)
        branch3x3 = layers.Conv3D(filters[2], (3, 3, 3), padding='same', activation='relu')(branch3x3)
        branch3x3 = layers.BatchNormalization()(branch3x3)

        # Branch 3: same structure as branch 2. Despite the variable name, the
        # kernel here is also 3x3x3, not 5x5x5.
        branch5x5 = layers.Conv3D(filters[3], (1, 1, 1), padding='same', activation='relu')(input_tensor)
        branch5x5 = layers.Conv3D(filters[4], (3, 3, 3), padding='same', activation='relu')(branch5x5)
        branch5x5 = layers.BatchNormalization()(branch5x5)

        # Branch 4: stride-1 max pooling (no pooling along the first axis) followed
        # by a pointwise projection.
        branch_pool = layers.MaxPooling3D((1, 3, 3), strides=(1, 1, 1), padding='same')(input_tensor)
        branch_pool = layers.Conv3D(filters[5], (1, 1, 1), padding='same', activation='relu')(branch_pool)
        branch_pool = layers.BatchNormalization()(branch_pool)

        # Stack the branch outputs along the channel axis.
        output = layers.Concatenate(axis=-1)([branch1x1, branch3x3, branch5x5, branch_pool])

        # The skip connection needs matching channel counts; project the input with
        # a linear 1x1x1 convolution when they differ.
        if input_tensor.shape[-1] != output.shape[-1]:
            input_tensor = layers.Conv3D(output.shape[-1], (1, 1, 1), padding='same', activation=None)(input_tensor)
            input_tensor = layers.BatchNormalization()(input_tensor)

        output = layers.Add()([input_tensor, output])
        return output

    # Two Inception blocks, then halve every non-channel axis.
    x = inception_block(x, [64, 96, 128, 16, 32, 32])
    x = inception_block(x, [128, 128, 192, 32, 96, 64])
    x = layers.MaxPooling3D((2, 2, 2), strides=(2, 2, 2), padding='same')(x)

    # Classification head.
    x = layers.GlobalAveragePooling3D()(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(0.25)(x)
    outputs = layers.Dense(n_classes, activation='softmax')(x)

    model = models.Model(inputs, outputs)
    return model


In [37]:
# Derive the input shape from the same settings the data generators were built
# with, so the model and the batches it receives cannot drift apart when
# n_frames, dim or n_channels are changed in the configuration cell.
i3d_model = build_i3d_inception(input_shape=(n_frames, *dim, n_channels), n_classes=n_classes)

In [38]:
# The generators emit one-hot labels, hence categorical cross-entropy.
i3d_model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)
i3d_model.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 64, 64, 64,  0           []                               
                                 3)]                                                              
                                                                                                  
 conv3d_30 (Conv3D)             (None, 64, 32, 32,   28288       ['input_3[0][0]']                
                                64)                                                               
                                                                                                  
 batch_normalization_22 (BatchN  (None, 64, 32, 32,   256        ['conv3d_30[0][0]']              
 ormalization)                  64)                                                         

In [39]:
# Sanity check: the number of distinct training labels next to the width of the
# model's output layer.
print(f"Number of unique labels: {len(np.unique(train_labels))}")
print(f"Number of classes in model: {n_classes}")

Number of unique labels: 101
Number of classes in model: 101


In [40]:
# Train for ten passes over the training generator, validating on the full
# validation generator after each one; the returned history is kept for later use.
history = i3d_model.fit(
    x=train_data_gen,
    validation_data=val_data_gen,
    steps_per_epoch=len(train_data_gen),
    validation_steps=len(val_data_gen),
    epochs=10,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [41]:
# Score the trained model once more on every validation batch.
print("Calculating final validation accuracy...")
val_loss, val_accuracy = i3d_model.evaluate(
    x=val_data_gen,
    steps=len(val_data_gen),
    verbose=1,
)
print(f"Final validation accuracy: {val_accuracy:.4f}")

Calculating final validation accuracy...
Final validation accuracy: 0.5024
