## Adding Data Augmentation 

In [None]:
import cv2
import os

def check_video(video_path):
    """Return True if ``video_path`` exists and OpenCV can open it.

    A diagnostic message is printed for each failure mode (missing file,
    or a file that ``cv2.VideoCapture`` cannot open).

    Args:
        video_path: Path of the video file to probe.

    Returns:
        bool: True when the file exists and the capture opened, else False.
    """
    if not os.path.exists(video_path):
        print(f"Video file does not exist: {video_path}")
        return False

    cap = cv2.VideoCapture(video_path)
    try:
        opened = cap.isOpened()
    finally:
        # Always release the capture; this function only probes the file and
        # would otherwise leak one handle per checked video.
        cap.release()

    if not opened:
        print(f"Could not open video file: {video_path}")
        return False

    return True
def prepare_all_videos(main_output_directory, model_name, max_seq_length):
    """Build padded feature/mask arrays for every readable video in a directory.

    Each video is loaded, reduced to keyframes, passed through the feature
    extractor and then truncated or zero-padded to ``max_seq_length`` steps.

    Args:
        main_output_directory: Directory whose entries are the video files.
        model_name: One of ``'inception'``, ``'resnet'`` or ``'vgg'``; selects
            the frame size used when loading and is forwarded to the feature
            extractor.
        max_seq_length: Number of time steps kept per video.

    Returns:
        ``((frame_features, frame_masks), all_labels)`` where
        ``frame_features`` is float32 of shape
        ``(num_videos, max_seq_length, NUM_FEATURES)``, ``frame_masks`` is bool
        of shape ``(num_videos, max_seq_length, 1)`` and ``all_labels`` is a
        1-D array filled with ``1``.

    Raises:
        ValueError: If ``model_name`` is not a supported model.
    """
    input_sizes = {
        'inception': (299, 299),
        'resnet': (224, 224),
        'vgg': (224, 224),
    }
    # Fail fast on an unknown model instead of hitting an unbound ``img_size``
    # in the middle of the loop (or silently returning empty arrays).
    if model_name not in input_sizes:
        raise ValueError(
            f"Unsupported model_name {model_name!r}; "
            f"expected one of {sorted(input_sizes)}"
        )
    img_size = input_sizes[model_name]

    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    video_names = sorted(os.listdir(main_output_directory))

    frame_features_list = []
    frame_masks_list = []
    all_labels = []

    for video_name in video_names:
        video_path = os.path.join(main_output_directory, video_name)
        # Skip entries that are missing or cannot be opened as a video.
        if not check_video(video_path):
            continue

        frames = load_video(video_path, resize=img_size)
        frames = select_keyframes(frames)
        frames = extract_features_from_frames(frames, model_name=model_name)

        # Truncate sequences that are longer than the fixed length.
        sequence_length = min(frames.shape[0], max_seq_length)
        frames = frames[:sequence_length]

        # The mask is True on the padded tail and False on real frames.
        # NOTE(review): confirm this polarity matches what the downstream
        # model expects.
        frame_mask = np.zeros(shape=(max_seq_length, 1), dtype="bool")
        frame_mask[sequence_length:, :] = 1

        # Zero-pad the features up to max_seq_length.
        frame_feature = np.zeros(shape=(max_seq_length, NUM_FEATURES), dtype="float32")
        frame_feature[:sequence_length, :] = frames

        frame_features_list.append(frame_feature)
        frame_masks_list.append(frame_mask)
        all_labels.append(1)  # as all labels are 1 for augmented videos

    # Explicit dtype + reshape keep the documented rank even when no video was
    # usable, so the result can still be concatenated with other datasets.
    frame_features = np.asarray(frame_features_list, dtype="float32").reshape(
        -1, max_seq_length, NUM_FEATURES
    )
    frame_masks = np.asarray(frame_masks_list, dtype="bool").reshape(
        -1, max_seq_length, 1
    )
    all_labels = np.array(all_labels, dtype=int)

    return (frame_features, frame_masks), all_labels

# Extract InceptionV3 features for every augmented video;
# `augmented_features` is the (features, masks) pair returned by the helper.
augmented_features, augmented_labels = prepare_all_videos(
    augmented_dir,
    model_name='inception',
    max_seq_length=max_seq_length,
)

# Persist features and masks together in one archive, labels in their own file.
np.savez(
    'augmented_features_25_inception.npz',
    features=augmented_features[0],
    masks=augmented_features[1],
)
np.save(
    'augmented_labels_25_inception.npy',
    augmented_labels,
)

In [None]:
from sklearn.utils import shuffle

# Load the train_data. Each .npz archive is opened once and closed as soon as
# both arrays have been read, instead of being opened twice and left open.
with np.load('train_features_25_inception.npz') as train_npz:
    train_features = train_npz['features']
    train_masks = train_npz['masks']
train_labels = np.load('train_labels_25_inception.npy')

# Load the augmented_data
with np.load('augmented_features_25_inception.npz') as augmented_npz:
    augmented_features = augmented_npz['features']
    augmented_masks = augmented_npz['masks']
augmented_labels = np.load('augmented_labels_25_inception.npy')

# Concatenate features, masks and labels along the sample axis
all_train_features = np.concatenate([train_features, augmented_features])
all_train_masks = np.concatenate([train_masks, augmented_masks])
all_train_labels = np.concatenate([train_labels, augmented_labels])

# Create an array of indices and shuffle them
indices = np.arange(all_train_features.shape[0])
np.random.shuffle(indices)

# Apply the same permutation to all three arrays so that each sample's
# features, mask and label stay aligned.
all_train_features = all_train_features[indices]
all_train_masks = all_train_masks[indices]
all_train_labels = all_train_labels[indices]


'''Train features shape: (380, 5, 2048)
Train masks shape: (380, 5, 1)
Train labels shape: (380,)
Augmented features shape: (120, 5, 2048)
Augmented masks shape: (120, 5, 1)
Augmented labels shape: (120,)
All train features shape: (500, 5, 2048)
All train masks shape: (500, 5, 1)
All train labels shape: (500,)'''