In [None]:
# List the current working directory; the cells below expect the
# hello_*.MOV and wrong_*.MOV clips to be located here.
!ls

In [None]:
# Create the dataset tree: one sub-folder per class ("hello", "wrong")
# under both the train and the test split.
!mkdir -p /content/dataset/train/hello
!mkdir -p /content/dataset/train/wrong
!mkdir -p /content/dataset/test/hello
!mkdir -p /content/dataset/test/wrong

# Move 16 randomly picked hello_*.MOV clips to the test split, then move
# every remaining hello_*.MOV clip to the train split.
# NOTE(review): `shuf` is unseeded, so the split differs on every run, and
# the ls | xargs pipeline splits on whitespace in file names.
# NOTE(review): this cell is not re-runnable -- once the clips have been
# moved, no hello_*.MOV / wrong_*.MOV files remain in the working directory.
!ls hello_*.MOV | shuf -n 16 | xargs -I {} mv {} /content/dataset/test/hello/
!mv hello_*.MOV /content/dataset/train/hello/

# Same procedure for the "wrong" class, holding out 6 clips for testing.
!ls wrong_*.MOV | shuf -n 6 | xargs -I {} mv {} /content/dataset/test/wrong/
!mv wrong_*.MOV /content/dataset/train/wrong/

In [None]:
import numpy as np
import tensorflow as tf
import cv2
import os
from tensorflow.keras.utils import Sequence
import random

class VideoFrameGenerator(Sequence):
    """Keras ``Sequence`` yielding batches of fixed-length frame clips from video files.

    Expected layout: ``video_folder/<class_name>/<video file>``. Class indices
    follow the sorted order of the class sub-folder names. Hidden entries
    (names starting with ".", e.g. ``.ipynb_checkpoints``) are ignored both as
    classes and as videos.

    Args:
        video_folder: Root folder containing one sub-folder per class.
        batch_size: Number of videos per batch.
        sequence_length: Number of frames sampled from every video.
        target_size: ``(height, width)`` of each returned frame.
        shuffle: Shuffle the video order at construction and after each epoch.
        augment: Apply random flip / brightness / noise augmentation. Leave it
            on for training data and switch it off for validation data.
        **kwargs: Forwarded to the Keras base class constructor.
    """

    def __init__(self, video_folder, batch_size=4, sequence_length=10, target_size=(224, 224),
                 shuffle=True, augment=True, **kwargs):
        # Keras 3 expects dataset subclasses to initialise the base class.
        super().__init__(**kwargs)
        self.video_folder = video_folder
        self.batch_size = batch_size
        self.sequence_length = sequence_length
        self.target_size = target_size
        self.shuffle = shuffle
        self.augment = augment

        # Only real sub-folders are classes; a stray hidden folder would
        # otherwise take a class index and shift every label.
        self.classes = sorted(
            name for name in os.listdir(video_folder)
            if not name.startswith(".") and os.path.isdir(os.path.join(video_folder, name))
        )
        self.video_paths = []

        # Collect all video file paths and labels
        for class_idx, class_name in enumerate(self.classes):
            class_folder = os.path.join(video_folder, class_name)
            for vid in sorted(os.listdir(class_folder)):
                path = os.path.join(class_folder, vid)
                if vid.startswith(".") or not os.path.isfile(path):
                    continue  # skip hidden entries and nested folders
                self.video_paths.append((path, class_idx))

        if self.shuffle:
            np.random.shuffle(self.video_paths)

    def __len__(self):
        """Number of batches per epoch; the last batch may be smaller."""
        # ceil instead of floor so the trailing videos are not silently dropped.
        return int(np.ceil(len(self.video_paths) / self.batch_size))

    def __getitem__(self, index):
        """Return the ``(X, y)`` batch at position ``index``."""
        batch_videos = self.video_paths[index * self.batch_size:(index + 1) * self.batch_size]
        return self._load_batch(batch_videos)

    def on_epoch_end(self):
        """Reshuffle the video order between epochs when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.video_paths)

    def _load_batch(self, batch_videos):
        """Load the clips for ``(path, label)`` pairs, skipping unusable videos."""
        height, width = self.target_size
        expected_shape = (self.sequence_length, height, width, 3)
        X_batch = []
        y_batch = []

        for video_path, label in batch_videos:
            frames = self._extract_frames(video_path)

            if frames.shape != expected_shape:
                print(f"Skipping {video_path}: Unexpected shape {frames.shape}")
                continue

            X_batch.append(frames)
            y_batch.append(label)

        return np.array(X_batch), np.array(y_batch)

    def _extract_frames(self, video_path):
        """Sample ``sequence_length`` evenly spaced frames scaled to [0, 1].

        Returns an array of shape ``(sequence_length, height, width, 3)``, or
        an empty array when the file cannot be opened so that the caller's
        shape check skips it instead of training on an all-black clip.
        Frames that fail to decode are filled with black images.
        """
        height, width = self.target_size
        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                return np.empty((0, height, width, 3))

            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            frame_interval = max(1, total_frames // self.sequence_length)

            # Draw the augmentation parameters once per clip so that all
            # frames of a sequence stay consistent with each other.
            flip = self.augment and random.random() < 0.5
            brightness = random.uniform(0.7, 1.3) if self.augment and random.random() < 0.3 else None
            add_noise = self.augment and random.random() < 0.3

            frames = []
            for i in range(self.sequence_length):
                cap.set(cv2.CAP_PROP_POS_FRAMES, i * frame_interval)
                ret, frame = cap.read()
                if not ret:
                    # Fill missing frames with black images
                    frames.append(np.zeros((height, width, 3)))
                    continue

                # cv2.resize takes (width, height); the result is (height, width, 3).
                frame = cv2.resize(frame, (width, height)) / 255.0  # Normalize

                if flip:
                    frame = cv2.flip(frame, 1)  # Horizontal flip
                if brightness is not None:
                    frame = frame * brightness  # Brightness change
                if add_noise:
                    frame = frame + np.random.normal(0, 0.1, frame.shape)  # Gaussian noise

                # Keep every augmented frame inside the normalized range.
                frames.append(np.clip(frame, 0.0, 1.0))
        finally:
            cap.release()

        return np.array(frames)

# Initialize generators
train_gen = VideoFrameGenerator("/content/dataset/train", batch_size=4, sequence_length=10)
# Validation data must be evaluated as-is: no augmentation and a fixed order.
val_gen = VideoFrameGenerator("/content/dataset/test", batch_size=4, sequence_length=10,
                              shuffle=False, augment=False)


In [None]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import TimeDistributed, LSTM, Dense, Flatten, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout

# Imported here because only this model definition needs it.
from tensorflow.keras.layers import Rescaling

# Load the ImageNet-pretrained CNN as a per-frame feature extractor.
# pooling="avg" reduces each frame's spatial feature map to a single feature
# vector, which keeps the LSTM input (and its weight matrix) small.
base_model = MobileNetV2(weights="imagenet", include_top=False, pooling="avg", input_shape=(224, 224, 3))

# Freeze the backbone: fine-tuning all of its weights on a handful of videos
# with the default Adam learning rate destroys the pretrained features.
base_model.trainable = False

# Define LSTM Model
model = Sequential([
    Input(shape=(10, 224, 224, 3)),
    # Frames arrive scaled to [0, 1]; the MobileNetV2 ImageNet weights expect
    # inputs in [-1, 1], so remap inside the model for training and inference.
    Rescaling(scale=2.0, offset=-1.0),
    TimeDistributed(base_model),
    Dropout(0.4),
    LSTM(128, return_sequences=False),
    Dropout(0.4),
    Dense(2, activation="softmax")
])


model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Train the model; keep the History object so the curves can be inspected later.
history = model.fit(train_gen, validation_data=val_gen, epochs=10)

Epoch 1/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 19s/step - accuracy: 0.6215 - loss: 0.9647 - val_accuracy: 0.6500 - val_loss: 0.6331
Epoch 2/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 11s/step - accuracy: 0.7688 - loss: 0.5125 - val_accuracy: 0.4500 - val_loss: 0.8687
Epoch 3/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 11s/step - accuracy: 0.6653 - loss: 0.7175 - val_accuracy: 0.3000 - val_loss: 0.7782
Epoch 4/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 11s/step - accuracy: 0.5889 - loss: 0.7501 - val_accuracy: 0.5500 - val_loss: 0.7220
Epoch 5/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 11s/step - accuracy: 0.4403 - loss: 0.8750 - val_accuracy: 0.5500 - val_loss: 0.6808
Epoch 6/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 11s/step - accuracy: 0.6299 - loss: 0.5433 - val_accuracy: 0.6000 - val_loss: 0.6751
Epoch 7/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7bf5e44204f0>

In [None]:
import numpy as np
import cv2
import os

def predict_video(video_path, model, sequence_length=10, target_size=(224, 224),
                  class_names=("hello", "wrong")):
    """Classify a single video and return the predicted class name.

    Samples ``sequence_length`` evenly spaced frames, resizes them to
    ``target_size``, scales them to [0, 1] and runs ``model`` on the resulting
    single-clip batch. Frames that cannot be read are replaced by black images.

    Args:
        video_path: Path of the video file to classify.
        model: Object with a Keras-style ``predict`` method that accepts an
            array of shape ``(1, sequence_length, height, width, 3)``.
        sequence_length: Number of frames to sample from the video.
        target_size: ``(height, width)`` of the frames fed to the model.
        class_names: Class names ordered by the model's output index.

    Returns:
        The entry of ``class_names`` with the highest predicted score.
    """
    height, width = target_size
    cap = cv2.VideoCapture(video_path)
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # At least 1 so that very short videos still advance frame by frame.
        frame_interval = max(1, total_frames // sequence_length)

        frames = []
        for i in range(sequence_length):
            cap.set(cv2.CAP_PROP_POS_FRAMES, i * frame_interval)
            ret, img = cap.read()
            if ret:
                # cv2.resize takes (width, height); the result is (height, width, 3).
                frames.append(cv2.resize(img, (width, height)) / 255.0)
            else:
                frames.append(np.zeros((height, width, 3)))
    finally:
        # Release the capture even if decoding raises.
        cap.release()

    batch = np.expand_dims(np.array(frames), axis=0)  # Add batch dimension
    # verbose=0 avoids printing one progress bar per classified video.
    prediction = model.predict(batch, verbose=0)
    class_index = int(np.argmax(prediction[0]))

    return class_names[class_index]


# Folder paths
test_folder = "/content/dataset/test"
classes = ["hello", "wrong"]
correct_predictions = 0
total_videos = 0

# Loop through test videos
for label in classes:
    class_folder = os.path.join(test_folder, label)
    for video in sorted(os.listdir(class_folder)):
        video_path = os.path.join(class_folder, video)

        # Hidden entries such as ".ipynb_checkpoints" and nested folders are
        # not videos; counting them would distort the accuracy.
        if video.startswith(".") or not os.path.isfile(video_path):
            continue

        predicted_label = predict_video(video_path, model)

        print(f"Video: {video} | True Label: {label} | Predicted: {predicted_label}")

        if predicted_label == label:
            correct_predictions += 1
        total_videos += 1

# Calculate accuracy
if total_videos == 0:
    # Fail with a clear message instead of a ZeroDivisionError.
    raise RuntimeError(f"No test videos found under {test_folder}")

accuracy = (correct_predictions / total_videos) * 100
print(f"\nModel Accuracy on Test Set: {accuracy:.2f}%")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
Video: hello_0019.MOV | True Label: hello | Predicted: wrong
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
Video: hello_0017.MOV | True Label: hello | Predicted: wrong
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
Video: hello_0021.MOV | True Label: hello | Predicted: wrong
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
Video: hello_0010.MOV | True Label: hello | Predicted: wrong
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step
Video: hello_0016.MOV | True Label: hello | Predicted: wrong
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
Video: .ipynb_checkpoints | True Label: hello | Predicted: wrong
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
Video: hello_0011.MOV | True Label: hello | Predicted: wrong
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65m