### Extracting frame for the videos

In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:
import cv2
import os

In [None]:
def extract_frames(video_path, output_dir, frame_rate=1):
    cap = cv2.VideoCapture(video_path)
    os.makedirs(output_dir, exist_ok=True)
    count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if count % frame_rate == 0:
            cv2.imwrite(f"{output_dir}/frame_{count}.jpg", frame)
        count += 1
    cap.release()

# Example usage
extract_frames('video.mp4', 'output_frames')

### Model Development

In [8]:
import tensorflow as tf

In [6]:
!pip install tensorflow



In [7]:
!pip install keras



In [9]:
from tensorflow.keras import layers, models

In [14]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, TimeDistributed, LSTM, Dense, Flatten, Conv2D, MaxPooling2D


In [10]:
def create_cnn_model(input_shape=(224, 224, 3)):
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(512, activation='relu'))

    ## binary classification
    model.add(layers.Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

## creating CNN model
cnn_model = create_cnn_model()

## model summary
cnn_model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


### LSTM

In [13]:


## adjusing shapes on the basis of cnn output 
def create_lstm_model(input_shape=(None, 512)):  
    model = models.Sequential()
    model.add(layers.LSTM(64, return_sequences=True, input_shape=input_shape))
    model.add(layers.LSTM(64))

    ## binary cassification
    model.add(layers.Dense(1, activation='sigmoid')) 

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

## craetind a lstm model
lstm_model = create_lstm_model()

## summary
lstm_model.summary()


  super().__init__(**kwargs)


### Working together with lstm and cnn model

In [15]:
def create_cnn_lstm_model(input_shape=(10, 224, 224, 3)):

    ## cnn
    cnn_input = Input(shape=input_shape[1:])
    x = Conv2D(32, (3, 3), activation='relu')(cnn_input)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(128, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)
    cnn_output = Flatten()(x)

    ## lstm
    lstm_input = Input(shape=input_shape)
    x = TimeDistributed(Model(inputs=cnn_input, outputs=cnn_output))(lstm_input)
    x = LSTM(64)(x)
    lstm_output = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=lstm_input, outputs=lstm_output)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

## combined model
cnn_lstm_model = create_cnn_lstm_model()


cnn_lstm_model.summary()

In [18]:
from tensorflow.keras.layers import Layer, Input
from tensorflow.keras import initializers, activations
import tensorflow.keras.backend as K

### Capsule Networks

In [21]:
class CapsuleLayer(Layer):
    def __init__(self, num_capsules, dim_capsule, routings=3, **kwargs):
        super(CapsuleLayer, self).__init__(**kwargs)
        self.num_capsules = num_capsules
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.kernel_initializer = initializers.get('glorot_uniform')

    def build(self, input_shape):
        self.input_num_capsules = input_shape[1]
        self.input_dim_capsule = input_shape[2]
        self.W = self.add_weight(shape=[self.input_num_capsules, self.dim_capsule * self.num_capsules],
                                 initializer=self.kernel_initializer, name='W')
        self.built = True

    def call(self, inputs, training=None):
        # Reshape input and weights to match dimensions
        inputs_expand = K.expand_dims(inputs, 2)  # [None, num_capsules, 1, dim_capsule]
        inputs_tiled = K.tile(inputs_expand, [1, 1, self.num_capsules, 1])  # [None, num_capsules, num_capsules, dim_capsule]
        inputs_tiled = K.reshape(inputs_tiled, (-1, self.input_num_capsules, self.dim_capsule * self.num_capsules))

        u_hat = K.batch_dot(inputs_tiled, self.W)  # [None, num_capsules, dim_capsule * num_capsules]
        u_hat = K.reshape(u_hat, (-1, self.input_num_capsules, self.num_capsules, self.dim_capsule))

        b = K.zeros_like(u_hat[:, :, :, 0])  # [None, num_capsules, num_capsules]

        for i in range(self.routings):
            c = activations.softmax(b, axis=2)  # Routing softmax over num_capsules
            outputs = self.squash(K.batch_dot(c, u_hat, [2, 2]))  # [None, num_capsules, dim_capsule]
            if i < self.routings - 1:
                b = b + K.batch_dot(outputs, u_hat, [2, 3])

        return outputs

    def squash(self, x, axis=-1):
        s_squared_norm = K.sum(K.square(x), axis=axis, keepdims=True)
        scale = K.sqrt(s_squared_norm + K.epsilon())
        return x / scale

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.num_capsules, self.dim_capsule)


input_layer = Input(shape=(10, 512))  
capsule_layer = CapsuleLayer(num_capsules=10, dim_capsule=16, routings=3)(input_layer)

### GANs

In [22]:
from tensorflow.keras.layers import Dense, Reshape, LeakyReLU
from tensorflow.keras.models import Sequential

In [23]:
## model generator

In [25]:
def build_generator():
    model = Sequential()
    model.add(Dense(128, input_dim=100))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(512))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(28 * 28 * 1, activation='tanh'))
    model.add(Reshape((28, 28, 1)))
    return model

## discriminator Model
def build_discriminator():
    model = Sequential()
    model.add(Dense(512, input_shape=(28, 28, 1)))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation='sigmoid'))
    return model

## GAN model
def build_gan(generator, discriminator):
    discriminator.trainable = False
    model = Sequential([generator, discriminator])
    return model

## initiate model
generator = build_generator()
discriminator = build_discriminator()
gan = build_gan(generator, discriminator)

### Audio-video matcher

In [29]:

def extract_audio_features(audio_path):
    y, sr = librosa.load(audio_path)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    return np.mean(mfccs, axis=1)


audio_features = extract_audio_features('audio.wav')


  y, sr = librosa.load(audio_path)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


FileNotFoundError: [Errno 2] No such file or directory: 'audio.wav'

In [27]:
!pip install librosa

Collecting librosa
  Downloading librosa-0.10.2.post1-py3-none-any.whl.metadata (8.6 kB)
Collecting audioread>=2.1.9 (from librosa)
  Downloading audioread-3.0.1-py3-none-any.whl.metadata (8.4 kB)
Collecting soundfile>=0.12.1 (from librosa)
  Downloading soundfile-0.12.1-py2.py3-none-win_amd64.whl.metadata (14 kB)
Collecting pooch>=1.1 (from librosa)
  Downloading pooch-1.8.2-py3-none-any.whl.metadata (10 kB)
Collecting soxr>=0.3.2 (from librosa)
  Downloading soxr-0.4.0-cp312-cp312-win_amd64.whl.metadata (5.7 kB)
Downloading librosa-0.10.2.post1-py3-none-any.whl (260 kB)
   ---------------------------------------- 0.0/260.1 kB ? eta -:--:--
   ---- ----------------------------------- 30.7/260.1 kB 1.4 MB/s eta 0:00:01
   --------- ----------------------------- 61.4/260.1 kB 656.4 kB/s eta 0:00:01
   ---------------- --------------------- 112.6/260.1 kB 939.4 kB/s eta 0:00:01
   -------------------- ----------------- 143.4/260.1 kB 950.9 kB/s eta 0:00:01
   ----------------------------

In [28]:
import librosa

### Model- Training

In [30]:
from tensorflow.keras.callbacks import ModelCheckpoint

## final model cnn-lstm
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_accuracy', save_best_only=True)
cnn_lstm_model.fit(train_data, train_labels, validation_split=0.2, epochs=10, callbacks=[checkpoint])


ValueError: The filepath provided must end in `.keras` (Keras model format). Received: filepath=best_model.h5

In [31]:
## check
cnn_lstm_model.load_weights('best_model.h5')


test_loss, test_acc = cnn_lstm_model.evaluate(test_data, test_labels)
print(f'Test Accuracy: {test_acc}')

FileNotFoundError: [Errno 2] Unable to open file (unable to open file: name = 'best_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)