## Apply the DCT (Discrete Cosine Transform) to each frame of a video

In [21]:
import cv2
from scipy.fftpack import dct, idct
import numpy as np

In [22]:
video = cv2.VideoCapture('datasets/tiktoks/tom_and_jerry_tiktok.mp4')
success, image = video.read()
# read() signals failure through `success`; stop here with a clear message
# instead of letting cvtColor fail on an unusable frame.
if not success:
    raise IOError(
        "Could not read the first frame from "
        "'datasets/tiktoks/tom_and_jerry_tiktok.mp4'"
    )
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

In [23]:
def get_dct_feature_vector(frame):
    """
    Computes the DCT feature vector of a video frame.

    Parameters:
    frame: ndarray of shape (height, width) or (height, width, n_channels)
        Video frame. A 3-D frame with a single channel has that channel
        extracted; any other 3-D frame is converted to grayscale with
        cv2.COLOR_BGR2GRAY (assumes BGR channel order, as produced by
        cv2.VideoCapture -- confirm for frames from other sources).
        Arrays with fewer than three dimensions are passed to cv2.dct as-is.

    Returns:
    ndarray of shape (n_features,)
        Flattened DCT coefficients computed on the float32 version of the
        (grayscale) frame.
    """
    frame = np.asarray(frame)

    # cv2.dct only accepts single-channel input, so multi-channel frames
    # are reduced to one channel first.
    if frame.ndim == 3:
        if frame.shape[2] == 1:
            frame = frame[:, :, 0]
        else:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Compute the DCT of the single-channel image. The local name avoids
    # shadowing the `dct` function imported from scipy.fftpack.
    coefficients = cv2.dct(frame.astype(np.float32))

    # Flatten the DCT coefficients into a 1D array
    return coefficients.flatten()


In [24]:
def get_mean_feature_vector(frames):
    """
    Averages the DCT feature vectors of a collection of frames.

    Parameters:
    frames: iterable of ndarrays
        Video frames; each one is passed to get_dct_feature_vector.

    Returns:
    ndarray of shape (n_features,)
        Element-wise mean of the per-frame DCT feature vectors.
    """
    # One row per frame, one column per DCT coefficient
    stacked = np.asarray([get_dct_feature_vector(single) for single in frames])

    # Average over the frame axis
    return stacked.mean(axis=0)


In [25]:
def get_diagonal_covariance_matrix(frames):
    """
    Builds a diagonal covariance matrix from the DCT features of a set of frames.

    Parameters:
    frames: iterable of ndarrays
        Video frames; each one is passed to get_dct_feature_vector.

    Returns:
    ndarray of shape (n_features, n_features)
        Square matrix whose diagonal holds the per-coefficient variance
        across frames (np.var with its default settings) and whose
        off-diagonal entries are zero.
        NOTE(review): the dense matrix grows quadratically with n_features,
        so full-resolution frames may not fit in memory -- confirm intended
        input size.
    """
    # One row per frame, one column per DCT coefficient
    stacked = np.asarray([get_dct_feature_vector(single) for single in frames])

    # Variance of every coefficient over the frame axis
    per_coefficient_variance = stacked.var(axis=0)

    # Place the variances on the diagonal of an otherwise-zero matrix
    return np.diag(per_coefficient_variance)


In [26]:
# NOTE(review): `gray_image` is a single grayscale frame, not a list of frames.
# get_diagonal_covariance_matrix iterates over its argument, so each image row
# is treated as one "frame" here; presumably the matrix shown below therefore
# has one entry per image column -- confirm this is the intended input.
get_diagonal_covariance_matrix(gray_image)

array([[4.1440460e+06, 0.0000000e+00, 0.0000000e+00, ..., 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 2.6200400e+05, 0.0000000e+00, ..., 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 0.0000000e+00, 6.6077977e+04, ..., 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       ...,
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 2.1295710e+00,
        0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 0.0000000e+00,
        1.7613146e+00, 0.0000000e+00],
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 0.0000000e+00,
        0.0000000e+00, 1.3129641e+00]], dtype=float32)

In [1]:
import numpy as np
from scipy.stats import norm

def gaussian_query_model(X, fi, Z):
    """
    Computes the similarity score between an arbitrary video frame and a query video segment using the Gaussian Query Model.

    The statistic is z = ||X - fi||^2 / ||Z||^2 and the score is the upper
    tail probability of the standard normal distribution at z.

    Parameters:
    X: array-like of shape (n_features,)
        DCT feature vector of the video frame.
    fi: array-like of shape (n_features,)
        Mean feature vector of the Gaussian distribution.
    Z: array-like
        Covariance term of the Gaussian distribution. Only its norm
        (np.linalg.norm with default arguments) is used, so a vector of
        variances or a matrix are both accepted.

    Returns:
    float
        Similarity score. Because z is never negative, the score lies in
        [0, 0.5]: 0.5 when X equals fi, approaching 0 as X moves away.
        If the norm of Z is zero the division yields inf or nan (NumPy
        semantics) and the score follows from that.
    """
    deviation = np.asarray(X) - np.asarray(fi)
    z = np.square(np.linalg.norm(deviation)) / np.square(np.linalg.norm(np.asarray(Z)))

    # The survival function equals 1 - cdf(z) mathematically, but it avoids
    # the cancellation that rounds small tail probabilities to exactly 0.0.
    score = norm.sf(z)
    return score


In [3]:
import cv2
import numpy as np

# Open the video file
video = cv2.VideoCapture('datasets/tiktoks/tom_and_jerry_tiktok.mp4')

# List to store feature vectors (X) for each frame
feature_vectors = []

# Loop through frames and extract DCT features. The capture is released even
# if a frame fails to convert, so the file handle/decoder is not leaked.
try:
    while True:
        success, image = video.read()
        if not success:
            # No more frames (or the file could not be read)
            break
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Perform DCT on the grayscale frame to get feature vector X
        dct_features = cv2.dct(np.float32(gray_image))

        # Append the (still 2D) DCT coefficient array to the list
        feature_vectors.append(dct_features)
finally:
    video.release()

# Stack the per-frame DCT arrays into a single numpy array (frames first)
feature_vectors = np.array(feature_vectors)
# feature_vectors = feature_vectors.reshape(len(feature_vectors), -1)
# # Compute the mean (fi) and covariance matrix (Z)
# fi = np.mean(feature_vectors, axis=0)
# Z = np.cov(feature_vectors, rowvar=False)

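# At this point only the per-frame DCT arrays (X) are available in
# feature_vectors; fi and Z are NOT computed because the lines above are
# commented out.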


In [4]:
# Number of entries in feature_vectors (one DCT array was appended per frame read)
len(feature_vectors)

2207

In [12]:
num_coefficients_to_keep = 30

# Initialize a list to store reduced DCT feature vectors
reduced_feature_vectors = []

# Loop through the original DCT feature vectors
for dct_features in feature_vectors:
    # View the 2D DCT array as 1D (row-major order); ravel() avoids copying
    # the whole frame when the data is already contiguous.
    flattened_features = dct_features.ravel()

    # Keep the first 'num_coefficients_to_keep' coefficients. The explicit
    # copy makes the short vector own its data, so it does not keep a
    # full-frame buffer alive through a view.
    reduced_features = flattened_features[:num_coefficients_to_keep].copy()

    # Append the reduced feature vector to the list
    reduced_feature_vectors.append(reduced_features)

array([ 5.6568828e+04, -2.2924290e+03, -1.2859888e+02,  7.7700446e+02,
        3.2490677e+02,  1.0842490e+03,  1.0840892e+03,  1.1373260e+03,
        5.3247943e+02,  2.5150027e+02,  1.0463481e+02,  1.8450529e+02,
       -1.5336316e+02,  3.2883893e+02,  5.8651660e+02,  3.5167456e+02,
        7.7575310e+01,  2.5430179e+02, -5.8046130e+02,  2.6745441e+02,
        4.4231434e+01, -2.0824574e-01, -2.5066266e+02, -2.1156836e+02,
       -1.7688683e+02, -1.0417820e+02,  3.2860790e+01, -3.3824768e+02,
       -3.0170195e+01,  4.2006760e+02], dtype=float32)