In [1]:
# Collect videos from a directory
import os

video_directory = 'C:/Users/nihal/Desktop/Academics/Main/MainProject/Main_Project/Data Video'
videos = []
for filename in os.listdir(video_directory):
    if filename.endswith('.mp4'):
        videos.append(os.path.join(video_directory, filename))


In [2]:
# Resize videos to a standard size
import cv2

width, height = 640, 360

for video in videos:
    cap = cv2.VideoCapture(video)
    frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.resize(frame, (width, height))
        frames.append(frame)
    cap.release()

    # Save resized frames to disk
    video_name = os.path.splitext(os.path.basename(video))[0]
    for i, frame in enumerate(frames):
        filename = f'{video_name}_frame{i:03}.jpg'
        cv2.imwrite(os.path.join('C:/Users/nihal/Desktop/Academics/Main/MainProject/Main_Project/Data Video/frame', filename), frame)


In [15]:
# Extract edge features from video frames
import numpy as np

def extract_edges(frame):
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 100, 200)
    return np.dstack((edges, edges, edges))

features = []

num_frames = 16  # Set the number of frames per video

for video in videos:
    video_name = os.path.splitext(os.path.basename(video))[0]
    cap = cv2.VideoCapture(video)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frames_per_segment = frame_count // num_frames

    for i in range(num_frames):
        frame_number = i * frames_per_segment
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
        ret, frame = cap.read()
        if ret:
            frame = cv2.resize(frame, (width, height))
            edges = extract_edges(frame)
            features.append(edges)
    cap.release()

In [41]:
import os
import cv2
import numpy as np
from keras.models import Model
from keras.layers import Input, Dense, Dropout, LSTM, TimeDistributed, Flatten, Permute, Reshape, Multiply, Lambda
from keras import backend as K

batch_size = 32
num_frames = 10
height = 64
width = 64
channels = 3

input_tensor = Input(shape=(num_frames, height, width, channels))

# Reshape the input tensor
reshaped_input = Reshape((num_frames, -1))(input_tensor)

# Define the LSTM layer
lstm_layer = LSTM(128, return_sequences=True)(reshaped_input)

# Define the output layer
output_layer = Dense(num_classes, activation='softmax')(lstm_layer)

# Define the model
model = Model(inputs=input_tensor, outputs=output_layer)

# Define the convolutional layers
conv1 = TimeDistributed(Conv2D(32, kernel_size=(3, 3), activation='relu'))(inputs)
conv2 = TimeDistributed(Conv2D(64, kernel_size=(3, 3), activation='relu'))(conv1)
conv3 = TimeDistributed(Conv2D(128, kernel_size=(3, 3), activation='relu'))(conv2)
conv4 = TimeDistributed(Conv2D(256, kernel_size=(3, 3), activation='relu'))(conv3)

# Assuming your current input tensor shape is (batch_size, num_frames, height, width, channels)
reshaped_input = Reshape((num_frames, -1))(input_tensor)

# Define the LSTM layer
lstm1 = LSTM(256, return_sequences=True)(conv4)
lstm2 = LSTM(256)(lstm1)

# Define the attention mechanism
permute = Permute((2, 1))(lstm1)
reshape = Reshape((-1, 256))(permute)
dense1 = Dense(attention_size, activation='tanh')(reshape)
dropout1 = Dropout(0.5)(dense1)
dense2 = Dense(1, activation='linear')(dropout1)
flatten = Flatten()(dense2)
softmax = Lambda(lambda x: K.softmax(x, axis=1))(flatten)
reshape2 = Reshape((-1, 1))(softmax)
attention = Multiply()([lstm1, reshape2])
attention_sum = Lambda(lambda x: K.sum(x, axis=1))(attention)

# Define the fully connected layers
flatten2 = Flatten()(attention_sum)
dense3 = Dense(128, activation='relu')(flatten2)
dropout2 = Dropout(0.5)(dense3)
dense4 = Dense(64, activation='relu')(dropout2)
dropout3 = Dropout(0.5)(dense4)

# Define the output layer
output = Dense(num_classes, activation='softmax')(dropout3)

# Define the model
model = Model(inputs=inputs, outputs=output)

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Load the video data
data_dir = 'C:/Users/nihal/Desktop/project'
train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'val')
test_dir = os.path.join(data_dir, 'test')

train_videos = [os.path.join(train_dir, f) for f in os.listdir(train_dir)]
val_videos = [os.path.join(val_dir, f) for f in os.listdir(val_dir)]
test_videos = [os.path.join(test_dir, f) for f in os.listdir(test_dir)]

# Define the function to extract frames from a video
def extract_frames(video):
    cap = cv2.VideoCapture(video)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frames_per_segment = frame_count // num_frames
    frames = []

    for i in range(num_frames):
        frame_number = i * frames_per_segment
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
        ret, frame = cap.read()
        frame = cv2.resize(frame, (width, height))
        frames.append(frame)

    cap.release()
    frames = np.array(frames)
    return frames

# Define the function to load the data
def load_data(videos):
    X = []
    y = []

    for video in videos:
        frames = extract_frames(video)
        label = os.path.basename(os.path.dirname(video))
        X.append(frames)
        y.append(label)

    X = np.array(X)
    y = np.array(y)
    return X, y

# Load the training, validation, and testing data
X_train, y_train = load_data(train_videos)
X_val, y_val = load_data(val_videos)
X_test, y_test = load_data(test_videos)

# Convert the labels to one-hot encoding
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)
y_test_encoded = label_encoder.transform(y_test)

one_hot_encoder = OneHotEncoder(sparse=False)
y_train_one_hot = one_hot_encoder.fit_transform(y_train_encoded.reshape(-1, 1))
y_val_one_hot = one_hot_encoder.transform(y_val_encoded.reshape(-1, 1))
y_test_one_hot = one_hot_encoder.transform(y_test_encoded.reshape(-1, 1))

# Train the model
model.fit(X_train, y_train_one_hot, batch_size=32, epochs=10, validation_data=(X_val, y_val_one_hot))

# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test_one_hot)
print('Test loss:', loss)
print('Test accuracy:', accuracy)


ValueError: Input 0 of layer "lstm_4" is incompatible with the layer: expected ndim=3, found ndim=5. Full shape received: (None, 16, 104, 104, 256)