In [3]:
# Cell 1: Imports
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, Flatten, Dense, Dropout, TimeDistributed,
                                     LSTM, GRU, Conv3D, MaxPooling3D, BatchNormalization)
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt

In [4]:
# Cell 2: Data settings and processing

# Spatial size, in pixels, that every extracted frame is resized to.
IMAGE_HEIGHT = 128
IMAGE_WIDTH = 128

# Number of frames sampled from each video.
SEQUENCE_LENGTH = 20

# Dataset root; create_dataset() reads one sub-folder per class from here.
DATASET_DIR = "/kaggle/input/shoplifting-videos-dataset/Shop DataSet"

# Class sub-folder names; a sample's label is the index of its class in this list.
CLASSES_LIST = ["shop lifters", "non shop lifters"]

In [5]:
# Cell 3: Frame extraction function
def frames_extraction(video_path):
    """Sample up to SEQUENCE_LENGTH evenly spaced frames from a video file.

    Each sampled frame is resized to IMAGE_WIDTH x IMAGE_HEIGHT pixels and scaled
    to float32 values in [0, 1]. No colour conversion is applied, so frames keep
    the channel order OpenCV decodes them in.

    Args:
        video_path: path of the video file to read.

    Returns:
        A list of float32 frame arrays. It holds fewer than SEQUENCE_LENGTH
        entries (possibly none) when a frame cannot be read, e.g. because the
        file cannot be opened or is too short; callers should check the length.
    """
    frames_list = []

    video_reader = cv2.VideoCapture(video_path)

    # try/finally so the capture handle is released even if decoding or resizing raises.
    try:
        video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

        # Distance between sampled frames; at least 1 so short videos still advance.
        skip_frames_window = max(int(video_frames_count / SEQUENCE_LENGTH), 1)

        for frame_counter in range(SEQUENCE_LENGTH):
            # Seek straight to the next sampled position instead of decoding every frame.
            video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)

            success, frame = video_reader.read()

            if not success:
                break

            # cv2.resize expects dsize as (width, height), not (height, width).
            resized_frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))

            normalized_frame = resized_frame.astype(np.float32) / 255.0

            frames_list.append(normalized_frame)
    finally:
        video_reader.release()

    return frames_list

In [6]:
'''
# Cell 4: Dataset creation function
def create_dataset():
    features = []
    labels = []
    
    for class_index, class_name in enumerate(CLASSES_LIST):
        print(f'Processing: {class_name}')
        
        files_list = os.listdir(os.path.join(DATASET_DIR, class_name))
        
        for file_name in files_list:
            video_file_path = os.path.join(DATASET_DIR, class_name, file_name)
            
            frames = frames_extraction(video_file_path)
            
            if len(frames) == SEQUENCE_LENGTH:
                features.append(frames)
                labels.append(class_index)
                
    features = np.asarray(features, dtype=np.float32)
    labels = np.array(labels, dtype=np.float32)
    
    return features, labels
'''
def create_dataset():
    """Load every usable video under DATASET_DIR into feature and label arrays.

    For each class in CLASSES_LIST, the videos in the class sub-folder are read
    with frames_extraction(). Files in the "non shop lifters" folder whose name
    ends in '_1.mp4' are skipped as duplicates, and videos that yield fewer than
    SEQUENCE_LENGTH frames are dropped.

    Returns:
        (features, labels): features is a float32 array with one entry of
        SEQUENCE_LENGTH frames per kept video; labels is a float32 array holding
        each video's class index in CLASSES_LIST.
    """
    features = []
    labels = []

    for class_index, class_name in enumerate(CLASSES_LIST):
        print(f'Processing: {class_name}')
        class_dir = os.path.join(DATASET_DIR, class_name)

        # os.listdir returns entries in arbitrary order; sort them so the sample
        # order (and any seeded split made from it) is the same on every run.
        for file_name in sorted(os.listdir(class_dir)):

            if class_name == "non shop lifters" and file_name.endswith('_1.mp4'):
                print(f"  - Filtering out duplicate file: {file_name}")
                continue

            video_file_path = os.path.join(class_dir, file_name)
            frames = frames_extraction(video_file_path)

            # Keep only videos that produced a full-length sequence.
            if len(frames) == SEQUENCE_LENGTH:
                features.append(frames)
                labels.append(class_index)

    features = np.asarray(features, dtype=np.float32)
    labels = np.array(labels, dtype=np.float32)

    return features, labels

print("done")

done


In [None]:
# Cell 5: Create and split dataset
features, labels = create_dataset()

# Pin the one-hot width to the number of classes so the label shape always matches
# the model's output layer, even if one class produced no usable videos.
one_hot_encoded_labels = to_categorical(labels, num_classes=len(CLASSES_LIST))

# Stratify on the integer labels so the train and test splits keep the same class
# ratio as the full dataset.
features_train, features_test, labels_train, labels_test = train_test_split(
    features, one_hot_encoded_labels, test_size=0.2, shuffle=True, random_state=42,
    stratify=labels)

print("-----------------------------------------")
print(f"Train Data Shape {features_train.shape}")
print(f"Test Data Shape {features_test.shape}")
print("-----------------------------------------")

Processing: shop lifters
Processing: non shop lifters
  - Filtering out duplicate file: shop_lifter_n_86_1.mp4
  - Filtering out duplicate file: shop_lifter_n_19_1.mp4
  - Filtering out duplicate file: shop_lifter_n_185_1.mp4
  - Filtering out duplicate file: shop_lifter_n_40_1.mp4
  - Filtering out duplicate file: shop_lifter_n_218_1.mp4
  - Filtering out duplicate file: shop_lifter_n_140_1.mp4
  - Filtering out duplicate file: shop_lifter_n_32_1.mp4
  - Filtering out duplicate file: shop_lifter_n_70_1.mp4
  - Filtering out duplicate file: shop_lifter_n_98_1.mp4
  - Filtering out duplicate file: shop_lifter_n_96_1.mp4
  - Filtering out duplicate file: shop_lifter_n_16_1.mp4
  - Filtering out duplicate file: shop_lifter_n_24_1.mp4
  - Filtering out duplicate file: shop_lifter_n_213_1.mp4
  - Filtering out duplicate file: shop_lifter_n_214_1.mp4
  - Filtering out duplicate file: shop_lifter_n_7_1.mp4
  - Filtering out duplicate file: shop_lifter_n_80_1.mp4
  - Filtering out duplicate fi

In [None]:
# Cell 6: Model creation functions
def create_conv_lstm_model():
    """Build a frozen-VGG16 + LSTM video classifier and print its summary.

    VGG16 (ImageNet weights, no classifier head, not trainable) is applied to
    every frame of the clip; the flattened per-frame features feed an LSTM with
    64 units, followed by a softmax layer with one output per class.

    Returns:
        The uncompiled Sequential model.
    """
    clip_shape = (SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 3)

    # Per-frame feature extractor; its input is a single frame, i.e. the clip
    # shape without the leading time axis.
    feature_extractor = VGG16(weights='imagenet', include_top=False,
                              input_shape=clip_shape[1:])
    feature_extractor.trainable = False

    model = Sequential([
        TimeDistributed(feature_extractor, input_shape=clip_shape),
        TimeDistributed(Flatten()),
        LSTM(64),
        Dense(len(CLASSES_LIST), activation='softmax'),
    ])

    model.summary()
    return model


def create_3d_cnn_model():
    """Build a three-stage 3D-CNN video classifier and print its summary.

    Each stage is Conv3D (3x3x3, ReLU, 'same' padding) -> MaxPooling3D (2x2x2)
    -> BatchNormalization, with 16, 32 and 64 filters respectively. The result
    is flattened and passed through Dense(128, ReLU), Dropout(0.3) and a softmax
    layer with one output per class.

    Returns:
        The uncompiled Sequential model.
    """
    model = Sequential()

    for stage_index, filter_count in enumerate((16, 32, 64)):
        conv_options = dict(filters=filter_count, kernel_size=(3, 3, 3),
                            activation='relu', padding='same')
        # Only the first layer declares the clip shape the network consumes.
        if stage_index == 0:
            conv_options['input_shape'] = (SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 3)

        model.add(Conv3D(**conv_options))
        model.add(MaxPooling3D(pool_size=(2, 2, 2)))
        model.add(BatchNormalization())

    # Classification head.
    head_layers = (
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(len(CLASSES_LIST), activation='softmax'),
    )
    for head_layer in head_layers:
        model.add(head_layer)

    model.summary()
    return model


#def create_transformer_model():

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
# Cell 7: Model training and evaluation

model = create_conv_lstm_model()
#model = create_3d_cnn_model()
# model = create_transformer_model()

model.compile(optimizer='adam', 
              loss='categorical_crossentropy', 
              metrics=['accuracy'])


# Keep only the epoch with the highest validation accuracy on disk.
checkpoint_path = "best_shoppplifting_model.h5"
checkpoint = ModelCheckpoint(filepath=checkpoint_path,
                             monitor='val_accuracy',
                             verbose=1,
                             save_best_only=True,
                             mode='max')


history = model.fit(features_train, labels_train, 
                    epochs=30, 
                    batch_size=4, 
                    shuffle=True, 
                    validation_split=0.2,
                    callbacks=[checkpoint])


# After fit() the model holds the weights of the LAST epoch, which is not
# necessarily the best one. Restore the checkpointed best-validation weights so
# the test metrics below describe the model that was actually saved.
if os.path.exists(checkpoint_path):
    model.load_weights(checkpoint_path)

model_evaluation_history = model.evaluate(features_test, labels_test)
model_evaluation_loss, model_evaluation_accuracy = model_evaluation_history

print(f"Accuracy: {model_evaluation_accuracy * 100:.2f}%")
print(f"Loss: {model_evaluation_loss:.4f}")

In [None]:
# Cell 8: Plotting functions
def plot_metric(history, metric_name, title):
    """Plot a metric's per-epoch training curve and, if recorded, its validation curve.

    Args:
        history: object whose ``history`` attribute maps metric names to
            per-epoch value sequences (e.g. the object returned by ``model.fit``).
        metric_name: key of the training metric, e.g. 'accuracy' or 'loss'. The
            validation series is looked up under 'val_' + metric_name.
        title: title shown above the figure.

    Raises:
        KeyError: if ``metric_name`` is not present in ``history.history``.
    """
    recorded = history.history

    plt.plot(recorded[metric_name], label='Training ' + metric_name)

    # Validation values are only recorded when training used validation data;
    # skip that curve instead of raising KeyError when it is absent.
    validation_key = 'val_' + metric_name
    if validation_key in recorded:
        plt.plot(recorded[validation_key], label='Validation ' + metric_name)

    plt.title(title)
    plt.ylabel(metric_name)
    plt.xlabel('Epoch')
    plt.legend()
    plt.show()

# Draw the accuracy curves first, then the loss curves.
for metric_name, plot_title in (('accuracy', 'Model Accuracy'), ('loss', 'Model Loss')):
    plot_metric(history, metric_name, plot_title)

In [None]:
# Cell 9: Re-draw the training curves
# plot_metric is defined in the previous cell; it is deliberately not re-defined
# here so the helper has a single definition in the notebook.
plot_metric(history, 'accuracy', 'Model Accuracy')

plot_metric(history, 'loss', 'Model Loss')