In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, LSTM, Conv2D, MaxPooling2D, Flatten, TimeDistributed, GlobalAveragePooling2D, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical, Sequence
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import MobileNetV2
import glob
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import gc

In [2]:
# Set memory growth for GPU
physical_devices = tf.config.list_physical_devices('GPU')
if len(physical_devices) > 0:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

# Configuration parameters
CONFIG = {
    'seed': 42,
    'target_size': (224, 224),  # Resizing frames to save memory
    'batch_size': 4,  # Small batch size to avoid memory issues
    'epochs': 10,
    'frames_per_video': 16,  # Taking subset of frames to save memory
    'learning_rate': 1e-4,
    'num_classes': 2,  # Shoplifter vs Non-shoplifter
}

# Set random seeds for reproducibility
tf.random.set_seed(CONFIG['seed'])
np.random.seed(CONFIG['seed'])


In [3]:
# Create a DataFrame with video paths and labels
def create_dataframe(shoplifters_dir, non_shoplifters_dir):
    shoplifters_paths = glob.glob(os.path.join(shoplifters_dir, '*.mp4'))
    non_shoplifters_paths = glob.glob(os.path.join(non_shoplifters_dir, '*.mp4'))
    
    shoplifters_df = pd.DataFrame({
        'path': shoplifters_paths,
        'label': 1  # 1 for shoplifter
    })
    
    non_shoplifters_df = pd.DataFrame({
        'path': non_shoplifters_paths,
        'label': 0  # 0 for non-shoplifter
    })
    
    df = pd.concat([shoplifters_df, non_shoplifters_df], ignore_index=True)
    return df


In [4]:
# Exploratory Data Analysis
def perform_eda(df, shoplifters_dir, non_shoplifters_dir):
    print("Data Summary:")
    print(f"Total videos: {len(df)}")
    print(f"Shoplifter videos: {len(df[df['label'] == 1])}")
    print(f"Non-shoplifter videos: {len(df[df['label'] == 0])}")
    
    # Sample a few videos to analyze
    sample_videos = df.sample(min(10, len(df)), random_state=CONFIG['seed'])
    
    video_stats = []
    for _, row in sample_videos.iterrows():
        video_path = row['path']
        cap = cv2.VideoCapture(video_path)
        
        # Get video properties
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        duration = frame_count / fps
        
        video_stats.append({
            'path': video_path,
            'label': row['label'],
            'frame_count': frame_count,
            'fps': fps,
            'width': width,
            'height': height,
            'duration': duration
        })
        
        cap.release()
    
    stats_df = pd.DataFrame(video_stats)
    print("\nVideo Statistics:")
    print(stats_df.describe())
    
    # Plot distribution of frame counts
    plt.figure(figsize=(10, 6))
    sns.histplot(data=stats_df, x='frame_count', hue='label', 
                 element='step', common_norm=False, bins=20)
    plt.title('Distribution of Frame Counts')
    plt.xlabel('Number of Frames')
    plt.ylabel('Count')
    plt.legend(['Non-Shoplifter', 'Shoplifter'])
    plt.savefig('frame_count_distribution.png')
    plt.close()
    
    # Plot distribution of video durations
    plt.figure(figsize=(10, 6))
    sns.histplot(data=stats_df, x='duration', hue='label', 
                 element='step', common_norm=False, bins=20)
    plt.title('Distribution of Video Durations')
    plt.xlabel('Duration (seconds)')
    plt.ylabel('Count')
    plt.legend(['Non-Shoplifter', 'Shoplifter'])
    plt.savefig('duration_distribution.png')
    plt.close()
    
    return stats_df


In [5]:
# Video Data Generator
class VideoDataGenerator(Sequence):
    def __init__(self, dataframe, batch_size, frames_per_video, target_size, num_classes, shuffle=True):
        self.dataframe = dataframe
        self.batch_size = batch_size
        self.frames_per_video = frames_per_video
        self.target_size = target_size
        self.num_classes = num_classes
        self.shuffle = shuffle
        self.indexes = np.arange(len(dataframe))
        if self.shuffle:
            np.random.shuffle(self.indexes)
            
        # Image data augmentation
        self.img_gen = ImageDataGenerator(
            rescale=1./255,
            rotation_range=10,
            width_shift_range=0.1,
            height_shift_range=0.1,
            shear_range=0.1,
            zoom_range=0.1,
            horizontal_flip=True,
            fill_mode='nearest'
        )
        
        # Handle empty batches issue by preprocessing to identify valid videos
        self.valid_indices = []
        for i in range(len(self.dataframe)):
            video_path = self.dataframe.iloc[i]['path']
            if self._check_video_valid(video_path):
                self.valid_indices.append(i)
                
        self.indexes = np.array(self.valid_indices)
        if self.shuffle:
            np.random.shuffle(self.indexes)
        
    def _check_video_valid(self, video_path):
        """Check if video has enough frames to extract."""
        try:
            cap = cv2.VideoCapture(video_path)
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            cap.release()
            return frame_count >= self.frames_per_video
        except:
            return False
    
    def __len__(self):
        """Denotes the number of batches per epoch"""
        return int(np.ceil(len(self.indexes) / self.batch_size))
    
    def __getitem__(self, idx):
        """Generate one batch of data"""
        # Generate indexes of the batch
        batch_indexes = self.indexes[idx * self.batch_size:(idx + 1) * self.batch_size]
        
        # Find list of IDs
        batch_df = self.dataframe.iloc[batch_indexes]
        
        batch_videos = []
        batch_labels = []
        
        for _, row in batch_df.iterrows():
            video_path = row['path']
            label = row['label']
            
            # Process video
            frames = self._extract_frames(video_path)
            if frames is not None and len(frames) == self.frames_per_video:
                batch_videos.append(frames)
                batch_labels.append(label)
        
        # Ensure we have at least one valid video in the batch
        if len(batch_videos) == 0:
            # If no valid videos in batch, use first valid video
            first_valid_path = self.dataframe.iloc[self.indexes[0]]['path']
            first_valid_label = self.dataframe.iloc[self.indexes[0]]['label']
            frames = self._extract_frames(first_valid_path)
            if frames is not None:
                batch_videos.append(frames)
                batch_labels.append(first_valid_label)
        
        # Convert to numpy arrays
        batch_videos = np.array(batch_videos)
        batch_labels = to_categorical(np.array(batch_labels), num_classes=self.num_classes)
        
        return batch_videos, batch_labels
    
    def _extract_frames(self, video_path):
        try:
            cap = cv2.VideoCapture(video_path)
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            
            if frame_count <= 0:
                cap.release()
                return None
            
            # Calculate indices of frames to extract
            indices = np.linspace(0, frame_count - 1, self.frames_per_video, dtype=int)
            
            frames = []
            for i in indices:
                cap.set(cv2.CAP_PROP_POS_FRAMES, i)
                ret, frame = cap.read()
                
                if not ret:
                    continue
                
                # Convert BGR to RGB
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                
                # Resize frame
                frame = cv2.resize(frame, self.target_size)
                
                # Apply image augmentation
                frame = self.img_gen.random_transform(frame)
                
                # Normalize
                frame = frame / 255.0
                
                frames.append(frame)
            
            cap.release()
            
            # If we couldn't extract enough frames, return None
            if len(frames) < self.frames_per_video:
                return None
            
            return np.array(frames)
        except Exception as e:
            print(f"Error processing video {video_path}: {e}")
            return None
    
    def on_epoch_end(self):
        """Updates indexes after each epoch"""
        if self.shuffle:
            np.random.shuffle(self.indexes)


In [6]:
def create_model(input_shape, num_classes):
    """
    Create an improved video classification model for shoplifting detection
    with class imbalance handling
    
    Parameters:
    - input_shape: Shape of input data (frames, height, width, channels)
    - num_classes: Number of output classes
    
    Returns:
    - Compiled Keras model
    """
    # Import required layers
    from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPooling2D, Flatten
    from tensorflow.keras.layers import Dense, Dropout, LSTM, TimeDistributed, Input
    from tensorflow.keras.layers import GlobalAveragePooling2D, Bidirectional
    from tensorflow.keras.models import Model
    from tensorflow.keras.optimizers import Adam
    
    # Input shape
    input_layer = Input(shape=input_shape)
    
    # First Conv block - extract basic features
    x = TimeDistributed(Conv2D(32, (3, 3), activation='relu', padding='same'))(input_layer)
    x = TimeDistributed(BatchNormalization())(x)
    x = TimeDistributed(MaxPooling2D((2, 2)))(x)
    
    # Second Conv block - more complex features
    x = TimeDistributed(Conv2D(64, (3, 3), activation='relu', padding='same'))(x)
    x = TimeDistributed(BatchNormalization())(x)
    x = TimeDistributed(MaxPooling2D((2, 2)))(x)
    
    # Third Conv block - detect higher level patterns
    x = TimeDistributed(Conv2D(128, (3, 3), activation='relu', padding='same'))(x)
    x = TimeDistributed(Conv2D(128, (3, 3), activation='relu', padding='same'))(x)  # Added depth
    x = TimeDistributed(BatchNormalization())(x)
    x = TimeDistributed(MaxPooling2D((2, 2)))(x)
    
    # Fourth Conv block - more specific features
    x = TimeDistributed(Conv2D(256, (3, 3), activation='relu', padding='same'))(x)
    x = TimeDistributed(Conv2D(256, (3, 3), activation='relu', padding='same'))(x)  # Added depth
    x = TimeDistributed(BatchNormalization())(x)
    x = TimeDistributed(MaxPooling2D((2, 2)))(x)
    
    # Use GlobalAveragePooling2D instead of Flatten to reduce parameters
    x = TimeDistributed(GlobalAveragePooling2D())(x)
    
    # Use Bidirectional LSTM for better temporal pattern detection
    x = Bidirectional(LSTM(256, return_sequences=True))(x)
    x = Dropout(0.4)(x)
    x = Bidirectional(LSTM(128))(x)
    x = Dropout(0.4)(x)
    
    # Dense layers for classification
    x = Dense(128, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    
    # Output layer
    output = Dense(num_classes, activation='softmax')(x)
    
    # Create model
    model = Model(inputs=input_layer, outputs=output)
    
    # Compile model with class weighting considerations
    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='categorical_crossentropy',
        metrics=['accuracy', 
                tf.keras.metrics.Precision(name='precision'),
                tf.keras.metrics.Recall(name='recall'),
                tf.keras.metrics.AUC(name='auc')]
    )
    
    return model

In [7]:
# Function to train the model
def train_model(train_gen, val_gen, model, epochs):
    # Callbacks
    checkpoint = ModelCheckpoint(
        'best_model.keras',
        monitor='val_accuracy',
        save_best_only=True,
        mode='max',
        verbose=1
    )
    
    early_stopping = EarlyStopping(
        monitor='val_accuracy',
        patience=5,
        restore_best_weights=True,
        verbose=1
    )
    
    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=3,
        min_lr=1e-6,
        verbose=1
    )
    
    # Train the model
    history = model.fit(
        train_gen,
        validation_data=val_gen,
        epochs=epochs,
        callbacks=[checkpoint, early_stopping, reduce_lr]
    )
    
    return history, model


In [8]:
# Function to evaluate the model
def evaluate_model(model, test_gen):
    # Initialize arrays for predictions and ground truth
    all_predictions = []
    all_true_labels = []
    
    # Loop through the test generator
    for i in range(len(test_gen)):
        x, y = test_gen[i]
        
        # Get model predictions
        pred = model.predict(x)
        
        # Store predictions and true labels
        all_predictions.extend(np.argmax(pred, axis=1))
        all_true_labels.extend(np.argmax(y, axis=1))
    
    # Calculate metrics
    from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
    
    # Accuracy
    acc = accuracy_score(all_true_labels, all_predictions)
    print(f"\nTest Accuracy: {acc:.4f}")
    
    # Classification report
    print("\nClassification Report:")
    print(classification_report(all_true_labels, all_predictions, target_names=['Non-Shoplifter', 'Shoplifter']))
    
    # Confusion matrix
    print("\nConfusion Matrix:")
    cm = confusion_matrix(all_true_labels, all_predictions)
    print(cm)
    
    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['Non-Shoplifter', 'Shoplifter'],
                yticklabels=['Non-Shoplifter', 'Shoplifter'])
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.savefig('confusion_matrix.png')
    plt.close()
    
    return all_true_labels, all_predictions


In [9]:
# Improved main function with class weight handling
def main():
    # Set paths to your data directories
    shoplifters_dir = '/kaggle/input/shoplifters/Shop DataSet/shop lifters'
    non_shoplifters_dir = '/kaggle/input/shoplifters/Shop DataSet/non shop lifters'
    
    # Create dataframe
    df = create_dataframe(shoplifters_dir, non_shoplifters_dir)
    
    # Perform EDA
    stats_df = perform_eda(df, shoplifters_dir, non_shoplifters_dir)
    
    # Split the data
    train_df, temp_df = train_test_split(df, test_size=0.4, random_state=CONFIG['seed'], stratify=df['label'])
    val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=CONFIG['seed'], stratify=temp_df['label'])
    
    print(f"Train set: {len(train_df)} videos")
    print(f"- Shoplifters: {len(train_df[train_df['label'] == 1])}")
    print(f"- Non-shoplifters: {len(train_df[train_df['label'] == 0])}")
    
    print(f"Validation set: {len(val_df)} videos")
    print(f"- Shoplifters: {len(val_df[val_df['label'] == 1])}")
    print(f"- Non-shoplifters: {len(val_df[val_df['label'] == 0])}")
    
    print(f"Test set: {len(test_df)} videos")
    print(f"- Shoplifters: {len(test_df[test_df['label'] == 1])}")
    print(f"- Non-shoplifters: {len(test_df[test_df['label'] == 0])}")
    
    # Calculate class weights to handle imbalance
    total = len(df)
    n_shoplifters = len(df[df['label'] == 1])
    n_non_shoplifters = len(df[df['label'] == 0])
    
    class_weight = {
        0: total / (2.0 * n_non_shoplifters),  # Weight for non-shoplifters
        1: total / (2.0 * n_shoplifters)       # Weight for shoplifters
    }
    print(f"Using class weights: {class_weight}")
    
    # Create data generators with balanced batch sampling
    train_gen = VideoDataGenerator(
        train_df,
        batch_size=CONFIG['batch_size'],
        frames_per_video=CONFIG['frames_per_video'],
        target_size=CONFIG['target_size'],
        num_classes=CONFIG['num_classes'],
        shuffle=True
    )
    
    val_gen = VideoDataGenerator(
        val_df,
        batch_size=CONFIG['batch_size'],
        frames_per_video=CONFIG['frames_per_video'],
        target_size=CONFIG['target_size'],
        num_classes=CONFIG['num_classes'],
        shuffle=False
    )
    
    test_gen = VideoDataGenerator(
        test_df,
        batch_size=CONFIG['batch_size'],
        frames_per_video=CONFIG['frames_per_video'],
        target_size=CONFIG['target_size'],
        num_classes=CONFIG['num_classes'],
        shuffle=False
    )
    
    # Create the model
    input_shape = (CONFIG['frames_per_video'], CONFIG['target_size'][0], CONFIG['target_size'][1], 3)
    model = create_model(input_shape, CONFIG['num_classes'])
    
    # Print model summary
    model.summary()
    
    # Train the model with class weights
    history = model.fit(
        train_gen,
        validation_data=val_gen,
        epochs=CONFIG['epochs'],
        class_weight=class_weight,  # Add class weights
        callbacks=[
            ModelCheckpoint('best_model.keras', monitor='val_auc', save_best_only=True, mode='max', verbose=1),
            EarlyStopping(monitor='val_auc', patience=5, restore_best_weights=True, verbose=1),
            ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6, verbose=1)
        ]
    )
    
    # Plot training history with additional metrics
    plt.figure(figsize=(16, 10))
    
    # Plot accuracy
    plt.subplot(2, 2, 1)
    plt.plot(history.history['accuracy'])
    plt.plot(history.history['val_accuracy'])
    plt.title('Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend(['Train', 'Validation'], loc='lower right')
    
    # Plot loss
    plt.subplot(2, 2, 2)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend(['Train', 'Validation'], loc='upper right')
    
    # Plot precision
    plt.subplot(2, 2, 3)
    plt.plot(history.history['precision'])
    plt.plot(history.history['val_precision'])
    plt.title('Model Precision')
    plt.xlabel('Epoch')
    plt.ylabel('Precision')
    plt.legend(['Train', 'Validation'], loc='lower right')
    
    # Plot recall
    plt.subplot(2, 2, 4)
    plt.plot(history.history['recall'])
    plt.plot(history.history['val_recall'])
    plt.title('Model Recall')
    plt.xlabel('Epoch')
    plt.ylabel('Recall')
    plt.legend(['Train', 'Validation'], loc='lower right')
    
    plt.tight_layout()
    plt.savefig('training_history.png')
    plt.close()
    
    # Evaluate the model
    y_true, y_pred = evaluate_model(model, test_gen)
    
    # Clean up to save memory
    del train_gen, val_gen, test_gen
    gc.collect()
    
    print("Model training and evaluation completed!")

In [10]:
if __name__ == "__main__":
    main()

Data Summary:
Total videos: 855
Shoplifter videos: 324
Non-shoplifter videos: 531

Video Statistics:
           label  frame_count        fps  width  height   duration
count  10.000000    10.000000  10.000000   10.0    10.0  10.000000
mean    0.500000   321.600000  24.908000  704.0   576.0  12.911585
std     0.527046    93.768272   0.120996    0.0     0.0   3.753895
min     0.000000   225.000000  24.750000  704.0   576.0   9.000000
25%     0.000000   255.750000  24.770000  704.0   576.0  10.257576
50%     0.500000   287.000000  25.000000  704.0   576.0  11.517519
75%     1.000000   365.750000  25.000000  704.0   576.0  14.777778
max     1.000000   475.000000  25.000000  704.0   576.0  19.000000


  with pd.option_context('mode.use_inf_as_na', True):
  data_subset = grouped_data.get_group(pd_key)
  data_subset = grouped_data.get_group(pd_key)
  data_subset = grouped_data.get_group(pd_key)
  with pd.option_context('mode.use_inf_as_na', True):
  data_subset = grouped_data.get_group(pd_key)
  data_subset = grouped_data.get_group(pd_key)


Train set: 513 videos
- Shoplifters: 194
- Non-shoplifters: 319
Validation set: 171 videos
- Shoplifters: 65
- Non-shoplifters: 106
Test set: 171 videos
- Shoplifters: 65
- Non-shoplifters: 106
Using class weights: {0: 0.8050847457627118, 1: 1.3194444444444444}


Epoch 1/10


  self._warn_if_super_not_called()


[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.4999 - auc: 0.5003 - loss: 1.1235 - precision: 0.4999 - recall: 0.4999
Epoch 1: val_auc improved from -inf to 0.51429, saving model to best_model.keras
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m510s[0m 4s/step - accuracy: 0.5001 - auc: 0.5005 - loss: 1.1234 - precision: 0.5001 - recall: 0.5001 - val_accuracy: 0.3860 - val_auc: 0.5143 - val_loss: 0.7296 - val_precision: 0.3860 - val_recall: 0.3860 - learning_rate: 1.0000e-04
Epoch 2/10
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.5350 - auc: 0.5693 - loss: 1.0132 - precision: 0.5350 - recall: 0.5350
Epoch 2: val_auc improved from 0.51429 to 0.56691, saving model to best_model.keras
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m481s[0m 4s/step - accuracy: 0.5350 - auc: 0.5693 - loss: 1.0131 - precision: 0.5350 - recall: 0.5350 - val_accuracy: 0.5205 - val_auc: 0.5669 - val_loss