In [1]:
from google.colab import files
import os
import shutil
import cv2
import numpy as np
from keras.applications import VGG16
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"nandaniraghuwanshi","key":"e4061dd001171f29cadd86afc3b61c37"}'}

In [3]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [4]:
!kaggle datasets list -s DCSASS

ref                                       title                    size  lastUpdated          downloadCount  voteCount  usabilityRating  
----------------------------------------  ----------------------  -----  -------------------  -------------  ---------  ---------------  
mateohervas/dcsass-dataset                DCSASS Dataset            1GB  2019-05-26 22:57:53           3805         46  0.64705884       
skriyaz26/dcsass-videos                   DCSASS videos           122MB  2024-03-25 14:50:31              1          1  0.1875           
hossamfakher/dcsass-classification        DCSASS classification     3GB  2022-02-15 14:46:03             23          2  0.11764706       
skriyaz26/dcsass-videos-dataset           DCSASS Videos dataset   579MB  2024-04-10 03:21:16              7          0  0.125            
shreyasaniljadhav/theft-detection-system  Theft Detection System    4KB  2024-04-21 17:09:36              0          0  0.375            
deepchanddc2/shoplifting-only     

In [5]:
!kaggle datasets download mateohervas/dcsass-dataset

Downloading dcsass-dataset.zip to /content
 99% 1.34G/1.35G [00:07<00:00, 153MB/s]
100% 1.35G/1.35G [00:07<00:00, 188MB/s]


In [6]:
!unzip dcsass-dataset.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: dcsass dataset/DCSASS Dataset/Robbery/Robbery142_x264.mp4/Robbery142_x264_30.mp4  
  inflating: dcsass dataset/DCSASS Dataset/Robbery/Robbery142_x264.mp4/Robbery142_x264_31.mp4  
  inflating: dcsass dataset/DCSASS Dataset/Robbery/Robbery142_x264.mp4/Robbery142_x264_4.mp4  
  inflating: dcsass dataset/DCSASS Dataset/Robbery/Robbery142_x264.mp4/Robbery142_x264_5.mp4  
  inflating: dcsass dataset/DCSASS Dataset/Robbery/Robbery142_x264.mp4/Robbery142_x264_6.mp4  
  inflating: dcsass dataset/DCSASS Dataset/Robbery/Robbery142_x264.mp4/Robbery142_x264_7.mp4  
  inflating: dcsass dataset/DCSASS Dataset/Robbery/Robbery142_x264.mp4/Robbery142_x264_8.mp4  
  inflating: dcsass dataset/DCSASS Dataset/Robbery/Robbery142_x264.mp4/Robbery142_x264_9.mp4  
  inflating: dcsass dataset/DCSASS Dataset/Robbery/Robbery145_x264.mp4/Robbery145_x264_0.mp4  
  inflating: dcsass dataset/DCSASS Dataset/Robbery/Robbery145_x264.mp4/Robbery

In [7]:
# Define paths to the dataset
dataset_path = "/content/dcsass dataset/DCSASS Dataset"
output_path = "/content/New_Dcsass_Dataset"

# Define the label to filter for
target_label = "Shoplifting"

# Check if the target label folder exists
shoplifting_folder = os.path.join(dataset_path, target_label)
if os.path.isdir(shoplifting_folder):
    # Copy the entire target label folder to the output folder
    shutil.copytree(shoplifting_folder, os.path.join(output_path, target_label))
    print(f"Copied folder {target_label} to {output_path}")
else:
    print(f"Target label folder {target_label} not found.")

print("Done copying Shoplifting data.")

Copied folder Shoplifting to /content/New_Dcsass_Dataset
Done copying Shoplifting data.


In [8]:
# Define the path to the directory containing the videos
video_dir = "/content/New_Dcsass_Dataset/Shoplifting"

In [9]:
# Function to preprocess videos for CNN model
def preprocess_for_cnn(video_dir):
    X = []  # List to store preprocessed frames
    y = []  # List to store labels (1 for shoplifting, 0 for normal)

    # Iterate over each subdirectory (e.g., Shoplifting001_x264.mp4, Shoplifting002_x264.mp4, etc.)
    for sub_dir in os.listdir(video_dir):
        sub_dir_path = os.path.join(video_dir, sub_dir)
        if os.path.isdir(sub_dir_path):
            print(f"Processing videos in subdirectory: {sub_dir}")

            # Label encoding: 1 for shoplifting, 0 for normal
            label = 1 if "Shoplifting" in sub_dir else 0

            # Iterate over each video file in the subdirectory
            for video_file in os.listdir(sub_dir_path):
                if video_file.endswith(".mp4"):
                    video_path = os.path.join(sub_dir_path, video_file)
                    cap = cv2.VideoCapture(video_path)

                    # Read frames from the video
                    while cap.isOpened():
                        ret, frame = cap.read()
                        if not ret:
                            break

                        # Resize frame to a fixed size (e.g., 224x224)
                        frame = cv2.resize(frame, (224, 224))



                        # Append preprocessed frame and label to the lists
                        X.append(frame)
                        y.append(label)

                    cap.release()

    # Convert lists to numpy arrays
    X = np.array(X)
    y = np.array(y)

    return X, y

# Preprocess the videos for CNN model
X, y = preprocess_for_cnn(video_dir)

Processing videos in subdirectory: Shoplifting010_x264.mp4
Processing videos in subdirectory: Shoplifting048_x264.mp4
Processing videos in subdirectory: Shoplifting025_x264.mp4
Processing videos in subdirectory: Shoplifting054_x264.mp4
Processing videos in subdirectory: Shoplifting021_x264.mp4
Processing videos in subdirectory: Shoplifting001_x264.mp4
Processing videos in subdirectory: Shoplifting050_x264.mp4
Processing videos in subdirectory: Shoplifting018_x264.mp4
Processing videos in subdirectory: Shoplifting039_x264.mp4
Processing videos in subdirectory: Shoplifting032_x264.mp4
Processing videos in subdirectory: Shoplifting022_x264.mp4
Processing videos in subdirectory: Shoplifting026_x264.mp4
Processing videos in subdirectory: Shoplifting009_x264.mp4
Processing videos in subdirectory: Shoplifting006_x264.mp4
Processing videos in subdirectory: Shoplifting049_x264.mp4
Processing videos in subdirectory: Shoplifting047_x264.mp4
Processing videos in subdirectory: Shoplifting015_x264.m

In [10]:
# Check the shape of preprocessed data
print("X shape:", X.shape)
print("y shape:", y.shape)

X shape: (61440, 224, 224, 3)
y shape: (61440,)


In [11]:
# Function to extract features from preprocessed frames using a pre-trained CNN model
def extract_features(X):
    # Load pre-trained VGG16 model
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # Remove the classification layers
    model = Model(inputs=base_model.input, outputs=base_model.output)

    # Extract features from preprocessed frames
    features = model.predict(X, verbose=1)

    return features

# Extract features from preprocessed frames
features = extract_features(X)

# Check the shape of extracted features
print("Features shape:", features.shape)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Features shape: (61440, 7, 7, 512)


In [12]:
# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(features, y, test_size=0.2, random_state=42)

# Define data augmentation generator
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Build a feedforward neural network classifier
model = Sequential()
model.add(Flatten(input_shape=X_train.shape[1:]))  # Flatten the features
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model with data augmentation
history = model.fit(datagen.flow(X_train, y_train, batch_size=32),
                    steps_per_epoch=len(X_train) / 32, epochs=5,
                    validation_data=(X_test, y_test))

# Evaluate the model on test data
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Loss: 0.0000
Test Accuracy: 1.0000


In [14]:
# Make predictions on test data
y_pred_proba = model.predict(X_test)

# Convert probabilities to class labels based on a threshold
y_pred = (y_pred_proba > 0.5).astype(int)

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print the evaluation metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1 Score: 1.0000
