CNN + LSTM Crime Detection

Dataset
https://www.kaggle.com/datasets/odins0n/ucf-crime-dataset

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# --- Dataset locations -------------------------------------------------
# Each directory is expected to hold one subfolder per class.
train_dir = 'path/to/train_directory'
test_dir = 'path/to/test_directory'

# --- Input geometry ----------------------------------------------------
IMAGE_SIZE = (64, 64)   # (height, width) every image is resized to
MAX_SEQ_LENGTH = 100    # text sequences are padded/truncated to this length

# --- Training hyperparameters -------------------------------------------
NUM_CLASSES = 14        # width of the softmax output layers
BATCH_SIZE = 32
NUM_EPOCHS = 100

In [None]:
# Image input pipelines.
#
# Training images are rescaled to [0, 1] and randomly augmented. Evaluation
# images are only rescaled: applying random augmentation to the test split
# would make validation metrics noisy and non-reproducible.
image_data_generator = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)
test_data_generator = ImageDataGenerator(rescale=1./255)

# Load training data from directory.
# class_mode='sparse' makes the iterator yield (images, integer_labels)
# batches, which is what a model compiled with
# 'sparse_categorical_crossentropy' needs; with class_mode=None only images
# are yielded and fit() has no targets to train against.
# Shuffling is enabled so that batches are not drawn in directory (i.e.
# class-sorted) order. The iterator's `.classes` attribute is indexed in file
# order and is not affected by shuffling, so labels read from it still line
# up with any per-file side data.
train_image_generator = image_data_generator.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='sparse',
    shuffle=True
)

# Load test data from directory (no augmentation, fixed order so that
# evaluation results are reproducible).
test_image_generator = test_data_generator.flow_from_directory(
    test_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='sparse',
    shuffle=False
)

In [None]:
# Label arrays exposed by the directory iterators.
train_labels = train_image_generator.classes
test_labels = test_image_generator.classes

# Start with empty text corpora; they are filled in once the text is loaded.
train_texts, test_texts = [], []

# Placeholder: supply a real loader for the text that accompanies the images.
def load_text_data(directory):
    """Load the text samples stored under ``directory``.

    This is a stub that must be replaced with a project-specific loader.
    It raises instead of silently returning ``None`` so that the problem is
    reported here rather than as an obscure error inside the tokenizer.

    Args:
        directory: Path of the dataset split whose text should be loaded.

    Returns:
        Once implemented: a sequence of strings, one per sample. The order
        should presumably match the label arrays used for training
        (TODO confirm against the chosen data layout).

    Raises:
        NotImplementedError: Always, until a real loader is provided.
    """
    raise NotImplementedError(
        f"load_text_data is a placeholder; implement text loading for {directory!r}"
    )

# Read the raw text for both splits.
train_texts = load_text_data(train_dir)
test_texts = load_text_data(test_dir)

# Build the vocabulary from the training split only, then map both splits
# to integer sequences with that vocabulary.
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(train_texts)
train_sequences = tokenizer.texts_to_sequences(train_texts)
test_sequences = tokenizer.texts_to_sequences(test_texts)

# Bring every sequence to exactly MAX_SEQ_LENGTH tokens: longer ones lose
# their tail, shorter ones are padded at the end.
_PAD_OPTIONS = dict(maxlen=MAX_SEQ_LENGTH, padding='post', truncating='post')
train_sequences_padded = pad_sequences(train_sequences, **_PAD_OPTIONS)
test_sequences_padded = pad_sequences(test_sequences, **_PAD_OPTIONS)

In [None]:
# Define LSTM model for text processing
def build_lstm_model(input_shape, vocab_size, embedding_dim, num_classes=None):
    """Build an (uncompiled) Embedding -> LSTM -> Dense text classifier.

    Args:
        input_shape: Length of each input token sequence; forwarded to the
            embedding layer as ``input_length``.
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Size of each embedding vector.
        num_classes: Width of the softmax output layer. Defaults to the
            module-level ``NUM_CLASSES`` so existing three-argument calls
            keep working; mirrors the ``num_classes`` parameter of
            ``build_cnn_model``.

    Returns:
        A ``Sequential`` model ending in a ``num_classes``-way softmax.
    """
    if num_classes is None:
        # Resolved at call time so the global can be changed between calls.
        num_classes = NUM_CLASSES
    model = models.Sequential([
        layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=input_shape),
        layers.LSTM(64),
        layers.Dense(64, activation='relu'),
        layers.Dense(num_classes, activation='softmax')
    ])
    return model

# Model dimensions.
embedding_dim = 64  # Example embedding dimension
# Tokenizer word indices start at 1, so one extra row is needed for index 0
# (the value used for padding).
vocab_size = 1 + len(tokenizer.word_index)

# Assemble the text classifier and configure it for integer class labels.
lstm_model = build_lstm_model(MAX_SEQ_LENGTH, vocab_size, embedding_dim)
lstm_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the padded training sequences, validating on the test split.
lstm_model.fit(
    train_sequences_padded,
    train_labels,
    batch_size=BATCH_SIZE,
    epochs=NUM_EPOCHS,
    validation_data=(test_sequences_padded, test_labels),
)

In [None]:
def build_cnn_model(input_shape, num_classes):
    """Build an (uncompiled) convolutional image classifier.

    The network is four Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with
    64, 128, 256 and 512 filters, followed by two Dense(ReLU) + Dropout(0.5)
    stages of 512 and 256 units and a softmax output layer.

    Args:
        input_shape: Shape of one input image, passed to the first Conv2D.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``Sequential`` model.
    """
    # Only the first convolution declares the input shape.
    stack = [
        layers.Conv2D(64, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
    ]

    # Remaining convolutional stages, widening as resolution shrinks.
    for filters in (128, 256, 512):
        stack.append(layers.Conv2D(filters, (3, 3), activation='relu'))
        stack.append(layers.MaxPooling2D((2, 2)))

    stack.append(layers.Flatten())

    # Fully connected head; dropout after each hidden layer for regularization.
    for units in (512, 256):
        stack.append(layers.Dense(units, activation='relu'))
        stack.append(layers.Dropout(0.5))

    stack.append(layers.Dense(num_classes, activation='softmax'))
    return models.Sequential(stack)

In [None]:
# Build and compile CNN model.
# build_cnn_model has no default for num_classes, so it must be passed
# explicitly; omitting it raises TypeError before any training starts.
cnn_model = build_cnn_model(input_shape=(*IMAGE_SIZE, 3), num_classes=NUM_CLASSES)
cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train CNN model.
# Step counts use floor division, so a final partial batch is skipped in
# each epoch / validation pass.
cnn_model.fit(
    train_image_generator,
    steps_per_epoch=train_image_generator.samples // BATCH_SIZE,
    epochs=NUM_EPOCHS,
    validation_data=test_image_generator,
    validation_steps=test_image_generator.samples // BATCH_SIZE
)

In [None]:
# Persist both trained networks to HDF5 files in the working directory.
lstm_model.save(filepath='crime_detection_lstm_model.h5')
cnn_model.save(filepath='crime_detection_cnn_model.h5')