In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import requests
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Input, Dense, Flatten, Concatenate, Lambda
from transformers import TFBertModel, BertTokenizer
import tensorflow as tf
from io import BytesIO

# Both BERT artifacts come from the same pretrained checkpoint. They are built
# once at module level so every later call reuses the same objects.
_bert_checkpoint = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(_bert_checkpoint)
bert_model = TFBertModel.from_pretrained(_bert_checkpoint)

# Maps image URL -> decoded image array; filled lazily by preprocess_image.
processed_images_cache = dict()

# Image Preprocessing function (download and resize)
def preprocess_image(img_url, timeout=10):
    """Download an image and return it as a normalized 224x224 RGB array.

    Successfully decoded images are memoized in ``processed_images_cache`` so
    each URL is downloaded at most once. Failures are deliberately not cached,
    which lets a later call retry the download.

    Args:
        img_url: URL of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        An array of shape (224, 224, 3) with pixel values divided by 255
        (float32 under the default Keras float type). If downloading or
        decoding fails, the error is printed and an all-zero float32 array of
        the same shape is returned instead (best-effort behaviour).
    """
    if img_url in processed_images_cache:
        return processed_images_cache[img_url]

    try:
        # No stream=True: the whole body is read right away via .content, and
        # a non-streamed response always releases its connection, even when
        # raise_for_status() raises.
        response = requests.get(img_url, timeout=timeout)
        response.raise_for_status()  # Treat HTTP 4xx/5xx as failures
        img = image.load_img(BytesIO(response.content), target_size=(224, 224))
        img_array = image.img_to_array(img) / 255.0  # Scale pixels to [0, 1]
    except Exception as e:
        print(f"Error processing image: {e}")
        # Same dtype as the success path so stacked batches stay float32.
        return np.zeros((224, 224, 3), dtype=np.float32)

    processed_images_cache[img_url] = img_array
    return img_array

# Text Preprocessing function (using BERT tokenizer)
def preprocess_text(text):
    """Tokenize ``text`` with the shared BERT tokenizer and return its token ids.

    The text is padded/truncated to exactly 128 tokens and only the
    ``input_ids`` entry of the tokenizer output is returned.

    Args:
        text: The text to encode. A missing value (``None`` or a NaN float, as
            pandas produces for empty CSV cells) is encoded as the empty
            string instead of crashing the tokenizer. Any other value is
            handed to the tokenizer unchanged.

    Returns:
        The ``input_ids`` produced by the tokenizer with ``return_tensors='tf'``
        (presumably a TensorFlow tensor of shape (1, 128) for a single string).
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        text = ""
    encoded = tokenizer(text, padding='max_length', truncation=True, max_length=128, return_tensors='tf')
    return encoded['input_ids']

# Load the crime records from disk.
# NOTE(review): ".crdownload" is the suffix Chrome gives to unfinished
# downloads, so this file may be truncated -- confirm it is complete.
crime_csv_path = 'Crimes_-_2001_to_Present.csv (1).crdownload'
crime_data = pd.read_csv(crime_csv_path)

# Every record gets the same placeholder image URL (replace with real
# per-record URLs for actual use). Assigning a scalar broadcasts it to all rows.
placeholder_image_url = 'https://images.squarespace-cdn.com/content/v1/57ba3b9e29687f1ef5ca5235/1481686391098-XCQ7R6AT35LP82NCBK4I/5203687_orig.jpg'
crime_data['image_url'] = placeholder_image_url

# Model architecture: a frozen ImageNet ResNet50 image branch and a BERT text
# branch, concatenated and fed to a small binary classification head.
image_input = Input(shape=(224, 224, 3))

# Images reach the model scaled to [0, 1] (preprocess_image divides by 255),
# but the ImageNet ResNet50 weights expect resnet50.preprocess_input applied to
# 0-255 RGB values. Undo the scaling and apply that preprocessing in-model so
# callers keep feeding the same arrays.
resnet_ready = Lambda(
    lambda img: tf.keras.applications.resnet50.preprocess_input(img * 255.0),
    output_shape=(224, 224, 3),
)(image_input)

# pooling='avg' yields one 2048-dim vector per image. Flattening the 7x7x2048
# feature map instead gives 100352 features, which swamps the 768-dim text
# vector and makes the first Dense layer ~25M parameters.
resnet_base = ResNet50(weights='imagenet', include_top=False, input_tensor=resnet_ready, pooling='avg')

# Freeze the pretrained backbone. Fine-tuning all of it with default Adam on
# tiny batches made the loss jump wildly, and every row currently shares one
# image, so BatchNorm layers in training mode would see zero-variance batches.
resnet_base.trainable = False
resnet_out = resnet_base.output

text_input = Input(shape=(128,), dtype=tf.int32)


def _bert_pooled_output(token_ids):
    """Return BERT's pooled output, ignoring padding positions.

    The texts are padded to 128 tokens, so without an attention mask BERT
    would attend to the padding tokens as if they were real input.
    """
    attention_mask = tf.cast(tf.not_equal(token_ids, tokenizer.pad_token_id), tf.int32)
    return bert_model(token_ids, attention_mask=attention_mask)[1]


# NOTE(review): BERT is called inside a Lambda layer, so its weights are
# probably not tracked by this Keras model (i.e. BERT is effectively frozen)
# -- confirm if fine-tuning BERT is intended.
text_out = Lambda(_bert_pooled_output, output_shape=(768,))(text_input)

# Fuse both modalities and classify (sigmoid output; label 1 means ROBBERY).
combined = Concatenate()([resnet_out, text_out])
x = Dense(256, activation='relu')(combined)
x = Dense(1, activation='sigmoid')(x)

model = tf.keras.models.Model(inputs=[image_input, text_input], outputs=x)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Efficient batch data processing
def batch_data_processing(crime_data, batch_size=32):
    """Yield ``([images, token_ids], labels)`` training batches from ``crime_data``.

    Args:
        crime_data: DataFrame with ``image_url``, ``Description`` and
            ``Primary Type`` columns.
        batch_size: Maximum number of rows per batch; the last batch may be
            smaller.

    Yields:
        A ``([X_image_batch, X_text_batch], y_batch)`` tuple where
        ``X_image_batch`` stacks the result of ``preprocess_image`` for every
        row, ``X_text_batch`` holds the token ids returned by
        ``preprocess_text`` for the batch, and ``y_batch`` is 1 for rows whose
        ``Primary Type`` is ``'ROBBERY'`` and 0 otherwise.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (raised when iteration
            starts, because this is a generator).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    for start in range(0, len(crime_data), batch_size):
        batch = crime_data.iloc[start:start + batch_size]

        # Images: one array per row, stacked along a new leading batch axis.
        X_image_batch = np.array([preprocess_image(url) for url in batch['image_url']])

        # Text: tokenize the whole batch in a single call instead of once per
        # row. Missing descriptions (NaN) become empty strings so the
        # tokenizer only ever receives str values.
        descriptions = batch['Description'].fillna('').astype(str).tolist()
        X_text_batch = preprocess_text(descriptions).numpy()

        # Labels: 1 for 'ROBBERY', 0 for every other crime type.
        y_batch = (batch['Primary Type'] == 'ROBBERY').to_numpy().astype(int)

        yield [X_image_batch, X_text_batch], y_batch

# Train the model for NUM_EPOCHS full passes over the data.
NUM_EPOCHS = 1
LOG_EVERY_N_BATCHES = 10  # How often to print the metrics Keras reports

for epoch in range(NUM_EPOCHS):
    # Report the real epoch count (the message used to say "/5" while only
    # one epoch was run).
    print(f"Epoch {epoch + 1}/{NUM_EPOCHS}")
    for step, (batch_data, batch_labels) in enumerate(batch_data_processing(crime_data), start=1):
        # train_on_batch runs exactly one gradient update on this batch.
        # Calling model.fit() once per batch instead rebuilds the input
        # pipeline and prints a fresh one-step progress bar every time.
        logs = model.train_on_batch(batch_data, batch_labels, return_dict=True)
        if step % LOG_EVERY_N_BATCHES == 0:
            metrics_text = ", ".join(f"{name}={float(value):.4f}" for name, value in logs.items())
            print(f"  batch {step}: {metrics_text}")

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 87s/step - accuracy: 0.1875 - loss: 1.2224
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 38s/step - accuracy: 1.0000 - loss: 0.0000e+00
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 35s/step - accuracy: 0.9688 - loss: 6.6173
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 35s/step - accuracy: 1.0000 - loss: 5.0908e-09
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 34s/step - accuracy: 0.0625 - loss: 35.7830
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 34s/step - accuracy: 0.9062 - loss: 14.7301
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 35s/step - accuracy: 0.9688 - loss: 1.8310
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 35s/step - accuracy: 0.0000e+00 - loss: 18.4072
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 36s/step - accuracy: 0.9375 - loss: 7.9546
[1m1/1[0m [32m━━━━

KeyboardInterrupt: 

In [None]:
# Test on a new image and text.
# This cell needs the first cell (helper functions and trained `model`) to have
# been run in the current kernel; otherwise it fails with a NameError.
test_img_url = 'https://example.com/new_robbery_image.jpg'  # Replace with an actual image URL
test_text = "A new robbery was reported near downtown."

# Preprocess the inputs
test_img_array = preprocess_image(test_img_url)
test_text_input = preprocess_text(test_text)

# preprocess_image returns one (224, 224, 3) image, but the model expects a
# leading batch axis. The token ids are converted to NumPy so both model inputs
# have the same array type.
test_img_batch = np.expand_dims(test_img_array, axis=0)
test_text_batch = np.asarray(test_text_input)

# Make the prediction
prediction = model.predict([test_img_batch, test_text_batch])

# Display the prediction. The model was trained with label 1 = 'ROBBERY' and
# 0 = any other crime type, so the output is a robbery probability rather than
# a crime / no-crime decision.
robbery_probability = float(prediction[0][0])
print(f'Prediction: {"Robbery" if robbery_probability > 0.5 else "Not robbery"} (p={robbery_probability:.3f})')

NameError: name 'preprocess_image' is not defined

In [None]:
!pip install tensorflowjs
import shutil
shutil.make_archive('tfjs_model', 'zip', 'tfjs_model')
from google.colab import files
files.download('tfjs_model.zip')



FileNotFoundError: [Errno 2] No such file or directory: 'tfjs_model'