In [1]:
import random
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import os
import pandas as pd
from PIL import Image, ImageOps

In [2]:
!pip install transformers
from transformers import BertTokenizer, TFBertModel



In [3]:
!pip install efficientnet
from efficientnet.tfkeras import EfficientNetB0



Load BERT model and tokenizer

In [4]:
bert_model = TFBertModel.from_pretrained('bert-base-uncased')
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

In [5]:
# Load EfficientNet model
efficientnet_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

In [6]:
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [7]:
import shutil
shutil.unpack_archive("/content/drive/MyDrive/MultiOFF_Dataset.zip", "/content/Datasets/")

In [8]:
dataset_path = '/content/Datasets/MultiOFF_Dataset/Split Dataset'

train_df = pd.read_csv(os.path.join(dataset_path, 'Training_meme_dataset.csv'))
test_df = pd.read_csv(os.path.join(dataset_path, 'Testing_meme_dataset.csv'))
val_df = pd.read_csv(os.path.join(dataset_path, 'Validation_meme_dataset.csv'))

In [9]:
image_folder = '/content/Datasets/MultiOFF_Dataset/Labelled Images'
train_df['image_name'] = train_df['image_name'].apply(lambda x: os.path.join(image_folder, x))
test_df['image_name'] = test_df['image_name'].apply(lambda x: os.path.join(image_folder, x))
val_df['image_name'] = val_df['image_name'].apply(lambda x: os.path.join(image_folder, x))

In [10]:
train_df.head()

Unnamed: 0,image_name,sentence,label
0,/content/Datasets/MultiOFF_Dataset/Labelled Im...,OFFICIAL BERNIE SANDERS DRINKING GAME ! Every ...,Non-offensiv
1,/content/Datasets/MultiOFF_Dataset/Labelled Im...,2:28 PM THIS IS A WALL INSIDE A NAZI GAS CHAMB...,offensive
2,/content/Datasets/MultiOFF_Dataset/Labelled Im...,o shit waddup ! BERNIE SANDERS COM,offensive
3,/content/Datasets/MultiOFF_Dataset/Labelled Im...,`` MITT ROMNEY IS THE WORST REPUBLICAN IN THE ...,Non-offensiv
4,/content/Datasets/MultiOFF_Dataset/Labelled Im...,Anonymous ( ID : duqdA1io a 08/05/16 ( Fri ) 1...,Non-offensiv


In [11]:
def load_images_from_df(df):
    image_samples = []

    for image_name in df['image_name']:
        image = Image.open(image_name)  # Open the image file

        # Resize the image while maintaining the aspect ratio
        aspect_ratio = image.width / image.height
        target_size = (224, int(224 / aspect_ratio))
        image = image.resize(target_size, Image.LANCZOS)

        # Add padding if the image dimensions are smaller than (224, 224)
        if target_size[1] < 224:
            padding = (0, (224 - target_size[1]) // 2, 0, (224 - target_size[1]) // 2)
            image = ImageOps.expand(image, padding)

        # Convert the image to a numpy array
        image = np.array(image)
        image_samples.append(image)

    return np.array(image_samples)

In [12]:
image_train = load_images_from_df(train_df)
image_test = load_images_from_df(test_df)
image_val = load_images_from_df(val_df)

  return np.array(image_samples)


In [13]:
image_train.shape

(445,)

In [14]:
len(train_df)

445

In [15]:
text_train = train_df['sentence'].tolist()
text_test = test_df['sentence'].tolist()
text_val = val_df['sentence'].tolist()
len(text_train)

445

In [16]:
labels_train = train_df['label'].values
labels_test = test_df['label'].values
labels_val = val_df['label'].values

In [17]:
# Augmentation using Image-Text Swapping
augmented_text = []
augmented_image = []

for i in range(len(text_train)):
    swapped_idx = random.randint(0, len(text_train) - 1)

    augmented_text.append(text_train[i])
    augmented_image.append(image_train[swapped_idx])

# # Convert the lists to numpy arrays
# augmented_text = np.array(augmented_text)
# augmented_image = np.array(augmented_image)

# # Print the shape of augmented data
# print("Shape of augmented_text:", augmented_text.shape)
# print("Shape of augmented_image:", augmented_image.shape)

In [18]:
# Tokenize text data for BERT input
inputs = bert_tokenizer(augmented_text, padding=True, truncation=True, return_tensors="tf")

# Convert image data to NumPy array
image_data = np.array(augmented_image)

  image_data = np.array(augmented_image)


In [19]:
max_seq_length = max([len(tokens) for tokens in augmented_text])
batch_size = 16
num_samples = len(augmented_text)
num_batches = (num_samples + batch_size - 1) // batch_size

all_text_embeddings = []

for i in range(num_batches):
    start_idx = i * batch_size
    end_idx = min((i + 1) * batch_size, num_samples)
    batch_inputs = bert_tokenizer(augmented_text[start_idx:end_idx], padding='max_length', max_length=max_seq_length, truncation=True, return_tensors="tf")
    batch_embeddings = bert_model(batch_inputs)["last_hidden_state"]
    all_text_embeddings.append(batch_embeddings)

text_embeddings = tf.concat(all_text_embeddings, axis=0)

ResourceExhaustedError: ignored

In [None]:
# Generate BERT embeddings for text data
text_embeddings = bert_model(inputs)["last_hidden_state"]

In [None]:
# Process image data through EfficientNet
image_embeddings = efficientnet_model.predict(image_data)


In [None]:
# Combine text and image embeddings
combined_embeddings = tf.concat([text_embeddings, image_embeddings], axis=1)


In [None]:
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(combined_embeddings.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
# Train the model
model.fit(combined_embeddings, labels_train, epochs=10, batch_size=32, validation_split=0.2)

In [None]:
# Evaluate the model on the validation set
val_inputs = bert_tokenizer(text_val, padding=True, truncation=True, return_tensors="tf")
val_text_embeddings = bert_model(val_inputs)["last_hidden_state"]
val_image_embeddings = efficientnet_model.predict(image_val)
val_combined_embeddings = tf.concat([val_text_embeddings, val_image_embeddings], axis=1)

val_predictions = model.predict(val_combined_embeddings)
val_predictions_binary = [1 if pred >= 0.5 else 0 for pred in val_predictions]

val_accuracy = accuracy_score(labels_val, val_predictions_binary)
print("Validation Accuracy:", val_accuracy)