In [1]:
import re
import string
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import tensorflow as tf
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPool2D, GlobalAveragePooling2D
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Activation, Dropout
from tensorflow.keras.layers import Conv1D, Embedding, GlobalAveragePooling1D
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.preprocessing import image

from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
df = pd.read_csv('/content/drive/My Drive/Shakuntal/labels.csv')
df.drop(df.columns[df.columns.str.contains('unnamed',case = False)],axis = 1, inplace = True)
df = df.drop(columns = ['text_ocr'])
df.dropna(inplace=True)
df.head()

Unnamed: 0,image_name,text_corrected,humour,sarcasm,offensive,motivational,overall_sentiment
0,image_1.jpg,LOOK THERE MY FRIEND LIGHTYEAR NOW ALL SOHALIK...,hilarious,general,not_offensive,not_motivational,positive
1,image_2.jpeg,The best of #10 YearChallenge! Completed in le...,not_funny,general,not_offensive,motivational,positive
2,image_3.JPG,Sam Thorne @Strippin ( Follow Follow Saw every...,very_funny,not_sarcastic,not_offensive,not_motivational,positive
3,image_4.png,10 Year Challenge - Sweet Dee Edition,very_funny,twisted_meaning,very_offensive,motivational,positive
4,image_5.png,10 YEAR CHALLENGE WITH NO FILTER 47 Hilarious ...,hilarious,very_twisted,very_offensive,not_motivational,neutral


In [4]:
cat_replace = {'not_funny': 0, 'funny': 1, 'very_funny': 1, 'hilarious': 1}
df['humour'] = df['humour'].replace(cat_replace)
cat_replace = {'not_sarcastic': 0, 'general': 1, 'twisted_meaning': 1, 'very_twisted': 1}
df['sarcasm'] = df['sarcasm'].replace(cat_replace)
cat_replace = {'not_offensive': 0, 'slight': 1, 'very_offensive': 1, 'hateful_offensive': 1}
df['offensive'] = df['offensive'].replace(cat_replace)
cat_replace = {'not_motivational': 0, 'motivational': 1}
df['motivational'] = df['motivational'].replace(cat_replace)

In [63]:
cleaned = df.copy()
cleaned.dropna(inplace=True)
cleaned.isnull().any()

Unnamed: 0,0
image_name,False
text_corrected,False
humour,False
sarcasm,False
offensive,False
motivational,False
overall_sentiment,False


In [6]:
rows_to_drop = ['image_120.jpg',
              'image_4800.jpg',
              'image_6782.jpg',
              'image_6785.jpg',
              'image_6787.jpg',
              'image_6988.jpg',
              'image_6989.jpg',
              'image_6990.png',
              'image_6991.jpg',
              'image_6992.jpg']

In [7]:
for images in rows_to_drop:
    df.drop(df[df['image_name'] == images].index, inplace=True)

In [8]:
import os
from PIL import Image

width = 100
height = 100
X = []

image_folder = '/content/drive/My Drive/Shakuntal/images/'
for image_name in tqdm(df['image_name']):
    path = os.path.join(image_folder, image_name)
    img = Image.open(path).convert('RGB').resize((width, height))
    img = np.array(img)/255.0
    X.append(img)

X = np.array(X)

100%|██████████| 6982/6982 [1:15:40<00:00,  1.54it/s]


In [64]:
target = cleaned.iloc[:,2:]
target.head()

Unnamed: 0,humour,sarcasm,offensive,motivational,overall_sentiment
0,1,1,0,0,positive
1,0,1,0,1,positive
2,1,0,0,0,positive
3,1,1,1,1,positive
4,1,1,1,0,neutral


In [65]:
X_train, X_test, y_train, y_test = train_test_split(X, target, test_size = 0.2)


In [61]:
import tensorflow as tf

from tensorflow.keras.layers import Conv1D, Embedding, GlobalAveragePooling1D,Input

image_input = Input(shape=(100, 100, 3), name='image_input')

data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip('horizontal'),
    tf.keras.layers.RandomContrast(0.5),  # Changed from [0.5, 2] to a single float
    tf.keras.layers.RandomRotation(0.2),
    tf.keras.layers.RandomZoom(0.1)
])
preprocess_input = tf.keras.applications.resnet_v2.preprocess_input
rescale = tf.keras.layers.Rescaling(1./127.5, offset=-1)
augmented_image = data_augmentation(image_input)

In [None]:
plt.figure(figsize=(10, 10))
for i in range(9):
  augmented_image = data_augmentation(X)
  ax = plt.subplot(3, 3, i + 1)
  plt.imshow(augmented_image[0])
  plt.axis("off")

In [46]:
from tensorflow.keras.layers import Concatenate
model_1 = tf.keras.applications.ResNet50(input_shape=X[0].shape,
                                              include_top=False,
                                              weights='imagenet')
model_2 = tf.keras.applications.VGG16(input_shape=X[0].shape,
                                           include_top=False,
                                           weights='imagenet')

# Set base models to be non-trainable
for base_model in [model_1, model_2]:
    base_model.trainable = False

# Define image model function
def image_model():
    inputs = tf.keras.Input(shape=X[0].shape)
    x = data_augmentation(inputs)
    x = preprocess_input(x)
    x1 = model_1(x)
    x1 = Dropout(0.2)(x1)
    x1 = Conv2D(2048, kernel_size=2, padding='valid')(x1)
    x1 = Dropout(0.3)(x1)
    x1 = Dense(512)(x1)
    x1 = Dropout(0.5)(x1)
    x2 = model_2(x)
    x2 = Dropout(0.4)(x2)
    x2 = Dense(512)(x2)
    x2 = Dropout(0.2)(x2)
    x = Concatenate()([x1, x2])
    x = Dropout(0.3)(x)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.5)(x)
    return inputs, x

# Create image model
image_input, image_layers = image_model()

In [47]:
def standardization(data):
    data = data.apply(lambda x: x.lower())
    data = data.apply(lambda x: re.sub(r'\d+', '', x))
    data = data.apply(lambda x: re.sub(r'\w*.com\w*', '', x, flags=re.MULTILINE))
    data = data.apply(lambda x: x.translate(str.maketrans('', '', string.punctuation)))
    return data

df['text_corrected'] = standardization(df.text_corrected)

In [48]:
from tensorflow.keras.layers import TextVectorization
import numpy as np
import pandas as pd

# Assuming df is your DataFrame containing a 'text_corrected' column

vocab_size = 10000
sequence_length = 200

vectorize_layer = TextVectorization(
    max_tokens=vocab_size,
    output_mode='int',
    output_sequence_length=sequence_length
)

# Convert the DataFrame column to a list of strings
text_ds = df['text_corrected'].astype(str).tolist()

# Adapting the layer to the dataset
vectorize_layer.adapt(text_ds)

# Example: vectorizing the text data after adaptation
vectorized_text = vectorize_layer(np.array(text_ds))



In [49]:
X_text = df.text_corrected


In [50]:
X_text_train, X_text_test, y_text_train, y_text_test = train_test_split(X_text, y, test_size = 0.3)

In [51]:
embedding_dim=32

def text_model():
    text_input = tf.keras.Input(shape=(None,), dtype=tf.string, name='text')
    text_layers = vectorize_layer(text_input)
    text_layers = tf.keras.layers.Embedding(vocab_size, embedding_dim, name="embedding")(text_layers)
    dropout_layer = Dropout(0.3)(text_layers)

    text_layers = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(512, activation='relu', return_sequences=True))(text_layers)
    dropout_layer = Dropout(0.4)(text_layers)
    text_layers = tf.keras.layers.BatchNormalization()(text_layers)

    text_layers = tf.keras.layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(text_layers)
    dropout_layer = Dropout(0.2)(text_layers)
    text_layers = tf.keras.layers.GlobalMaxPooling1D()(text_layers)
    dropout_layer = Dropout(0.5)(text_layers)

    text_layers = tf.keras.layers.Dense(2048, activation="relu")(text_layers)
    text_layers = tf.keras.layers.Dropout(0.2)(text_layers)
    return text_input, text_layers

text_input, text_layers = text_model()

In [52]:
def model(layer_1, layer_2, image_input, text_input):
    concatenate = tf.keras.layers.concatenate([layer_1, layer_2], axis=1)
    semi_final_layer = tf.keras.layers.Dense(1024, activation='relu')(concatenate)

    prediction_layer_1 = tf.keras.layers.Dense(1, activation='softmax', name = 'humuor')
    prediction_layer_2 = tf.keras.layers.Dense(1, activation='softmax', name = 'sarcasm')
    prediction_layer_3 = tf.keras.layers.Dense(1, activation='softmax', name = 'offensive')
    prediction_layer_4 = tf.keras.layers.Dense(1, activation='softmax', name = 'motivational')

    output_1 = prediction_layer_1(semi_final_layer)
    output_2 = prediction_layer_2(semi_final_layer)
    output_3 = prediction_layer_3(semi_final_layer)
    output_4 = prediction_layer_4(semi_final_layer)

    model = tf.keras.Model(inputs = [image_input, text_input] ,
                           outputs = [output_1, output_2, output_3, output_4])
    return model

In [53]:
model = model(image_layers, text_layers, image_input, text_input)


In [54]:
base_learning_rate = 0.0001
losses = {
      "humuor": "sparse_categorical_crossentropy",
      "sarcasm": "sparse_categorical_crossentropy",
      "offensive": "sparse_categorical_crossentropy",
      "motivational": "sparse_categorical_crossentropy"
}
lossWeights = {
      "humuor": 1.0,
      "sarcasm": 1.0,
      "offensive": 1.0,
      "motivational": 1.0
}
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
              loss = losses,
              loss_weights= lossWeights,
              metrics=['accuracy'])

In [55]:
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard, ModelCheckpoint

# Define early stopping callback
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3, # number of epochs to wait before stopping
    restore_best_weights=True
)

# Define tensorboard callback
tensorboard = TensorBoard(log_dir='./logs')

# Define model checkpoint callback
checkpoint_prefix = "./checkpoints/checkpoint.weights.h5"
model_checkpoint = ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True,
)

In [60]:
history = model.fit(
    x={"input_layer_3": X_train, "text": X_text_train},
    y={"sarcasm": y_train.sarcasm,
       "humuor": y_train.humour,
       "offensive": y_train.offensive,
       "motivational": y_train.motivational},
    batch_size=32,
    epochs=5,
    validation_data=({"input_layer_3": X_test, "text": X_text_test},
                     {"sarcasm": y_test.sarcasm,
                      "humuor": y_test.humour,
                      "offensive": y_test.offensive,
                      "motivational": y_test.motivational}),
    callbacks=[early_stopping, tensorboard, model_checkpoint]
)

Epoch 1/5


AttributeError: 'NoneType' object has no attribute 'items'

In [58]:

# Compile the model
model.compile(optimizer='adam',
              loss={'sarcasm': 'binary_crossentropy',
                    'humuor': 'binary_crossentropy',
                    'offensive': 'binary_crossentropy',
                    'motivational': 'binary_crossentropy'},
              metrics=['accuracy'])
