In [None]:
# Show which NVIDIA GPU (if any) is attached to this runtime.
!nvidia-smi

In [None]:
!pip install transformers
!pip install simpletransformers

In [None]:
!gdown --id 1lSu7e7BmF1xUHIVMldWuKBh_10Qbsgi_ #multi_modal_classification_model
!mv "/content/multi_modal_classification_model.py" "/usr/local/lib/python3.7/dist-packages/simpletransformers/classification/multi_modal_classification_model.py"

**NOTE:** Run the previous two cells and then restart the runtime before continuing, so that the replaced module file is the one picked up on import.

# Setup and Config

In [None]:
# Download a file from Google Drive by its file id
# (presumably the MAMI.zip archive that the next cell unpacks; verify).
!gdown --id 10RR747R3UAUJCUkFvrebt6ok11iJgOqj

In [None]:
!unzip "/content/MAMI.zip"
!rm MAMI.zip

In [None]:
import sklearn
import pandas as pd
import numpy as np
import torch
from sklearn.metrics import *
from sklearn.model_selection import train_test_split
from simpletransformers.classification import MultiModalClassificationModel, MultiModalClassificationArgs
import seaborn as sns

In [None]:
import logging

# Limit the `transformers` logger to warnings and above.
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# Configure the root logger at INFO level.
logging.basicConfig(level=logging.INFO)

In [None]:
import random

seed = 777

# Seed every random number generator used in this notebook: Python's own,
# NumPy's global generator, and PyTorch on CPU and on all visible GPUs.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)  # silently ignored when CUDA is unavailable

# Ask cuDNN for deterministic kernels and switch off its auto-tuner, which may
# otherwise pick different algorithms from one run to the next.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Data

In [None]:
# Load the tab-separated training annotations; the bare `df` on the last line
# makes the notebook display the frame.
df = pd.read_csv('/content/MAMI/TRAINING/training.csv', sep='\t')
df

Unnamed: 0,file_name,misogynous,shaming,stereotype,objectification,violence,Text Transcription
0,1.jpg,0,0,0,0,0,Milk Milk.zip
1,10.jpg,1,0,0,0,1,"ROSES ARE RED, VIOLETS ARE BLUE IF YOU DON'T S..."
2,1000.jpg,0,0,0,0,0,BREAKING NEWS: Russia releases photo of DONALD...
3,10000.jpg,0,0,0,0,0,MAN SEEKING WOMAN Ignad 18 O
4,10006.jpg,0,0,0,0,0,Me explaining the deep lore of. J.R.R. Tolkein...
...,...,...,...,...,...,...,...
9995,15002.jpg,0,0,0,0,0,WAITING FOR THE END OF THE COVID imgflip.com
9996,15003.jpg,0,0,0,0,0,SMART WOMEN ARE AROUND imgflip.com
9997,15004.jpg,0,0,0,0,0,GOOD GIRLS ARE BEHIND THE CORNER imgflip.com
9998,15005.jpg,0,0,0,0,0,COOKING FOR MY WIFE imgflip.com


In [None]:
import nltk
import re 
from nltk import word_tokenize

nltk.download('punkt')

# Hashtags, @-mentions and web addresses (http(s)://... or www....).
_HANDLE_OR_URL_RE = re.compile(
    r"#[\w\d]*|@[.]?[\w\d]*[\'\w*]*|https?:\/\/\S+\b|"
    r"www\.(\w+\.)+\S*"
)
# Anything that looks like an HTML tag, matched non-greedily.
_HTML_TAG_RE = re.compile('<.*?>')


def preprocess_text(text):
  """Clean one text transcription.

  Steps, in order:
    1. strip leading/trailing whitespace;
    2. delete hashtags, @-mentions and web addresses;
    3. replace HTML tags with a single space;
    4. tokenise with NLTK's ``word_tokenize`` and keep only purely
       alphanumeric tokens, joined by single spaces.

  Args:
    text: the raw transcription. Anything that is not a ``str`` (for example
      the float NaN pandas uses for an empty cell) is treated as empty.

  Returns:
    The cleaned text as a single space-separated string; ``''`` for
    non-string input.
  """
  # An empty CSV cell arrives as NaN, which has no .strip().
  if not isinstance(text, str):
    return ''

  text = text.strip()
  text = _HANDLE_OR_URL_RE.sub('', text)
  text = _HTML_TAG_RE.sub(' ', text)

  # str.isalnum() already covers all-alphabetic and all-numeric tokens, so it
  # is the only check needed to drop punctuation and symbol tokens.
  tokens = word_tokenize(text)
  return ' '.join(token for token in tokens if token.isalnum())

In [None]:
# Pull the image file names, cleaned transcriptions and binary labels out of
# the annotation frame as NumPy arrays.
train_images = np.array(df['file_name'])
train_text = np.array(list(map(preprocess_text, df['Text Transcription'])))
train_labels = np.array(df['misogynous'])

In [None]:
# One [image, text, label] row per training example.
train_data = [
    [image, text, label]
    for image, text, label in zip(train_images, train_text, train_labels)
]

In [None]:
# Turn the list of rows into a DataFrame. Note that `train_data` is rebound
# here from a list to a DataFrame.
# NOTE(review): presumably 'images' / 'text' / 'labels' are the column names
# MultiModalClassificationModel expects; confirm against the library.
train_data = pd.DataFrame(train_data, columns=['images','text','labels'])
train_data

Unnamed: 0,images,text,labels
0,1.jpg,Milk,0
1,10.jpg,ROSES ARE RED VIOLETS ARE BLUE IF YOU DO SAY Y...,1
2,1000.jpg,BREAKING NEWS Russia releases photo of DONALD ...,0
3,10000.jpg,MAN SEEKING WOMAN Ignad 18 O,0
4,10006.jpg,Me explaining the deep lore of Tolkein world o...,0
...,...,...,...
9995,15002.jpg,WAITING FOR THE END OF THE COVID,0
9996,15003.jpg,SMART WOMEN ARE AROUND,0
9997,15004.jpg,GOOD GIRLS ARE BEHIND THE CORNER,0
9998,15005.jpg,COOKING FOR MY WIFE,0


In [None]:
# train_df, val_df = train_test_split(train_df, test_size=0.1)

In [None]:
# Sanity check: number of rows in the training frame.
print(len(train_data))

10000


# Model

In [None]:
from sklearn.model_selection import KFold
import sklearn

In [None]:
n = 5
kf = KFold(n_splits=n, random_state=seed, shuffle=True)
results = []  # validation accuracy of each fold, in fold order

model_args = MultiModalClassificationArgs(
    num_train_epochs=1,
    fp16=False,
    train_batch_size=32,
    learning_rate=1e-5,
    max_seq_length=64,
    save_best_model=True,
    overwrite_output_dir=True,
    save_model_every_epoch=False,
    gradient_accumulation_steps=1,
    do_lower_case=True
)

for train_index, val_index in kf.split(train_data):
    train_df = train_data.iloc[train_index]
    val_df = train_data.iloc[val_index]

    # Start every fold from the pretrained checkpoint. Re-using one model
    # object across folds means that, from the second fold on, the model has
    # already been trained on rows that the current fold holds out, which
    # inflates the reported validation accuracy.
    model = MultiModalClassificationModel(
        'bert',
        'bert-base-uncased',
        use_cuda=True,
        label_list=[0, 1],
        args=model_args
    )

    # NOTE(review): presumably needed so the model code receives tuple outputs
    # rather than a ModelOutput object; confirm against the patched module.
    model.config.use_return_dict = False

    model.train_model(
        train_df,
        image_path='/content/MAMI/TRAINING'
    )

    result, model_outputs = model.eval_model(
        val_df,
        acc=sklearn.metrics.accuracy_score,
        image_path='/content/MAMI/TRAINING'
    )

    print(result['acc'])
    results.append(result['acc'])

# After the loop, `model`, `val_df`, `result` and `model_outputs` refer to the
# last fold; the cells below rely on that.

# Validation Data

In [None]:
# Evaluate on `val_df`, which at this point is the validation split of the
# last fold produced by the K-fold loop.
result, model_outputs = model.eval_model(val_df, image_path='/content/MAMI/TRAINING')

In [None]:
np.array(val_df.text)[2]

In [None]:
model_outputs[15]

In [None]:
from torch.nn.functional import softmax

# Predicted class per row: the index of the largest model output along dim 1
# (the maxima themselves are discarded).
_, preds = torch.tensor(model_outputs).max(dim=1)
preds[2]

In [None]:
from sklearn.metrics import f1_score

val_labels = np.array(val_df['labels'])

# Compute both averaging schemes first, rounded to three decimals, then report.
macro_f1 = round(f1_score(val_labels, preds, average="macro"), 3)
micro_f1 = round(f1_score(val_labels, preds, average="micro"), 3)

print("F1 macro:   {}".format(macro_f1))
print("F1 micro:   {}".format(micro_f1))

# Predict Test Data

In [None]:
import pandas as pd
import numpy as np

# Load the tab-separated test annotations.
test_df = pd.read_csv('/content/MAMI/TEST/Test.csv', sep='\t')

In [None]:
# Placeholder labels, one per test row. The list is sized from the frame
# rather than a hard-coded 1000, so the assignment cannot fail with a length
# mismatch if the test set has a different number of rows.
test_df['labels'] = [1] * len(test_df)

In [None]:
# Give the test frame the same 'images' / 'text' column names as `train_data`.
test_df = test_df.rename(columns={'file_name': 'images', 'Text Transcription': 'text'})

In [None]:
# Clean the test transcriptions with the same `preprocess_text` used for the training text.
test_df['text'] =  np.array([preprocess_text(text) for text in test_df['text']])

The labels here are placeholders: they are present only because `model.eval_model()` requires a label column, and they say nothing about the true classes.

In [None]:
test_df

Unnamed: 0,images,text,labels
0,15236.jpg,FACEBOOK SINGLES GROUPS BELIKE WHEN A NEW WOMA...,1
1,15805.jpg,SO IF YOU A FEMINIST HOW CAN YOU EAT DAIRY,1
2,16254.jpg,WHEN A CUTE GIRL LEFT YOUR MESSAGE ON SEEN,1
3,16191.jpg,Photographing something you want to show every...,1
4,15952.jpg,HEY BABE CAN YOU MAKE ME A SANDWICH Hey babe c...,1
...,...,...,...
995,15591.jpg,IT NOT YOUR FAULT You did design the dress you...,1
996,15049.jpg,THINK ABOUT HOW MUCH BETTER HER SKIN IS BREATH...,1
997,15363.jpg,THE STEREOTYPES ARE TRUE F SHE DOES HAVE A TIG...,1
998,15199.jpg,DRAWS NAKED PICTURES OF BLACK WOMEN 00 0000 GE...,1


In [None]:
# Run the model on the test set. The metrics this call logs are computed
# against the placeholder labels and carry no meaning; only `model_outputs`
# is used afterwards.
result, model_outputs = model.eval_model(test_df, image_path='/content/MAMI/TEST')

Running Evaluation:   0%|          | 0/125 [00:00<?, ?it/s]

INFO:simpletransformers.classification.multi_modal_classification_model:{'mcc': 0.0, 'tp': 722, 'tn': 0, 'fp': 0, 'fn': 278, 'acc': 0.936, 'eval_loss': 0.7911134588420391}


In [None]:
# Predicted class = index of the largest output in each row, e.g. [0.35 0.65] -> 1.
_, preds = torch.max(torch.tensor(model_outputs), dim=1)

In [None]:
# Write one "<image file name><TAB><predicted label>" line per test row.
with open('answer.txt', 'w') as f:
  f.writelines(
      f'{image}\t{pred}\n'
      for image, pred in zip(test_df.images, preds)
  )

# Temp voting 3

In [None]:
def hard_voting(answer1, answer2, answer3, image_names=None, output_path='final_answer.txt'):
  """Combine three prediction lists by majority vote and write the result.

  For every position the label 1 wins when more of the three answers vote 1
  than not; otherwise the label is 0. A vote counts as 1 when it is the
  string '1' (surrounding whitespace such as a trailing newline is ignored)
  or a non-string value equal to 1.

  Args:
    answer1, answer2, answer3: indexable sequences of predictions. The number
      of positions voted on is ``len(answer1)``.
    image_names: indexable collection of image file names, one per position.
      Defaults to the notebook-level ``test_df.images``.
    output_path: file to write, one "<image name><TAB><label>" line per
      position. Defaults to 'final_answer.txt'.

  Returns:
    The list of majority labels (ints 0 or 1).
  """
  def _is_positive(vote):
    # Predictions read back from a file are strings; in-memory predictions
    # are ints (or int-like scalars). Accept both instead of silently
    # counting every non-string vote as 0.
    if isinstance(vote, str):
      return vote.strip() == '1'
    return bool(vote == 1)

  answers = [answer1, answer2, answer3]

  final_answer = []
  for i in range(len(answer1)):
    cnt1 = sum(1 for a in answers if _is_positive(a[i]))
    cnt0 = len(answers) - cnt1
    final_answer.append(1 if cnt1 > cnt0 else 0)

  if image_names is None:
    image_names = test_df.images

  with open(output_path, 'w') as f:
    for i, pred in enumerate(final_answer):
      f.write(f'{image_names[i]}\t{pred}\n')

  return final_answer