# Image Caption-based

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<img align="left" src="Images/unimodal-captions.png" alt="Unimodal caption-based pipeline" width="600">

## Imports and set-up

In [1]:
from Utils import load_data, preprocessing, model_performances
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import json
from sklearn.metrics import classification_report

In [2]:
# Fix the global random generators up front so repeated runs are comparable.
seed = 222
tf.random.set_seed(seed)  # TensorFlow global seed, applies to all devices (CPU and GPU)
np.random.seed(seed)  # NumPy global seed

In [4]:
# ________________________________________Utils ___________________________________________________
# Output folders for the 10-fold run on the training data.
# exist_ok=True keeps the cell safe to re-run and removes the check-then-create race.
for out_dir in ('./Unimodal/predictions', './Unimodal/performances'):
    os.makedirs(out_dir, exist_ok=True)

path_models = './Unimodal/models'
file_out = './Unimodal/performances/Captions_results_10Fold.txt'
predictions_csv_path = './Unimodal/predictions/Captions_pred_10Fold.csv'

# Start from an empty report file: mode 'w' creates it or erases previous content,
# and the context manager guarantees the handle is closed.
with open(file_out, 'w'):
    pass

label_column = "misogynous"  # column holding the ground-truth label
input_columns = ['caption_USE']  # feature column(s) handed to the preprocessing helpers
threshold = 0.5  # scores strictly above this value are counted as the positive class

In [5]:
# Model / training hyper-parameters.
embed_size = 512  # length of one Universal Sentence Encoder embedding vector
epochs = 100
batch_size = 64

## Load Data

In [6]:
# ________________________________________load training data ___________________________________________________
meme_df = load_data.load_azure_caption_training()

meme_df['caption_USE'] = preprocessing.use_preprocessing(meme_df, 'caption')


## 10-fold cross-validation on the training data

In [7]:
# ________________________________________train model on training data 10Fold________________________________________
kf = KFold(n_splits=10, shuffle=False)

iteration = 0
real_values = np.array([])
predict_values = np.array([])
ids = np.array([])


In [None]:
# 10-fold cross-validation: for every fold, train a fresh model and score the held-out rows.
for train_index, test_index in kf.split(meme_df):  # split into train and test
    preprocessing.set_seed(iteration)
    x_train, y_train, x_val, y_val, x_test, y_test = preprocessing.elaborate_data_10fold(
        meme_df,
        train_index,
        test_index,
        iteration,
        input_columns,
        label_column)
    model, history = model_performances.get_trained_model(
        x_train,
        y_train,
        x_val,
        y_val,
        input_shape=embed_size,
        activation_function='LeakyReLU',
        neurons=embed_size // 2,  # integer division: a layer width should be an int, not 256.0
        dropout=0.2,
        epochs=epochs)  # use the configured value instead of repeating the literal
    # From here on `iteration` is the 1-based fold number used in the report.
    iteration = iteration + 1

    # make prediction on the held-out fold
    pred = model.predict(x_test, batch_size=batch_size)

    fold_df = meme_df.iloc[test_index]
    predict_values = np.append(predict_values, pred)
    real_values = np.append(real_values, y_test)
    ids = np.append(ids, fold_df['file_name'].tolist())

    # Select the columns by name and take an explicit copy so that adding the score
    # column does not write into a slice of meme_df (avoids SettingWithCopyWarning).
    result_df = fold_df[['file_name', label_column]].copy()
    result_df['score_col'] = pred
    predicted_labels = (result_df['score_col'] > threshold).astype(int).values

    # write on file; the context manager closes the report even if a metric call fails
    with open(file_out, "a+") as report:
        report.write('\n\nITERAZIONE ' + str(iteration) + '\n')
        report.write(json.dumps(
            model_performances.compute_confusion_rates(result_df, 'score_col', label_column, threshold)))
        report.write('\n')
        report.write(classification_report(result_df[label_column].values,
                                           predicted_labels,
                                           target_names=['not_mis', 'mis']))

In [9]:
# results dataframe, save predictions
# One row per held-out sample, collected over all folds.
result_df = pd.DataFrame({'id': ids, 'real': real_values.astype(int), 'pred': predict_values})
result_df.to_csv(predictions_csv_path, index=False, sep='\t')

true_labels = result_df['real'].values
predicted_labels = (result_df['pred'] > threshold).astype(int).values

# Overall metrics _ write on file
# The context manager closes the report even if one of the metric helpers raises.
with open(file_out, "a+") as report:
    report.write('\n\n10 Fold Results ' + str(iteration) + '\n')
    report.write(json.dumps(model_performances.compute_confusion_rates(result_df, 'pred', 'real', threshold)))
    report.write('\n')
    report.write(classification_report(true_labels, predicted_labels, target_names=['not_mis', 'mis']))
    report.write('\n AUC:')
    report.write(str(model_performances.compute_auc(true_labels, result_df['pred'].values)))

In [None]:
# Training curves of the most recently trained model (`history` is overwritten on every fold):
# one figure for accuracy, one for loss, each with the train and validation series.
for metric in ('accuracy', 'loss'):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'dev'], loc='upper left')
    plt.show()

## 10-fold models evaluated on the test data

In [11]:
# ________________________________________Utils ___________________________________________________
if not os.path.exists('./Unimodal/predictionsTest'):
    os.makedirs('./Unimodal/predictionsTest')

if not os.path.exists('./Unimodal/performancesTest'):
    os.makedirs('./Unimodal/performancesTest')

path_models = './Unimodal/modelsTest'
file_out = './Unimodal/performancesTest/Captions_results_10Fold.txt'
predictions_csv_path = './Unimodal/predictionsTest/Captions_pred_10Fold.csv'

file = open(file_out, 'a+')
file.truncate(0)  
file.close()

In [12]:
# Load Test and preprocessing
test_df = load_data.load_azure_caption_test()
test_df['caption_USE'] = preprocessing.use_preprocessing(test_df, 'caption')

x_test, y_test = preprocessing.elaborate_input(test_df, input_columns, label_column)

In [13]:
# ________________________________________train model on training data 10Fold________________________________________
kf = KFold(n_splits=10, shuffle=False)

iteration = 0
real_values = np.array([])
predict_values = np.array([])
ids = np.array([])

In [None]:
# Train one model per fold (the left-out fold serves as validation data) and score
# every model on the same test set.
for train_index, val_index in kf.split(meme_df):  # split into train and validation
    preprocessing.set_seed(iteration)
    x_train, y_train = preprocessing.elaborate_input(meme_df.iloc[train_index, :], input_columns, label_column)
    x_val, y_val = preprocessing.elaborate_input(meme_df.iloc[val_index, :], input_columns, label_column)

    model, history = model_performances.get_trained_model(
        x_train,
        y_train,
        x_val,
        y_val,
        input_shape=embed_size,
        activation_function='LeakyReLU',
        neurons=embed_size // 2,  # integer division: a layer width should be an int, not 256.0
        dropout=0.2,
        epochs=epochs)  # use the configured value instead of repeating the literal

    # From here on `iteration` is the 1-based fold number used in the report.
    iteration = iteration + 1

    # make prediction on the test data with this fold's model
    pred = model.predict(x_test, batch_size=batch_size)

    # The test set is appended once per fold, so each test sample ends up in the
    # accumulators once for every trained model.
    predict_values = np.append(predict_values, pred)
    real_values = np.append(real_values, y_test)
    ids = np.append(ids, test_df['file_name'].tolist())

    result_df = test_df[['file_name', label_column]].copy()
    result_df['score_col'] = pred
    predicted_labels = (result_df['score_col'] > threshold).astype(int).values

    # write on file; the context manager closes the report even if a metric call fails
    with open(file_out, "a+") as report:
        report.write('\n\nITERAZIONE ' + str(iteration) + '\n')
        report.write(json.dumps(
            model_performances.compute_confusion_rates(result_df, 'score_col', label_column, threshold)))
        report.write('\n')
        report.write(classification_report(result_df[label_column].values,
                                           predicted_labels,
                                           target_names=['not_mis', 'mis']))


In [None]:
# Training curves of the most recently trained model (`history` is overwritten on every fold):
# one figure for accuracy, one for loss, each with the train and validation series.
for metric in ('accuracy', 'loss'):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'dev'], loc='upper left')
    plt.show()

In [16]:
# results dataframe, save predictions
# One row per (fold model, test sample) pair collected by the loop over the folds.
result_df = pd.DataFrame({'id': ids, 'real': real_values.astype(int), 'pred': predict_values})
result_df.to_csv(predictions_csv_path, index=False, sep='\t')

true_labels = result_df['real'].values
predicted_labels = (result_df['pred'] > threshold).astype(int).values

# Computed once and reused for both the report and the cell output.
overall_rates = model_performances.compute_confusion_rates(result_df, 'pred', 'real', threshold)

# Overall metrics _ write on file
# The context manager closes the report even if one of the metric helpers raises.
with open(file_out, "a+") as report:
    report.write('\n\n10 Fold Results ' + str(iteration) + '\n')
    report.write(json.dumps(overall_rates))
    report.write('\n')
    report.write(classification_report(true_labels, predicted_labels, target_names=['not_mis', 'mis']))
    report.write('\n AUC:')
    report.write(str(model_performances.compute_auc(true_labels, result_df['pred'].values)))

# Last expression of the cell: show the overall rates as the cell output.
overall_rates

{'tpr': 0.8988,
 'tnr': 0.2118,
 'fpr': 0.7882,
 'fnr': 0.10119999999999996,
 'precision': 0.5327800829875519,
 'recall': 0.8988,
 'accuracy': 0.5553,
 'f1': 0.6689988835132118,
 'auc': 0.6087016}