In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix
from tqdm import tqdm

In [None]:
# Locations of the Flickr8k caption files and the train/test split listings.
data_set = '/kaggle/input/flickr8k/Flickr8K/'
captions_dir = f"{data_set}Flickr8k_text/"
train_images = f"{captions_dir}Flickr_8k.trainImages.txt"
test_images = f"{captions_dir}Flickr_8k.testImages.txt"

In [None]:
def load_all_captions(file_name):
    """Parse a caption token file into a DataFrame with one row per caption.

    Every non-blank line is split at its first tab character: the part
    before the tab is ``<image name>#<caption number>`` and the part after
    it is the caption text.

    Parameters
    ----------
    file_name : str or path-like
        Path of the token file to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``image``, ``caption#`` and ``caption``; all values are
        strings, in file order.
    """
    records = []
    # The context manager closes the file even if parsing raises.
    with open(file_name, "r") as text_file:
        for raw_line in text_file:
            line = raw_line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) hold no caption and
                # would otherwise become an all-empty row.
                continue
            image_id, _, caption = line.partition("\t")
            image_name, _, caption_number = image_id.partition("#")
            records.append([image_name, caption_number, caption])
    return pd.DataFrame(records, columns=['image', 'caption#', 'caption'])

In [None]:
# Parse the token file from the captions directory and preview the first rows.
captions_df = load_all_captions(f"{captions_dir}Flickr8k.token.txt")
captions_df.head()

In [None]:
def load_test_image(file_name):
    """Read a split listing (one image file name per line) into a DataFrame.

    Parameters
    ----------
    file_name : str or path-like
        Path of the listing file to read.

    Returns
    -------
    pandas.DataFrame
        Single column ``image`` holding the stripped, non-blank lines in
        file order.
    """
    # The context manager closes the file even if reading raises.
    with open(file_name, "r") as text_file:
        stripped_lines = (raw_line.strip() for raw_line in text_file)
        # Drop blank lines so they do not show up as empty image names.
        image_names = [name for name in stripped_lines if name]
    return pd.DataFrame(image_names, columns=['image'])

In [None]:
# Reuse the test-split path built in the configuration cell (`test_images`)
# instead of repeating the same literal here.
test_df = load_test_image(test_images)
test_df.head()

In [None]:
def get_ground_captions(test_df, captions_df, captions_per_image=5):
    """Collect the ground-truth captions for every image listed in ``test_df``.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Must have an ``image`` column; its row order defines the output order.
    captions_df : pandas.DataFrame
        Must have ``image`` and ``caption`` columns.
    captions_per_image : int, default 5
        Maximum number of captions kept per image (the first ones in
        ``captions_df`` order).

    Returns
    -------
    pandas.DataFrame
        Columns ``image`` and ``caption``. Each test image contributes up to
        ``captions_per_image`` rows; an image with fewer captions contributes
        the ones it has, and an image with none contributes no rows.
    """
    # Group captions by image in a single pass instead of filtering the whole
    # captions frame once per test image.
    captions_by_image = {}
    for image, caption in zip(captions_df['image'], captions_df['caption']):
        captions_by_image.setdefault(image, []).append(caption)

    # Build all records first and create the frame once: DataFrame.append was
    # removed in pandas 2.0 and growing a frame row by row is quadratic.
    # Iterating the column values (not index labels) also makes this work for
    # any index on test_df.
    records = [
        {'image': image, 'caption': caption}
        for image in test_df['image']
        for caption in captions_by_image.get(image, [])[:captions_per_image]
    ]
    return pd.DataFrame(records, columns=['image', 'caption'])

In [None]:
# Gather the reference captions for the test images and preview the result.
test_captions_df = get_ground_captions(test_df=test_df, captions_df=captions_df)
test_captions_df.head()

# Experiments for Language Bias

In [None]:
def idetify_gender(sentence):
    """Classify a caption by the gendered words it contains.

    The sentence is lower-cased and split into alphabetic words, and those
    whole words are compared against fixed male and female word lists
    (singular and plural forms). Matching whole words matters because
    "woman" contains "man" and "female" contains "male"; substring matching
    would flag such captions as both genders.

    Parameters
    ----------
    sentence : str
        Caption text. Any non-string value (e.g. NaN) yields ``"other"``.

    Returns
    -------
    str
        ``"male"`` if only male words occur, ``"female"`` if only female
        words occur, ``"other"`` if neither or both occur.
    """
    male_words = {"male", "males", "man", "men", "boy", "boys",
                  "gentleman", "gentlemen", "guy", "guys"}
    female_words = {"female", "females", "woman", "women", "girl", "girls",
                    "lady", "ladies"}

    if not isinstance(sentence, str):
        return "other"

    # Replace every non-letter with a space so punctuation and possessives
    # ("man's") do not stick to the words, then split into whole words.
    letters_only = "".join(ch if ch.isalpha() else " " for ch in sentence.lower())
    tokens = set(letters_only.split())

    male = not tokens.isdisjoint(male_words)
    female = not tokens.isdisjoint(female_words)

    if male and not female:
        return "male"
    if female and not male:
        return "female"
    return "other"

In [None]:
def gender_error(ground_truth, predicted):
    """Return the gender confusion matrix of ground-truth vs. predicted captions.

    Both arguments are DataFrames with a ``caption`` column. Each caption is
    labelled with ``idetify_gender`` and the two label sequences are passed to
    ``confusion_matrix`` (ground truth as ``y_true``, predictions as
    ``y_pred``) with the label order male, female, other.
    """
    gender_labels = ["male", "female", "other"]
    true_genders = ground_truth['caption'].apply(idetify_gender)
    pred_genders = predicted['caption'].apply(idetify_gender)
    return confusion_matrix(y_true=true_genders, y_pred=pred_genders, labels=gender_labels)

In [None]:
def _female_to_male_ratio(captions):
    """Return (# female-only captions) / (# male-only captions) for a caption Series."""
    gender_counts = captions.apply(idetify_gender).value_counts()
    # .get avoids a KeyError when a class never occurs in the captions.
    female_count = int(gender_counts.get("female", 0))
    male_count = int(gender_counts.get("male", 0))
    if male_count == 0:
        # The ratio is undefined without any male-only caption.
        return float("nan")
    return female_count / male_count


def gender_ratio(ground_truth, predicted):
    """Compute the female-to-male caption ratio for ground truth and predictions.

    Parameters
    ----------
    ground_truth, predicted : pandas.DataFrame
        Each must have a ``caption`` column.

    Returns
    -------
    tuple of float
        ``(ground_truth_ratio, predicted_ratio)``, where each ratio is the
        number of captions containing only female words divided by the number
        containing only male words. A ratio is NaN when its caption set has no
        male-only caption. Ideally both ratios match.
    """
    ground_truth_ratio = _female_to_male_ratio(ground_truth['caption'])
    predicted_ratio = _female_to_male_ratio(predicted['caption'])
    return (ground_truth_ratio, predicted_ratio)

In [None]:
def _count_gender_cooccurrence(captions, object_name):
    """Count male-only and female-only captions that contain ``object_name``."""
    male_count = 0
    female_count = 0
    for caption in captions:
        # `in` also catches a match at position 0, which `find(...) > 0` missed.
        if not isinstance(caption, str) or object_name not in caption:
            continue
        gender = idetify_gender(caption)
        if gender == "male":
            male_count += 1
        elif gender == "female":
            female_count += 1
    return (male_count, female_count)


def object_gender_cooccurrence(ground_truth, predicted, object_name):
    """Count how often an object is mentioned together with each gender.

    Parameters
    ----------
    ground_truth, predicted : pandas.DataFrame
        Each must have a ``caption`` column.
    object_name : str
        Text searched for as a substring of the caption, so "bike" also
        matches "bikes".

    Returns
    -------
    tuple
        ``((gt_male, gt_female), (pred_male, pred_female))`` - the number of
        captions that contain ``object_name`` and are classified "male" or
        "female" by ``idetify_gender``, for the ground-truth and the predicted
        captions respectively.
    """
    gt = _count_gender_cooccurrence(ground_truth['caption'], object_name)
    pred = _count_gender_cooccurrence(predicted['caption'], object_name)
    return (gt, pred)

In [None]:
def experiment(df, pred_df, words=("paint", "bike")):
    """Run the language-bias experiments and display their plots.

    Three things are shown: a heatmap of the gender confusion matrix, the
    female-to-male caption ratios, and one grouped bar chart per object word
    with its co-occurrence counts for male and female captions.

    Parameters
    ----------
    df : pandas.DataFrame
        Ground-truth captions; must have a ``caption`` column.
    pred_df : pandas.DataFrame
        Predicted captions; must have a ``caption`` column.
    words : iterable of str, default ("paint", "bike")
        Object words whose co-occurrence with gendered captions is plotted.
    """
    gender_labels = ["male", "female", "other"]

    # Gender error calculation
    cf_matrix = gender_error(df, pred_df)
    fig, ax = plt.subplots(figsize=(7, 5))
    # fmt="d" prints the integer counts in full; the default format switches
    # to scientific notation for larger counts.
    sns.heatmap(cf_matrix, annot=True, fmt="d",
                xticklabels=gender_labels, yticklabels=gender_labels, ax=ax)
    ax.set(title="Gender confusion matrix",
           xlabel="Predicted",
           ylabel="Ground truth")
    plt.show()

    # Gender ratio calculation
    gt_ratio, pred_ratio = gender_ratio(df, pred_df)
    print("Gender ratio of female to male:")
    print("Ground truth captions-> " + str(gt_ratio))
    print("Predicted captions->    " + str(pred_ratio))

    # Object - gender co-occurrence
    bar_labels = ["male", "female"]
    bar_positions = np.arange(len(bar_labels))
    for word in words:
        gt_counts, pred_counts = object_gender_cooccurrence(df, pred_df, word)
        print(gt_counts, pred_counts)

        fig, ax = plt.subplots()
        # Shift the two series by half a bar width so they sit side by side.
        ax.bar(bar_positions - 0.2, gt_counts, 0.4, label='Ground truth')
        ax.bar(bar_positions + 0.2, pred_counts, 0.4, label='Predicted')

        ax.set_xticks(bar_positions)
        ax.set_xticklabels(bar_labels)
        ax.set_xlabel("Gender")
        ax.set_ylabel("Freq of co-occurrence with \"" + word + "\"")
        ax.set_title("Freq of co-occurrence with \"" + word + "\" along with male and female words")
        ax.legend()
        plt.show()

In [None]:
# NOTE(review): the experiment functions read pred_df['caption'], so this file
# presumably has a header row with a 'caption' column - confirm.
predictions_path = '/kaggle/working/output_captions.txt'
pred_df = pd.read_csv(predictions_path)

In [None]:
# Compare the ground-truth test captions against the predicted captions.
experiment(df=test_captions_df, pred_df=pred_df)