In [48]:
import ast
import os

import numpy as np
import pandas as pd

# SpeechCLIP
# Root directory; both the embedding directories and the Flickr CSVs are
# addressed relative to it through the two templates below.
base_path = "/data/user_data/sbharad2/SpeechCLIP/data"
# Template for the per-split directory that holds the audio embeddings.
embedding_read_path_pattern = (
    "{base_path}/Flickr8k.{csv_name}.token.txt.audio_embeddings/"
)

# FLICKR
csv_path_pattern = "{base_path}/flickr/{csv_name}.csv"

train_df = pd.read_csv(
    csv_path_pattern.format(base_path=base_path, csv_name="flickr_train_sampled")
)
dev_df = pd.read_csv(
    csv_path_pattern.format(base_path=base_path, csv_name="flickr_dev_sampled")
)

# change target type as list
# The column comes back from the CSV as text. ast.literal_eval parses Python
# literals only, so unlike eval() it cannot execute code embedded in the file.
train_df["target"] = train_df["target"].apply(ast.literal_eval)
dev_df["target"] = dev_df["target"].apply(ast.literal_eval)

print("Train size:", train_df.shape, "Test size:", dev_df.shape)

Train size: (2000, 6) Test size: (1000, 6)


In [53]:
from sklearn.linear_model import LinearRegression


# Label -> column index mapping. It starts empty and is passed to _load_data,
# which adds an entry for every label it has not seen before.
target_vocabulary = {}


def _load_data(df, embedding_read_path, target_vocabulary=None):
    all_embeddings = []
    all_targets = []
    for i, r in df.iterrows():
        try:
            example_id = r["example_id"]
            embedding = np.load(os.path.join(embedding_read_path, example_id) + ".npy")
            all_embeddings.append(embedding)
            for tgt in r["target"]:
                if tgt not in target_vocabulary:
                    target_vocabulary[tgt] = len(target_vocabulary)
        except FileNotFoundError:
            print(f"File not found for {example_id}.npy")

    for i, r in df.iterrows():
        example_id = r["example_id"]
        if not os.path.exists(os.path.join(embedding_read_path, example_id) + ".npy"):
            continue
        target = np.zeros(len(target_vocabulary))
        for tgt in r["target"]:
            target[target_vocabulary[tgt]] = 1
        all_targets.append(target)

    all_embeddings = np.array(all_embeddings)
    all_targets = np.array(all_targets)
    print(f"Loaded {all_embeddings.shape} embeddings and {all_targets.shape} targets.")
    return all_embeddings, all_targets, target_vocabulary


# Training split goes first, so the vocabulary is seeded with training labels.
train_embedding_dir = embedding_read_path_pattern.format(
    base_path=base_path, csv_name="flickr_train_sampled"
)
X_train, y_train, target_vocabulary = _load_data(
    train_df, train_embedding_dir, target_vocabulary
)
print("Training shapes")
print(X_train.shape, y_train.shape)
print(len(target_vocabulary), "Target vocabulary size.")

# Dev split reuses the same vocabulary; unseen dev labels are appended to it.
dev_embedding_dir = embedding_read_path_pattern.format(
    base_path=base_path, csv_name="flickr_dev_sampled"
)
X_dev, y_dev, target_vocabulary = _load_data(
    dev_df, dev_embedding_dir, target_vocabulary
)
print("Dev shapes")
print(X_dev.shape, y_dev.shape)
print(len(target_vocabulary), "Target vocabulary size.")

# Remove columns from y_dev if they are not present in y_train.
# New labels always receive the next free index, so dev-only labels sit in the
# trailing columns and slicing to the training width drops exactly those.
print("Original dev and train target shapes", y_dev.shape, y_train.shape)
n_train_labels = y_train.shape[1]
if y_dev.shape[1] > n_train_labels:
    print("Removing columns from dev target")
    y_dev = y_dev[:, :n_train_labels]
print("New dev and train target shapes", y_dev.shape, y_train.shape)

File not found for 1155138244_859fd6e079.jpg#0.npy
File not found for 1468103286_96a6e07029.jpg#0.npy
File not found for 1479857177_9d4a6f38fd.jpg#0.npy
File not found for 1643915227_9f48068772.jpg#0.npy
File not found for 1797554350_20998753c0.jpg#1.npy
File not found for 1808504612_3508f3c9bb.jpg#0.npy
File not found for 199463720_329a802206.jpg#0.npy
File not found for 2058091220_2087270068.jpg#0.npy
File not found for 2087317114_cf06df5aa5.jpg#1.npy
File not found for 2136455112_202c093ba4.jpg#0.npy
File not found for 2221818690_9003756d33.jpg#1.npy
File not found for 2258277193_586949ec62.jpg.1#2.npy
File not found for 2319197581_94f807b204.jpg#0.npy
File not found for 236095031_5cb17dc54a.jpg#0.npy
File not found for 2394824046_51cec8e5e7.jpg#1.npy
File not found for 240696675_7d05193aa0.jpg#0.npy
File not found for 2410153942_ba4a136358.jpg#0.npy
File not found for 2428275562_4bde2bc5ea.jpg#1.npy
File not found for 2553619107_d382a820f9.jpg#1.npy
File not found for 2557972410_69

In [59]:
from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

# One independent logistic regression per target column.
clf = MultiOutputClassifier(LogisticRegression())
clf.fit(X_train, y_train)
y_pred = clf.predict(X_dev)

# Compute multi label metrics
exact_match = accuracy_score(y_dev, y_pred)
print("Exact Match:", exact_match * 100)

print("Micro averaged metrics")
micro_metrics = (
    ("F1 Score:", f1_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
)
for metric_label, metric_fn in micro_metrics:
    print(metric_label, metric_fn(y_dev, y_pred, average="micro") * 100)

Exact Match: 15.6
Micro averaged metrics
F1 Score: 44.66257668711657
Precision: 88.3495145631068
Recall: 29.88505747126437
