In [1]:
import pandas as pd
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import precision_recall_fscore_support, classification_report, auc, roc_curve
from scipy.sparse import hstack
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from datasets import load_dataset, Image, concatenate_datasets
from torch.utils.data import DataLoader, Dataset, TensorDataset

In [2]:
# Load the pre-computed feature tables for every split.
datafolder = '../../data/'
# The original training CSV is not loaded in this cell:
# train = datafolder+'train_with_features.csv'
# df_train = pd.read_csv(train, skip_blank_lines=False)
test = datafolder + 'test_with_features.csv'
dev = datafolder + 'dev_with_features.csv'
test_unseen = datafolder + 'test_unseen_with_features.csv'
dev_unseen = datafolder + 'dev_unseen_with_features.csv'

df_dev = pd.read_csv(dev, skip_blank_lines=False)
df_dev_unseen = pd.read_csv(dev_unseen, skip_blank_lines=False)
df_test = pd.read_csv(test, skip_blank_lines=False)
df_test_unseen = pd.read_csv(test_unseen, skip_blank_lines=False)

# The "seen" dev and test splits are merged and used as the training set;
# the "unseen" splits take over the dev/test roles.
df_train = pd.concat([df_dev, df_test], ignore_index=True, axis=0)

# Replace missing values with empty strings in *every* split, not only in
# the training frame: the text vectorizers further down read the `tokens`
# column of dev/test as well and cannot handle NaN documents.
df_train = df_train.fillna('')
df_dev = df_dev_unseen.fillna('')
df_test = df_test_unseen.fillna('')

In [3]:
class MLP(nn.Module):
    """Tiny feed-forward fusion classifier: 4 inputs -> 8 -> 4 -> 2 class probabilities."""

    def __init__(self):
        super().__init__()
        # Layer creation order determines parameter initialisation under a
        # fixed seed, so it is kept stable.
        self.flattening = nn.Flatten()
        self.fc1 = nn.Linear(4, 8)
        self.fc2 = nn.Linear(8, 4)
        self.dropout = nn.Dropout(p=0.5)
        self.fc3 = nn.Linear(4, 2)

    def forward(self, x):
        """Return softmax probabilities of shape (batch, 2) for a (batch, 4) input."""
        flat = self.flattening(x)
        hidden = F.relu(self.fc2(F.relu(self.fc1(flat))))
        # Dropout is applied only in front of the output layer.
        logits = self.fc3(self.dropout(hidden))
        return F.softmax(logits, dim=1)
    
def fuse_proba(arr1, arr2):
    """Concatenate two per-sample probability arrays column-wise (late fusion).

    Args:
        arr1: array-like of shape (n_samples, k1), e.g. the ``predict_proba``
            output of one classifier.
        arr2: array-like of shape (n_samples, k2) for the same samples, in
            the same order.

    Returns:
        numpy.ndarray of shape (n_samples, k1 + k2). One-dimensional inputs
        are concatenated end to end, as ``numpy.hstack`` does.

    Raises:
        ValueError: if the two inputs do not describe the same number of
            samples (first-axis length differs).
    """
    left = np.atleast_1d(arr1)
    right = np.atleast_1d(arr2)
    # Compare the number of samples rather than the total element count: two
    # arrays can hold equally many elements and still be misaligned, and an
    # `assert` would disappear when Python runs with -O.
    if left.shape[0] != right.shape[0]:
        raise ValueError(
            "cannot fuse probabilities for different numbers of samples: "
            f"{left.shape[0]} vs {right.shape[0]}"
        )
    return np.hstack([left, right])

def train(X, Y, batch_size = 64, num_epochs = 1000, seed=None):
    """Fit a fresh ``MLP`` on fused probability features.

    Args:
        X: array-like of shape (n_samples, 4) with the fused probabilities.
        Y: array-like of n_samples binary labels (0/1).
        batch_size: mini-batch size of the training loader.
        num_epochs: number of full passes over the data.
        seed: optional integer passed to ``torch.manual_seed`` before the
            model is created, making weight initialisation and dropout
            reproducible. ``None`` (the default) leaves the global RNG
            untouched, so repeated calls give different models.

    Returns:
        The trained ``MLP`` instance (left in training mode).
    """
    if seed is not None:
        torch.manual_seed(seed)

    model = MLP()
    criterion = nn.BCELoss()  # binary cross-entropy on the positive-class probability
    optimizer = torch.optim.Adam(model.parameters())

    # Go through NumPy so that pandas Series/DataFrames are converted by
    # position, independent of their index.
    features = torch.as_tensor(np.asarray(X), dtype=torch.float32)
    # Column vector so the targets match the (batch, 1) shape fed to the loss.
    targets = torch.as_tensor(np.asarray(Y), dtype=torch.float32).view(-1, 1)
    # Batches are served in the given order (no shuffling).
    train_loader = DataLoader(TensorDataset(features, targets), batch_size=batch_size, shuffle=False)

    for _ in range(num_epochs):
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            # Column 1 of the softmax output is the positive-class probability.
            positive_proba = model(inputs)[:, 1].unsqueeze(1)
            loss = criterion(positive_proba, labels)
            loss.backward()
            optimizer.step()
    return model

def evaluate(model, X, Y):
    """Predict hard 0/1 labels for ``X`` with a trained two-class model.

    Args:
        model: module whose forward pass returns per-class probabilities of
            shape (n_samples, 2); column 1 is read as the positive class.
        X: array-like of shape (n_samples, n_features).
        Y: labels belonging to ``X``. Only the length is checked here; the
            values are not used for prediction.

    Returns:
        torch.Tensor of dtype float32 and shape (n_samples,) holding 1.0
        where the positive-class probability exceeds 0.5 and 0.0 elsewhere.

    Raises:
        ValueError: if ``X`` and ``Y`` contain different numbers of samples.

    Note:
        The model is switched to evaluation mode and left that way.
    """
    features = torch.as_tensor(np.asarray(X), dtype=torch.float32)
    if len(Y) != features.shape[0]:
        raise ValueError(
            f"X has {features.shape[0]} samples but Y has {len(Y)}"
        )

    model.eval()  # disable dropout
    with torch.no_grad():
        # A single forward pass over the whole set.
        positive_proba = model(features)[:, 1]

    threshold = 0.5
    return (positive_proba > threshold).float()

def performance(preds, labels, scores=None):
    """Summarise binary classification quality in a one-row DataFrame.

    Args:
        preds: predicted hard labels (array-like or torch tensor).
        labels: ground-truth labels (array-like, pandas Series or tensor).
        scores: optional continuous scores (e.g. positive-class
            probabilities) used for the ROC curve. When omitted, the hard
            predictions are used, in which case the reported AUROC reduces
            to the balanced accuracy of ``preds``.

    Returns:
        pandas.DataFrame with a single row (index 0) and the columns
        ``f1_score``, ``precision``, ``recall`` (all macro-averaged),
        ``accuracy`` and ``AUROC``.
    """
    def _to_numpy(values):
        # Torch tensors may live on a GPU or track gradients; detach and move
        # them to host memory before scikit-learn sees them.
        if hasattr(values, 'detach'):
            values = values.detach().cpu().numpy()
        return np.asarray(values)

    y_pred = _to_numpy(preds)
    y_true = _to_numpy(labels)
    ranking = y_pred if scores is None else _to_numpy(scores)

    # zero_division=0 yields the same numbers as the default behaviour but
    # without a warning per metric when a class is never predicted.
    report = classification_report(y_true, y_pred, output_dict=True, zero_division=0)
    fpr, tpr, _ = roc_curve(y_true, ranking)

    macro = report['macro avg']
    results = {'f1_score': macro['f1-score'],
               'precision': macro['precision'],
               'recall': macro['recall'],
               'accuracy': report['accuracy'],
               'AUROC': auc(fpr, tpr)
              }
    return pd.DataFrame(results, index=[0])

def late_fuse_MLP(X_train, Y_train, X_test, Y_test):
    """Train the fusion MLP on one split and score it on another.

    Returns:
        A ``(metrics, predictions)`` pair: the one-row metrics frame from
        ``performance`` and the hard predictions from ``evaluate``.
    """
    fusion_model = train(X_train, Y_train)
    predictions = evaluate(fusion_model, X_test, Y_test)
    return performance(predictions, Y_test), predictions

In [None]:
import torch.cuda

class MLP(nn.Module):
    """Tiny feed-forward fusion classifier: 4 inputs -> 8 -> 4 -> 2 class probabilities."""

    def __init__(self):
        super().__init__()
        # Layer creation order determines parameter initialisation under a
        # fixed seed, so it is kept stable.
        self.flattening = nn.Flatten()
        self.fc1 = nn.Linear(4, 8)
        self.fc2 = nn.Linear(8, 4)
        self.dropout = nn.Dropout(p=0.5)
        self.fc3 = nn.Linear(4, 2)

    def forward(self, x):
        """Return softmax probabilities of shape (batch, 2) for a (batch, 4) input."""
        activations = self.flattening(x)
        for layer in (self.fc1, self.fc2):
            activations = F.relu(layer(activations))
        # Dropout is applied only in front of the output layer.
        logits = self.fc3(self.dropout(activations))
        return F.softmax(logits, dim=1)
    
def fuse_proba(arr1, arr2):
    """Concatenate two per-sample probability arrays column-wise (late fusion).

    Args:
        arr1: array-like of shape (n_samples, k1), e.g. the ``predict_proba``
            output of one classifier.
        arr2: array-like of shape (n_samples, k2) for the same samples, in
            the same order.

    Returns:
        numpy.ndarray of shape (n_samples, k1 + k2). One-dimensional inputs
        are concatenated end to end, as ``numpy.hstack`` does.

    Raises:
        ValueError: if the two inputs do not describe the same number of
            samples (first-axis length differs).
    """
    left = np.atleast_1d(arr1)
    right = np.atleast_1d(arr2)
    # Compare the number of samples rather than the total element count: two
    # arrays can hold equally many elements and still be misaligned, and an
    # `assert` would disappear when Python runs with -O.
    if left.shape[0] != right.shape[0]:
        raise ValueError(
            "cannot fuse probabilities for different numbers of samples: "
            f"{left.shape[0]} vs {right.shape[0]}"
        )
    return np.hstack([left, right])

def train(X, Y, batch_size = 64, num_epochs = 1000, seed=None):
    """Fit a fresh ``MLP`` on fused probability features, on GPU when available.

    Args:
        X: array-like of shape (n_samples, 4) with the fused probabilities.
        Y: array-like of n_samples binary labels (0/1).
        batch_size: mini-batch size of the training loader.
        num_epochs: number of full passes over the data.
        seed: optional integer passed to ``torch.manual_seed`` before the
            model is created, making weight initialisation and dropout
            reproducible. ``None`` (the default) leaves the global RNG
            untouched, so repeated calls give different models.

    Returns:
        The trained ``MLP`` instance, living on the selected device and left
        in training mode.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if seed is not None:
        torch.manual_seed(seed)

    model = MLP()
    model.to(device)
    criterion = nn.BCELoss()  # binary cross-entropy on the positive-class probability
    optimizer = torch.optim.Adam(model.parameters())

    # Go through NumPy so that pandas Series/DataFrames are converted by
    # position, independent of their index. The tensors are moved to the
    # device once, so batches need no further transfer.
    features = torch.as_tensor(np.asarray(X), dtype=torch.float32).to(device)
    # Column vector so the targets match the (batch, 1) shape fed to the loss.
    targets = torch.as_tensor(np.asarray(Y), dtype=torch.float32).view(-1, 1).to(device)
    # Batches are served in the given order (no shuffling).
    train_loader = DataLoader(TensorDataset(features, targets), batch_size=batch_size, shuffle=False)

    for _ in range(num_epochs):
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            # Column 1 of the softmax output is the positive-class probability.
            positive_proba = model(inputs)[:, 1].unsqueeze(1)
            loss = criterion(positive_proba, labels)
            loss.backward()
            optimizer.step()
    return model

def evaluate(model, X, Y):
    """Predict hard 0/1 labels for ``X`` with a trained two-class model.

    Args:
        model: module whose forward pass returns per-class probabilities of
            shape (n_samples, 2); column 1 is read as the positive class.
        X: array-like of shape (n_samples, n_features).
        Y: labels belonging to ``X``. Only the length is checked here; the
            values are not used for prediction.

    Returns:
        CPU torch.Tensor of dtype float32 and shape (n_samples,) holding 1.0
        where the positive-class probability exceeds 0.5 and 0.0 elsewhere.
        The result is always moved to host memory so that NumPy-based code
        such as scikit-learn metrics can consume it.

    Raises:
        ValueError: if ``X`` and ``Y`` contain different numbers of samples.

    Note:
        The model is switched to evaluation mode and left that way.
    """
    # Run on whatever device the model already lives on; fall back to the
    # CUDA-if-available choice for modules without parameters.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    features = torch.as_tensor(np.asarray(X), dtype=torch.float32).to(device)
    if len(Y) != features.shape[0]:
        raise ValueError(
            f"X has {features.shape[0]} samples but Y has {len(Y)}"
        )

    model.eval()  # disable dropout
    with torch.no_grad():
        # A single forward pass over the whole set.
        positive_proba = model(features)[:, 1]

    threshold = 0.5
    binary_predictions = (positive_proba > threshold).float()
    return binary_predictions.cpu()

def performance(preds, labels, scores=None):
    """Summarise binary classification quality in a one-row DataFrame.

    Args:
        preds: predicted hard labels (array-like or torch tensor).
        labels: ground-truth labels (array-like, pandas Series or tensor).
        scores: optional continuous scores (e.g. positive-class
            probabilities) used for the ROC curve. When omitted, the hard
            predictions are used, in which case the reported AUROC reduces
            to the balanced accuracy of ``preds``.

    Returns:
        pandas.DataFrame with a single row (index 0) and the columns
        ``f1_score``, ``precision``, ``recall`` (all macro-averaged),
        ``accuracy`` and ``AUROC``.
    """
    def _to_numpy(values):
        # Torch tensors may live on a GPU or track gradients; detach and move
        # them to host memory before scikit-learn sees them.
        if hasattr(values, 'detach'):
            values = values.detach().cpu().numpy()
        return np.asarray(values)

    y_pred = _to_numpy(preds)
    y_true = _to_numpy(labels)
    ranking = y_pred if scores is None else _to_numpy(scores)

    # zero_division=0 yields the same numbers as the default behaviour but
    # without a warning per metric when a class is never predicted.
    report = classification_report(y_true, y_pred, output_dict=True, zero_division=0)
    fpr, tpr, _ = roc_curve(y_true, ranking)

    macro = report['macro avg']
    results = {'f1_score': macro['f1-score'],
               'precision': macro['precision'],
               'recall': macro['recall'],
               'accuracy': report['accuracy'],
               'AUROC': auc(fpr, tpr)
              }
    return pd.DataFrame(results, index=[0])

def late_fuse_MLP(X_train, Y_train, X_test, Y_test):
    """Train the fusion MLP on one split and return its metrics on another.

    Returns:
        The one-row metrics frame produced by ``performance``. The
        predictions themselves are not returned by this definition.
    """
    fusion_model = train(X_train, Y_train)
    predictions = evaluate(fusion_model, X_test, Y_test)
    return performance(predictions, Y_test)

# ResNet50:

In [4]:
# Labels of the three splits defined by the data-loading cell.
Y_train, Y_dev, Y_test = df_train['label'], df_dev['label'], df_test['label']

# Pre-computed image embeddings. Each file is loaded and stacked into one
# tensor per split (torch.stack implies a sequence of equally shaped tensors).
# train_img = torch.stack(torch.load('image_embeddings/train_img_tensors.pt'))
train_img1 = torch.stack(torch.load('image_embeddings/dev_img_tensors.pt'))
train_img2 = torch.stack(torch.load('image_embeddings/test_img_tensors.pt'))
# Same order as the concatenation that builds df_train: dev first, then test.
train_img = torch.cat([train_img1, train_img2], dim=0)

dev_img = torch.stack(torch.load('image_embeddings/dev_unseen_img_tensors.pt'))
test_img = torch.stack(torch.load('image_embeddings/test_unseen_img_tensors.pt'))

# One flat NumPy feature vector per image, as expected by scikit-learn.
train_X = [np.array(embedding.cpu()).flatten() for embedding in train_img]
dev_X = [np.array(embedding.cpu()).flatten() for embedding in dev_img]
test_X = [np.array(embedding.cpu()).flatten() for embedding in test_img]

In [None]:
from sklearn.svm import SVC

# Linear-kernel SVM on the flattened image embeddings; probability=True
# enables predict_proba, which the late-fusion step relies on.
clf_svc = SVC(C=10, kernel='linear', probability=True)
clf_svc.fit(train_X, Y_train)

### Note: Fitting this model took approximately 18 hours, so it is better to save it to disk after training (size on disk: 4.3 GB).

In [5]:
import pickle

# To persist a freshly fitted `clf_svc`, dump it in binary write mode ('wb'):
# with open('models/ResNet50_lin_c10_model.pkl', 'wb') as f:
#     pickle.dump(clf_svc, f)

# Restore the previously fitted classifier from disk.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load model files that were produced locally.
with open('models/ResNet50_lin_c10_model.pkl', mode='rb') as model_file:
    clf_svc = pickle.load(model_file)



In [6]:
# Class-probability estimates of the image SVM for every split.
y_train_proba_ResNet, y_dev_proba_ResNet, y_test_proba_ResNet = (
    clf_svc.predict_proba(split_X) for split_X in (train_X, dev_X, test_X)
)


In [None]:
# test_unseen = datafolder+'test_unseen_with_features.csv'
# dev_unseen = datafolder+'dev_unseen_with_features.csv'

# df_dev_unseen = pd.read_csv(dev_unseen, skip_blank_lines=False)
# df_test_unseen = pd.read_csv(test_unseen, skip_blank_lines=False)

# dev_img = torch.stack(torch.load('image_embeddings/dev_img_tensors.pt'))
# test_img = torch.stack(torch.load('image_embeddings/test_img_tensors.pt'))

# Y_dev_unseen = df_dev['label']
# Y_test_unseen = df_test['label']

# dev_X = [np.array(x.cpu()).flatten() for x in dev_img]
# test_X = [np.array(x.cpu()).flatten() for x in test_img]

# y_dev_unseen_proba_ResNet = clf_svc.predict_proba(dev_X)
# y_test_unseen_proba_ResNet = clf_svc.predict_proba(test_X)

# BoW:

In [30]:
# Bag of words: unigram counts over the whitespace-separated `tokens` column.
vectorizer_BoW = CountVectorizer(analyzer='word', tokenizer=lambda x: x.split(), ngram_range=(1, 1))

# The vocabulary is learned on the training split only.
X_train_BoW = vectorizer_BoW.fit_transform(df_train['tokens'])
X_dev_BoW, X_test_BoW = (
    vectorizer_BoW.transform(frame['tokens']) for frame in (df_dev, df_test)
)

# LinearSVC has no predict_proba; the calibration wrapper supplies it.
clf = LinearSVC(C=10, max_iter=1000000, random_state=456)
clf_svc_BoW = CalibratedClassifierCV(clf).fit(X_train_BoW, Y_train)

y_dev_proba_BoW = clf_svc_BoW.predict_proba(X_dev_BoW)
y_test_proba_BoW = clf_svc_BoW.predict_proba(X_test_BoW)


In [31]:
# Late-fusion inputs: text-model probabilities next to image-model
# probabilities. Note the spelling `Bow` (lower-case w): these names are
# distinct from the sparse count matrices `X_dev_BoW` / `X_test_BoW`.
X_dev_Bow = fuse_proba(arr1=y_dev_proba_BoW, arr2=y_dev_proba_ResNet)
X_test_Bow = fuse_proba(arr1=y_test_proba_BoW, arr2=y_test_proba_ResNet)

# The fusion MLP is fitted on the (unseen) test split and scored on the
# (unseen) dev split.
BoW_ResNet_results = late_fuse_MLP(
    X_train=X_test_Bow,
    Y_train=Y_test,
    X_test=X_dev_Bow,
    Y_test=Y_dev,
)

In [32]:
BoW_ResNet_results

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.518752,0.536222,0.531941,0.534,0.531941


In [57]:
late_fuse_MLP( X_test_Bow, Y_test, X_dev_Bow, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.557786,0.557867,0.557816,0.558,0.557816


In [58]:
late_fuse_MLP( X_test_Bow, Y_test, X_dev_Bow, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.575,0.613787,0.594486,0.592,0.594486


In [59]:
late_fuse_MLP( X_test_Bow, Y_test, X_dev_Bow, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.550965,0.551862,0.551503,0.552,0.551503


In [60]:
late_fuse_MLP( X_test_Bow, Y_test, X_dev_Bow, Y_dev)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.335989,0.253,0.5,0.506,0.5


In [61]:
late_fuse_MLP( X_test_Bow, Y_test, X_dev_Bow, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.554796,0.557469,0.556704,0.556,0.556704


In [62]:
late_fuse_MLP( X_test_Bow, Y_test, X_dev_Bow, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.575684,0.589395,0.583548,0.582,0.583548


In [63]:
late_fuse_MLP( X_test_Bow, Y_test, X_dev_Bow, Y_dev)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.335989,0.253,0.5,0.506,0.5


In [64]:
late_fuse_MLP( X_test_Bow, Y_test, X_dev_Bow, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.569316,0.587437,0.579787,0.578,0.579787


In [65]:
late_fuse_MLP( X_test_Bow, Y_test, X_dev_Bow, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.490662,0.522013,0.517194,0.52,0.517194


# Char-n-gram:

In [33]:
# Character n-grams (n = 1..3) over the `tokens` column. No `tokenizer` is
# passed: scikit-learn only uses it when analyzer == 'word', so with
# analyzer='char' it has no effect other than triggering a warning.
vectorizer_char = CountVectorizer(analyzer='char', ngram_range=(1, 3))

# The vocabulary is learned on the training split only.
X_train_char = vectorizer_char.fit_transform(df_train.tokens)
X_dev_char = vectorizer_char.transform(df_dev.tokens)
X_test_char = vectorizer_char.transform(df_test.tokens)

# LinearSVC has no predict_proba; the calibration wrapper supplies it.
clf = LinearSVC(max_iter=1000000, C=10, random_state=456)
clf_svc_char = CalibratedClassifierCV(clf)
clf_svc_char.fit(X_train_char, Y_train)

y_dev_proba_char = clf_svc_char.predict_proba(X_dev_char)
y_test_proba_char = clf_svc_char.predict_proba(X_test_char)



In [34]:
# Late-fusion inputs: text-model probabilities next to image-model
# probabilities. NOTE: this rebinds `X_dev_char` / `X_test_char`, which held
# the sparse n-gram matrices until now, so the n-gram cell must be re-run
# before its predict_proba calls can be repeated.
X_dev_char = fuse_proba(arr1=y_dev_proba_char, arr2=y_dev_proba_ResNet)
X_test_char = fuse_proba(arr1=y_test_proba_char, arr2=y_test_proba_ResNet)

# The fusion MLP is fitted on the (unseen) test split and scored on the
# (unseen) dev split.
char_ResNet_results = late_fuse_MLP(
    X_train=X_test_char,
    Y_train=Y_test,
    X_test=X_dev_char,
    Y_test=Y_dev,
)

In [35]:
char_ResNet_results

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.549479,0.550805,0.550487,0.55,0.550487


In [54]:
late_fuse_MLP( X_test_char, Y_test, X_dev_char, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.515706,0.521717,0.520707,0.522,0.520707


In [55]:
late_fuse_MLP( X_test_char, Y_test, X_dev_char, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.548872,0.551316,0.550679,0.55,0.550679


In [56]:
late_fuse_MLP( X_test_char, Y_test, X_dev_char, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.471025,0.521999,0.514498,0.518,0.514498


In [66]:
late_fuse_MLP( X_test_char, Y_test, X_dev_char, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.549046,0.551178,0.550631,0.55,0.550631


In [67]:
late_fuse_MLP( X_test_char, Y_test, X_dev_char, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.527084,0.527704,0.527548,0.528,0.527548


In [68]:
late_fuse_MLP( X_test_char, Y_test, X_dev_char, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.529908,0.530288,0.530244,0.53,0.530244


In [69]:
late_fuse_MLP( X_test_char, Y_test, X_dev_char, Y_dev)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.335989,0.253,0.5,0.506,0.5


In [70]:
late_fuse_MLP( X_test_char, Y_test, X_dev_char, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.462243,0.523657,0.51421,0.518,0.51421


In [71]:
late_fuse_MLP( X_test_char, Y_test, X_dev_char, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.523232,0.527857,0.526876,0.528,0.526876


# Advanced SVM:

In [39]:
# Replace missing values with empty strings in all three splits so that the
# text vectorizers below never receive NaN documents.
df_train, df_dev, df_test = (
    frame.fillna('') for frame in (df_train, df_dev, df_test)
)

In [40]:
# One vectorizer per textual feature column; all split on whitespace.
vectorizer1 = CountVectorizer(analyzer='word', tokenizer=lambda x: x.split(), ngram_range=(1, 3))  # 1- to 3-grams of the pos_fw_emo representation
vectorizer2 = CountVectorizer(analyzer='word', tokenizer=lambda x: x.split(), ngram_range=(1, 1))  # unigrams of emotion associations
vectorizer3 = CountVectorizer(analyzer='word', tokenizer=lambda x: x.split(), ngram_range=(1, 1))  # unigrams of tokens (BoW)
vectorizer4 = CountVectorizer(analyzer='word', tokenizer=lambda x: x.split(), ngram_range=(1, 4))  # 1- to 4-grams of intent

# Combine the features column-wise. Vocabularies are fitted on the training
# split and only applied to dev/test; the column order is the same everywhere.
X_train_advanced = hstack(
    [
        vectorizer1.fit_transform(df_train['pos_fw_emo']),
        vectorizer2.fit_transform(df_train['emotion_associations']),
        df_train[['count']].values,
        df_train[['sentiment_score']].values,
        vectorizer3.fit_transform(df_train['tokens']),
        vectorizer4.fit_transform(df_train['intent']),
    ],
    format='csr',
)

X_dev_advanced = hstack(
    [
        vectorizer1.transform(df_dev['pos_fw_emo']),
        vectorizer2.transform(df_dev['emotion_associations']),
        df_dev[['count']].values,
        df_dev[['sentiment_score']].values,
        vectorizer3.transform(df_dev['tokens']),
        vectorizer4.transform(df_dev['intent']),
    ],
    format='csr',
)

X_test_advanced = hstack(
    [
        vectorizer1.transform(df_test['pos_fw_emo']),
        vectorizer2.transform(df_test['emotion_associations']),
        df_test[['count']].values,
        df_test[['sentiment_score']].values,
        vectorizer3.transform(df_test['tokens']),
        vectorizer4.transform(df_test['intent']),
    ],
    format='csr',
)

# LinearSVC has no predict_proba; the calibration wrapper supplies it.
clf = LinearSVC(C=10, max_iter=1000000, random_state=456)
clf_svc_advanced = CalibratedClassifierCV(clf)
clf_svc_advanced.fit(X_train_advanced, Y_train)

y_dev_proba_advanced = clf_svc_advanced.predict_proba(X_dev_advanced)
y_test_proba_advanced = clf_svc_advanced.predict_proba(X_test_advanced)

In [41]:
# Late-fusion inputs: text-model probabilities next to image-model
# probabilities. NOTE: this rebinds `X_dev_advanced` / `X_test_advanced`,
# which held the sparse feature matrices until now, so the feature cell must
# be re-run before its predict_proba calls can be repeated.
X_dev_advanced = fuse_proba(arr1=y_dev_proba_advanced, arr2=y_dev_proba_ResNet)
X_test_advanced = fuse_proba(arr1=y_test_proba_advanced, arr2=y_test_proba_ResNet)

# The fusion MLP is fitted on the (unseen) test split and scored on the
# (unseen) dev split.
advanced_ResNet_results = late_fuse_MLP(
    X_train=X_test_advanced,
    Y_train=Y_test,
    X_test=X_dev_advanced,
    Y_test=Y_dev,
)

In [42]:
advanced_ResNet_results

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.51963,0.528284,0.526492,0.528,0.526492


In [45]:
late_fuse_MLP( X_test_advanced, Y_test, X_dev_advanced, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.489489,0.52824,0.520955,0.524,0.520955


In [46]:
late_fuse_MLP( X_test_advanced, Y_test, X_dev_advanced, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.467519,0.582007,0.537589,0.542,0.537589


In [47]:
late_fuse_MLP( X_test_advanced, Y_test, X_dev_advanced, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.457691,0.571538,0.531517,0.536,0.531517


In [48]:
late_fuse_MLP( X_test_advanced, Y_test, X_dev_advanced, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.467514,0.552422,0.5279,0.532,0.5279


In [72]:
late_fuse_MLP( X_test_advanced, Y_test, X_dev_advanced, Y_dev)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.335989,0.253,0.5,0.506,0.5


In [73]:
late_fuse_MLP( X_test_advanced, Y_test, X_dev_advanced, Y_dev)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.335989,0.253,0.5,0.506,0.5


In [74]:
late_fuse_MLP( X_test_advanced, Y_test, X_dev_advanced, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.524251,0.540376,0.536037,0.538,0.536037


In [75]:
late_fuse_MLP( X_test_advanced, Y_test, X_dev_advanced, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.468918,0.587445,0.539566,0.544,0.539566


In [76]:
late_fuse_MLP( X_test_advanced, Y_test, X_dev_advanced, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.469743,0.573262,0.535709,0.54,0.535709


# Fine-tuned_Bert:

In [8]:
# The original training split; it is what BERT gets fine-tuned on below.
train1 = datafolder + 'train_with_features.csv'
df_train1 = pd.read_csv(filepath_or_buffer=train1, skip_blank_lines=False)

In [9]:
from transformers import BertTokenizer, BertForSequenceClassification
# NOTE(review): the star import supplies `fine_tune` (used below) and
# presumably `predict_proba_from_fine_tuned` (used in a later cell); importing
# these names explicitly would make the dependency visible - confirm against utils.py.
from utils import *
# Set up GPU or CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pre-trained tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
# num_labels=2 attaches a two-class classification head to the encoder.
model = BertForSequenceClassification.from_pretrained('bert-base-cased', num_labels=2)
# The model is moved to the device before fine-tuning starts.
model.to(device)

# Fine-tune on the original training split; `fine_tune` comes from utils and
# returns the (tokenizer, model) pair that the later cells use for prediction.
tokenizer, model = fine_tune(df_train1, tokenizer, model)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

Finished epoch 1 with average training loss of 0.5869622450125845.
Finished epoch 2 with average training loss of 0.46985703684333574.
Finished epoch 3 with average training loss of 0.38471138017966333.
Finished epoch 4 with average training loss of 0.31831575263487666.
Finished epoch 5 with average training loss of 0.2651188317266174.
Finished epoch 6 with average training loss of 0.23545115464892155.
Finished epoch 7 with average training loss of 0.21394476840706697.
Finished epoch 8 with average training loss of 0.1993517862506827.
Finished epoch 9 with average training loss of 0.181512619039618.
Finished epoch 10 with average training loss of 0.1718124886904668.


In [10]:
# Class probabilities of the fine-tuned BERT model for every split.
y_train_proba_bert, y_dev_proba_bert, y_test_proba_bert = (
    predict_proba_from_fine_tuned(frame, tokenizer, model)
    for frame in (df_train, df_dev, df_test)
)


In [11]:
# Late-fusion inputs: BERT probabilities next to image-SVM probabilities.
X_train_bert = fuse_proba(arr1=y_train_proba_bert, arr2=y_train_proba_ResNet)
X_dev_bert = fuse_proba(arr1=y_dev_proba_bert, arr2=y_dev_proba_ResNet)
X_test_bert = fuse_proba(arr1=y_test_proba_bert, arr2=y_test_proba_ResNet)

# Earlier variant (fit on test, score on dev), currently disabled:
# bert_ResNet_results = late_fuse_MLP( X_test_bert, Y_test, X_dev_bert, Y_dev)

In [12]:
def late_fuse_MLP(X_train, Y_train, X_test, Y_test):
    """Train the fusion MLP on one split and score it on another.

    Returns:
        A ``(metrics, predictions)`` pair: the one-row metrics frame from
        ``performance`` and the hard predictions from ``evaluate``.
    """
    fusion_model = train(X_train, Y_train)
    predictions = evaluate(fusion_model, X_test, Y_test)
    return performance(predictions, Y_test), predictions

In [13]:
# Fit a fusion MLP on the training features and score it on the test split.
# NOTE: `test` held the path of the test CSV until now and is rebound here
# to the metrics frame.
test, test_pred = late_fuse_MLP(
    X_train=X_train_bert,
    Y_train=Y_train,
    X_test=X_test_bert,
    Y_test=Y_test,
)

In [16]:
test

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.569284,0.582883,0.570267,0.6225,0.570267


In [14]:
# Fit another, independently initialised fusion MLP on the training features
# and score it on the dev split.
# NOTE: `dev` held the path of the dev CSV until now and is rebound here to
# the metrics frame.
dev, dev_pred = late_fuse_MLP(
    X_train=X_train_bert,
    Y_train=Y_train,
    X_test=X_dev_bert,
    Y_test=Y_dev,
)

In [17]:
dev

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.507162,0.545371,0.5275,0.611111,0.5275


In [15]:
# Store the fused predictions as plain numeric columns. The predictions come
# back as torch tensors; depending on the pandas version a raw tensor can end
# up as an object column of 0-d tensors (written to CSV as "tensor(1.)"), so
# convert to NumPy explicitly first.
df_dev['bert+resnet'] = (
    dev_pred.detach().cpu().numpy() if hasattr(dev_pred, 'detach') else np.asarray(dev_pred)
)
df_test['bert+resnet'] = (
    test_pred.detach().cpu().numpy() if hasattr(test_pred, 'detach') else np.asarray(test_pred)
)

# NOTE(review): these writes overwrite the very CSV files the notebook reads
# its unseen dev/test splits from - confirm this in-place update is intended.
df_dev.to_csv(datafolder+'dev_unseen_with_features.csv', index=False)
df_test.to_csv(datafolder+'test_unseen_with_features.csv', index=False)

In [84]:
bert_ResNet_results

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.546028,0.580106,0.565441,0.568,0.565441


In [85]:
late_fuse_MLP( X_test_bert, Y_test, X_dev_bert, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.509654,0.58751,0.554312,0.558,0.554312


In [86]:
late_fuse_MLP( X_test_bert, Y_test, X_dev_bert, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.503984,0.593281,0.55412,0.558,0.55412


In [87]:
late_fuse_MLP( X_test_bert, Y_test, X_dev_bert, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.525952,0.578376,0.556864,0.56,0.556864


In [88]:
late_fuse_MLP( X_test_bert, Y_test, X_dev_bert, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.500998,0.590604,0.552096,0.556,0.552096


In [89]:
late_fuse_MLP( X_test_bert, Y_test, X_dev_bert, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.518304,0.595053,0.560385,0.564,0.560385


In [90]:
late_fuse_MLP( X_test_bert, Y_test, X_dev_bert, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.506956,0.595916,0.556144,0.56,0.556144


In [91]:
late_fuse_MLP( X_test_bert, Y_test, X_dev_bert, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.502501,0.594905,0.554072,0.558,0.554072


In [92]:
late_fuse_MLP( X_test_bert, Y_test, X_dev_bert, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.545179,0.580731,0.565393,0.568,0.565393


In [93]:
late_fuse_MLP( X_test_bert, Y_test, X_dev_bert, Y_dev)

Unnamed: 0,f1_score,precision,recall,accuracy,AUROC
0,0.533787,0.576819,0.559129,0.562,0.559129
