In [26]:
import numpy as np
import pandas as pd
from sklearn.svm import LinearSVC
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import f1_score

# Bound explicitly (the cells below call `utils.<helper>` and `f1_score`); kept
# before the star-import so any same-named export from `utils` still takes precedence.
import utils
from utils import *

In [27]:
# Location of the pre-computed feature/prediction CSVs for each split.
datafolder = '../../data/hateful_memes/'
train, test, dev = (
    datafolder + filename
    for filename in ('train_with_features.csv',
                     'test_with_features.csv',
                     'dev_with_features.csv')
)

# Read the three splits in the same order as before (train, dev, test);
# blank lines in the CSVs are kept rather than skipped.
df_train, df_dev, df_test = (
    pd.read_csv(csv_path, skip_blank_lines=False)
    for csv_path in (train, dev, test)
)

In [28]:
# List the columns of the dev frame; the names collected in `models` in the
# following cells are taken from this index.
df_dev.columns

Index(['id', 'img', 'label', 'text', 'tokens', 'lemmas', 'upos', 'pos_fw_emo',
       'count', 'emotion_associations', 'sentiment_score', 'intent',
       'ResNet_svm_rbf_kernel', 'ResNet_svm_rbf_kernelC10',
       'ResNet_svm_linear_kernel', 'baseline_svm_rbf_kernelC10',
       'Advanced_svm_100K_C10', 'Advanced_svm_linear_100K_C10',
       'ResNet_svm_linear_kernelC10', 'hatebert_direct', 'hatebert_vectors',
       'bert_base_cased_finetuned', 'ResNet_nn'],
      dtype='object')

In [29]:
# Columns handed to the F1 helper: the gold 'label' column plus one column per
# model, sorted by name (np.sort returns a NumPy array of strings, and
# upper-case names sort before lower-case ones).
models = np.sort([
    'label',
    'ResNet_svm_rbf_kernel',
    'ResNet_svm_rbf_kernelC10',
    'ResNet_svm_linear_kernel',
    'baseline_svm_rbf_kernelC10',
    'Advanced_svm_linear_100K_C10',
    'ResNet_svm_linear_kernelC10',
    'hatebert_direct',
    'hatebert_vectors',
    'bert_base_cased_finetuned',
    'ResNet_nn',
])

In [6]:
# Score the selected columns of the dev split with the project helper `get_f1`.
# NOTE(review): presumably one F1 per model column measured against 'label'
# (the recorded output has a 'models' and an 'F1-score' column) - helper source
# is not shown here, confirm.
utils.get_f1(df_dev[models])

Unnamed: 0,models,F1-score
6,baseline_svm_rbf_kernelC10,0.520697
7,bert_base_cased_finetuned,0.513363
0,Advanced_svm_linear_100K_C10,0.50764
9,hatebert_vectors,0.505507
3,ResNet_svm_linear_kernelC10,0.492002
2,ResNet_svm_linear_kernel,0.484847
1,ResNet_nn,0.475463
8,hatebert_direct,0.454449
5,ResNet_svm_rbf_kernelC10,0.437037
4,ResNet_svm_rbf_kernel,0.366971


In [7]:
# Same scoring as for the dev split, now on the test split.
# NOTE(review): presumably one F1 per model column measured against 'label' -
# helper source is not shown here, confirm.
utils.get_f1(df_test[models])

Unnamed: 0,models,F1-score
7,bert_base_cased_finetuned,0.547202
0,Advanced_svm_linear_100K_C10,0.512552
9,hatebert_vectors,0.504715
6,baseline_svm_rbf_kernelC10,0.502887
1,ResNet_nn,0.501992
3,ResNet_svm_linear_kernelC10,0.481481
2,ResNet_svm_linear_kernel,0.4596
5,ResNet_svm_rbf_kernelC10,0.458421
8,hatebert_direct,0.426173
4,ResNet_svm_rbf_kernel,0.38919


In [8]:
# Model columns used for the combination experiments:
# four ResNet models, three BERT models and one SVM on the six features.
models = [
    # ResNet-based models
    'ResNet_svm_rbf_kernelC10',
    'ResNet_svm_linear_kernel',
    'ResNet_svm_linear_kernelC10',
    'ResNet_nn',
    # BERT-based models
    'hatebert_direct',
    'hatebert_vectors',
    'bert_base_cased_finetuned',
    # SVM on the six features
    'Advanced_svm_linear_100K_C10',
]

In [25]:
# Show the selected model columns, one name per line.
print(*models, sep='\n')

ResNet_svm_rbf_kernelC10
ResNet_svm_linear_kernel
ResNet_svm_linear_kernelC10
ResNet_nn
hatebert_direct
hatebert_vectors
bert_base_cased_finetuned
Advanced_svm_linear_100K_C10


# Combining the predictions of multiple models into one

In [30]:
# Pair of model columns whose predictions are combined in the next cells.
models = [
    'Advanced_svm_100K_C10',
    'ResNet_nn',
]

In [31]:
# OR-combination on the test split: a row is predicted positive as soon as any
# of the columns in `models` is truthy. Vectorised replacement for the row-wise
# `apply(lambda row: any(row), axis=1)`; casting to bool first keeps Python
# truthiness (a NaN still counts as truthy) instead of pandas' NaN skipping.
combi = df_test[models].astype(bool).any(axis=1)

# Macro-averaged F1 of the combined prediction against the gold labels.
print(f1_score(df_test['label'], combi, average='macro'))

0.5084772503853296


In [32]:
# OR-combination on the dev split: a row is predicted positive as soon as any
# of the columns in `models` is truthy. Vectorised replacement for the row-wise
# `apply(lambda row: any(row), axis=1)`; casting to bool first keeps Python
# truthiness (a NaN still counts as truthy) instead of pandas' NaN skipping.
combi = df_dev[models].astype(bool).any(axis=1)

# Macro-averaged F1 of the combined prediction against the gold labels.
print(f1_score(df_dev['label'], combi, average='macro'))

0.48297918429237635


In [16]:
# Fit a linear SVM that takes the `models` columns of the dev split as input
# features and the dev 'label' column as target, then predict the test split
# from the same columns.
# random_state is pinned so that solver variants which shuffle the data give
# the same model on every run (Restart & Run All reproducibility).
clf_svc = LinearSVC(random_state=42)
Y_train = df_dev['label']
clf_svc.fit(df_dev[models], Y_train)
Y_pred = clf_svc.predict(df_test[models])

In [17]:
# Macro-averaged precision / recall / F1 of the SVM predictions on the test
# split; the fourth value returned by the scorer (support) is not reported.
precision, recall, f1, _support = precision_recall_fscore_support(
    df_test['label'], Y_pred, average='macro')

# One-row summary table, displayed as the cell output.
results = pd.DataFrame([[precision, recall, f1]],
                       columns=['precision', 'recall', 'F1'])
results

Unnamed: 0,precision,recall,F1
0,0.602959,0.594098,0.586815


In [60]:
# Ask the project helper for the best-scoring combination of the columns in
# `models`, evaluated on the test split, and report it.
# NOTE(review): the search runs directly on df_test, so the reported score is
# selected on the same data it is measured on - consider searching on df_dev.
# NOTE(review): the recorded output lists eight columns, while the nearest
# preceding `models` assignment in file order holds only two; this cell
# appears to depend on an earlier kernel state - confirm which list is intended.
# NOTE(review): how the helper combines columns and which F1 variant it
# returns is not visible here - confirm against utils.
best_score, best_columns = utils.find_best_column_combination(df_test, models)

print(f"Best combination: {best_columns}, F1-score: {best_score}")


Best combination: ['ResNet_svm_rbf_kernelC10', 'ResNet_svm_linear_kernel', 'ResNet_svm_linear_kernelC10', 'ResNet_nn', 'hatebert_direct', 'hatebert_vectors', 'bert_base_cased_finetuned', 'Advanced_svm_linear_100K_C10'], F1-score: 0.6207430340557276


In [70]:
# `best_combi` used to be assigned only in commented-out lines, so this cell
# raised NameError on a fresh kernel. The later of the two commented candidates
# is restored as the active definition.
# NOTE(review): confirm this is the list that produced the recorded score.
best_combi = ['Advanced_svm_100K_C10', 'ResNet_nn', 'ResNet_svm_linear_kernel',
              'ResNet_svm_linear_kernelC10', 'ResNet_svm_rbf_kernelC10',
              'baseline_svm_rbf_kernelC10', 'bert_base_cased_finetuned',
              'hatebert_direct', 'hatebert_vectors']
# Alternative candidate (same list without 'baseline_svm_rbf_kernelC10'):
# best_combi = ['Advanced_svm_100K_C10', 'ResNet_nn', 'ResNet_svm_linear_kernel', 'ResNet_svm_linear_kernelC10', 'ResNet_svm_rbf_kernelC10', 'bert_base_cased_finetuned', 'hatebert_direct', 'hatebert_vectors']

# OR-combination on the test split: positive as soon as any selected column is
# truthy. Casting to bool first keeps Python truthiness (NaN counts as truthy),
# matching the former row-wise `any(row)`.
combi = df_test[best_combi].astype(bool).any(axis=1)

# Macro-averaged F1 of the combined prediction against the gold labels.
print(f1_score(df_test['label'], combi, average='macro'))

0.4643263192877508
