In [1]:
from embeddings_loader import *
from sklearn.cluster import KMeans
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import roc_auc_score


In [2]:
# Ground-truth labels for the train, dev and test splits.
# load_labels is not defined in this notebook; presumably it is provided by the
# `from embeddings_loader import *` star import — verify there.
train_labels, dev_labels, test_labels = load_labels()

In [3]:
def computeAllScores(y_pred_train, y_pred_dev, y_pred_test):
    """Print accuracy, F1, recall and confusion matrices for all three splits.

    Each prediction sequence is scored against the matching module-level
    ground truth (``train_labels``, ``dev_labels``, ``test_labels``).  F1 and
    recall are reported with weighted, macro and micro averaging, in that
    order.  Nothing is returned; every result is written to stdout.

    NOTE(review): the callers in this notebook pass raw ``KMeans.predict``
    output, so cluster indices are compared to the encoded label ids as-is.
    Confirm that the cluster numbering is meant to line up with the labels.
    """
    # (display name, ground truth, predictions) for each split, in print order.
    splits = (
        ("Train", train_labels, y_pred_train),
        ("Dev", dev_labels, y_pred_dev),
        ("Test", test_labels, y_pred_test),
    )

    for split_name, y_true, y_pred in splits:
        print(f"Accuracy {split_name}: ", accuracy_score(y_true, y_pred))

    # Metric-major, averaging-next, split-last: same order as the report
    # has always been printed in.
    for metric_name, metric in (("F1", f1_score), ("Recall", recall_score)):
        for averaging in ("weighted", "macro", "micro"):
            for split_name, y_true, y_pred in splits:
                heading = f"{averaging.capitalize()} {metric_name} {split_name}: "
                print(heading, metric(y_true, y_pred, average=averaging))

    # Confusion Matrix
    for split_name, y_true, y_pred in splits:
        print(f"Confusion Matrix {split_name}: ")
        print(confusion_matrix(y_true, y_pred))

In [4]:
# Integer id for each string class label; the position in the tuple is the id.
label_replacement = {
    name: index
    for index, name in enumerate(('Hope_speech', 'Non_hope_speech', 'not-English'))
}

In [5]:
# Replace the string labels with their integer ids from label_replacement.
# Labels that are already ints pass through unchanged, so re-running this cell
# after it has executed once no longer raises KeyError. Unknown string labels
# still raise KeyError, as before.
train_labels = [
    label if isinstance(label, int) else label_replacement[label]
    for label in train_labels
]
dev_labels = [
    label if isinstance(label, int) else label_replacement[label]
    for label in dev_labels
]
test_labels = [
    label if isinstance(label, int) else label_replacement[label]
    for label in test_labels
]

### GloVe Twitter 25

In [6]:
# Load the train/dev/test feature matrices for this section.
# presumably 25-d GloVe Twitter embeddings, going by the loader name — verify in embeddings_loader
gt25_train, gt25_dev, gt25_test = load_glove_twitter_25()

In [7]:
# Replace NaN entries with 0 in every split. With its default arguments
# np.nan_to_num also turns +/-inf into very large finite values.
gt25_train, gt25_dev, gt25_test = (
    np.nan_to_num(split) for split in (gt25_train, gt25_dev, gt25_test)
)

In [8]:
# Fit a 3-cluster KMeans on the training features only. KMeans is unsupervised:
# fit() ignores its `y` argument, so the labels are no longer passed in.
# NOTE(review): n_init is left at the library default, which changed from 10 to
# 'auto' in scikit-learn 1.4; pin it if results must match across versions.
k_means = KMeans(n_clusters=3, random_state=0).fit(gt25_train)

In [9]:
# Assign every sample of each split to its closest fitted cluster.
# NOTE(review): these are cluster indices, not class ids; computeAllScores
# compares them to the encoded labels directly — confirm that is intended.
train_preds, dev_preds, test_preds = (
    k_means.predict(split) for split in (gt25_train, gt25_dev, gt25_test)
)

In [10]:
# Print the train/dev/test report for the clustering fitted in this section.
computeAllScores(
    y_pred_train=train_preds,
    y_pred_dev=dev_preds,
    y_pred_test=test_preds,
)

Accuracy Train:  0.18153062121078992
Accuracy Dev:  0.17622230038691522
Accuracy Test:  0.18657765284609978
Weighted F1 Train:  0.24751635808632247
Weighted F1 Dev:  0.24277870398836413
Weighted F1 Test:  0.25577147685913026
Macro F1 Train:  0.147388082222974
Macro F1 Dev:  0.14283631492082952
Macro F1 Test:  0.15016522555352096
Micro F1 Train:  0.1815306212107899
Micro F1 Dev:  0.17622230038691522
Micro F1 Test:  0.18657765284609978
Weighted Recall Train:  0.18153062121078992
Weighted Recall Dev:  0.17622230038691525
Weighted Recall Test:  0.18657765284609978
Macro Recall Train:  0.32618430603949866
Macro Recall Dev:  0.2107389958708279
Macro Recall Test:  0.23452063247204014
Micro Recall Train:  0.18153062121078992
Micro Recall Dev:  0.17622230038691522
Micro Recall Test:  0.18657765284609978
Confusion Matrix Train: 
[[1099   70  793]
 [8657 3027 9094]
 [   4   12    6]]
Confusion Matrix Dev: 
[[ 133    8  131]
 [1083  368 1118]
 [   0    2    0]]
Confusion Matrix Test: 
[[ 138    5 

### FastText 300 

In [11]:
# Load the train/dev/test feature matrices for this section.
# presumably 300-d FastText embeddings, going by the loader name — verify in embeddings_loader
ft300_train, ft300_dev, ft300_test = load_fasttext_300()

In [12]:
# Replace NaN entries with 0 in every split. With its default arguments
# np.nan_to_num also turns +/-inf into very large finite values.
ft300_train, ft300_dev, ft300_test = (
    np.nan_to_num(split) for split in (ft300_train, ft300_dev, ft300_test)
)

In [13]:
# Fit a 3-cluster KMeans on the training features only. KMeans is unsupervised:
# fit() ignores its `y` argument, so the labels are no longer passed in.
# NOTE(review): n_init is left at the library default, which changed from 10 to
# 'auto' in scikit-learn 1.4; pin it if results must match across versions.
k_means = KMeans(n_clusters=3, random_state=0).fit(ft300_train)

In [14]:
# Assign every sample of each split to its closest fitted cluster.
# NOTE(review): these are cluster indices, not class ids; computeAllScores
# compares them to the encoded labels directly — confirm that is intended.
train_preds, dev_preds, test_preds = (
    k_means.predict(split) for split in (ft300_train, ft300_dev, ft300_test)
)

In [15]:
# Print the train/dev/test report for the clustering fitted in this section.
computeAllScores(
    y_pred_train=train_preds,
    y_pred_dev=dev_preds,
    y_pred_test=test_preds,
)

Accuracy Train:  0.09138037079342765
Accuracy Dev:  0.09672880759760816
Accuracy Test:  0.08995080815179199
F1 Train:  0.09425327100502011
F1 Dev:  0.10600388101230933
F1 Test:  0.09230425202211685
Precision Train:  0.34223776163350256
Precision Dev:  0.3454517868120958
Precision Test:  0.34024789171993636
Recall Train:  0.3564043982385188
Recall Dev:  0.30960108839176
Recall Test:  0.34402810986844923
Confusion Matrix Train: 
[[  624   115  1223]
 [ 5633  1441 13704]
 [    7     0    15]]
Confusion Matrix Dev: 
[[  98   18  156]
 [ 663  176 1730]
 [   1    0    1]]
Confusion Matrix Test: 
[[  74   15  161]
 [ 691  180 1722]
 [   1    0    2]]


### Word2Vec 300

In [16]:
# Load the train/dev/test feature matrices for this section.
# presumably 300-d Word2Vec embeddings, going by the loader name — verify in embeddings_loader
w2v300_train, w2v300_dev, w2v300_test = load_word2vec_300()

In [17]:
# Replace NaN entries with 0 in every split. With its default arguments
# np.nan_to_num also turns +/-inf into very large finite values.
w2v300_train, w2v300_dev, w2v300_test = (
    np.nan_to_num(split) for split in (w2v300_train, w2v300_dev, w2v300_test)
)

In [18]:
# Fit a 3-cluster KMeans on the training features only. KMeans is unsupervised:
# fit() ignores its `y` argument, so the labels are no longer passed in.
# NOTE(review): n_init is left at the library default, which changed from 10 to
# 'auto' in scikit-learn 1.4; pin it if results must match across versions.
k_means = KMeans(n_clusters=3, random_state=0).fit(w2v300_train)

In [19]:
# Assign every sample of each split to its closest fitted cluster.
# NOTE(review): these are cluster indices, not class ids; computeAllScores
# compares them to the encoded labels directly — confirm that is intended.
train_preds, dev_preds, test_preds = (
    k_means.predict(split) for split in (w2v300_train, w2v300_dev, w2v300_test)
)

In [20]:
# Print the train/dev/test report for the clustering fitted in this section.
computeAllScores(
    y_pred_train=train_preds,
    y_pred_dev=dev_preds,
    y_pred_test=test_preds,
)

Accuracy Train:  0.601704595378262
Accuracy Dev:  0.5993668659866338
Accuracy Test:  0.5948699929725931
F1 Train:  0.259209304347983
F1 Dev:  0.2591562570724056
F1 Test:  0.25488634436540464
Precision Train:  0.3066791913582484
Precision Dev:  0.30496220828068415
Precision Test:  0.30158778930235264
Recall Train:  0.3596312774201172
Recall Dev:  0.3909218319480381
Recall Test:  0.4408071303080945
Confusion Matrix Train: 
[[   24  1452   486]
 [ 1540 13663  5575]
 [    1    12     9]]
Confusion Matrix Dev: 
[[   3  195   74]
 [ 178 1700  691]
 [   1    0    1]]
Confusion Matrix Test: 
[[   1  193   56]
 [ 194 1690  709]
 [   0    1    2]]


### TF-IDF PCA (1000 Dims)

In [21]:
# Load the train/dev/test feature matrices for this section.
# presumably PCA-reduced TF-IDF vectors, going by the loader name — verify in embeddings_loader
tfidf_pca_train, tfidf_pca_dev, tfidf_pca_test = load_tfidf_pca()

In [22]:
# Fit a 3-cluster KMeans on the training features only. KMeans is unsupervised:
# fit() ignores its `y` argument, so the labels are no longer passed in.
# NOTE(review): n_init is left at the library default, which changed from 10 to
# 'auto' in scikit-learn 1.4; pin it if results must match across versions.
k_means = KMeans(n_clusters=3, random_state=0).fit(tfidf_pca_train)

In [23]:
# Assign every sample of each split to its closest fitted cluster.
# NOTE(review): these are cluster indices, not class ids; computeAllScores
# compares them to the encoded labels directly — confirm that is intended.
train_preds, dev_preds, test_preds = (
    k_means.predict(split)
    for split in (tfidf_pca_train, tfidf_pca_dev, tfidf_pca_test)
)

In [24]:
# Print the train/dev/test report for the clustering fitted in this section.
computeAllScores(
    y_pred_train=train_preds,
    y_pred_dev=dev_preds,
    y_pred_test=test_preds,
)

Accuracy Train:  0.13904753536596082
Accuracy Dev:  0.13823425958494548
Accuracy Test:  0.15038650737877723
F1 Train:  0.1378300930462873
F1 Dev:  0.14548665436659838
F1 Test:  0.15596801860050574
Precision Train:  0.3566092863759667
Precision Dev:  0.36888950434125073
Precision Test:  0.37446859668065735
Recall Train:  0.36377233556457167
Recall Dev:  0.2810823239377495
Recall Test:  0.45904974932510606
Confusion Matrix Train: 
[[  362   318  1282]
 [ 1736  2786 16256]
 [    1     4    17]]
Confusion Matrix Dev: 
[[  58   36  178]
 [ 226  334 2009]
 [   1    0    1]]
Confusion Matrix Test: 
[[  59   34  157]
 [ 226  366 2001]
 [   0    0    3]]


### Sentence Transformer Faster No PCA

In [25]:
# Load the train/dev/test feature matrices for this section (loader presumably
# comes from the embeddings_loader star import).
# NOTE(review): the generic names `train`, `dev` and `test` are rebound by each
# later sentence-transformer section, so a section's cells must be run together.
train, dev, test = load_sent_trans_fast_no_pca()

In [26]:
# Fit a 3-cluster KMeans on the training features only. KMeans is unsupervised:
# fit() ignores its `y` argument, so the labels are no longer passed in.
# NOTE(review): n_init is left at the library default, which changed from 10 to
# 'auto' in scikit-learn 1.4; pin it if results must match across versions.
k_means = KMeans(n_clusters=3, random_state=0).fit(train)

In [27]:
# Assign every sample of each split to its closest fitted cluster.
# NOTE(review): these are cluster indices, not class ids; computeAllScores
# compares them to the encoded labels directly — confirm that is intended.
train_preds, dev_preds, test_preds = (
    k_means.predict(split) for split in (train, dev, test)
)

In [28]:
# Print the train/dev/test report for the clustering fitted in this section.
computeAllScores(
    y_pred_train=train_preds,
    y_pred_dev=dev_preds,
    y_pred_test=test_preds,
)

Accuracy Train:  0.18153062121078992
Accuracy Dev:  0.1790362293352093
Accuracy Test:  0.18587491215741392
F1 Train:  0.14011556969253425
F1 Dev:  0.14181942622918112
F1 Test:  0.1460971673361054
Precision Train:  0.30551191414821555
Precision Dev:  0.3050709246535144
Precision Test:  0.3132600246969472
Recall Train:  0.27789482147876393
Recall Dev:  0.30709095627351757
Recall Test:  0.4852860264815529
Confusion Matrix Train: 
[[  490   747   725]
 [ 5069  3633 12076]
 [    4     9     9]]
Confusion Matrix Dev: 
[[  68   99  105]
 [ 623  440 1506]
 [   1    0    1]]
Confusion Matrix Test: 
[[  70   86   94]
 [ 655  456 1482]
 [   0    0    3]]


### Sentence Transformer Faster PCA

In [11]:
# Load the train/dev/test feature matrices for this section (loader presumably
# comes from the embeddings_loader star import).
# NOTE(review): the generic names `train`, `dev` and `test` are shared with the
# other sentence-transformer sections, so a section's cells must be run together.
train, dev, test = load_sent_trans_fast_pca()

In [12]:
# Fit a 3-cluster KMeans on the training features only. KMeans is unsupervised:
# fit() ignores its `y` argument, so the labels are no longer passed in.
# NOTE(review): n_init is left at the library default, which changed from 10 to
# 'auto' in scikit-learn 1.4; pin it if results must match across versions.
k_means = KMeans(n_clusters=3, random_state=0).fit(train)

In [13]:
# Assign every sample of each split to its closest fitted cluster.
# NOTE(review): these are cluster indices, not class ids; computeAllScores
# compares them to the encoded labels directly — confirm that is intended.
train_preds, dev_preds, test_preds = (
    k_means.predict(split) for split in (train, dev, test)
)

In [14]:
# Print the train/dev/test report for the clustering fitted in this section.
computeAllScores(
    y_pred_train=train_preds,
    y_pred_dev=dev_preds,
    y_pred_test=test_preds,
)

Accuracy Train:  0.25498638081012215
Accuracy Dev:  0.25606753429475904
Accuracy Test:  0.26317638791286013
Weighted F1 Train:  0.3598936000704762
Weighted F1 Dev:  0.35593078941038625
Weighted F1 Test:  0.3687425678969312
Macro F1 Train:  0.16236791928746389
Macro F1 Dev:  0.16451878372140416
Macro F1 Test:  0.16584946491566122
Micro F1 Train:  0.25498638081012215
Micro F1 Dev:  0.25606753429475904
Micro F1 Test:  0.26317638791286013
Weighted Recall Train:  0.25498638081012215
Weighted Recall Dev:  0.25606753429475904
Weighted Recall Test:  0.26317638791286013
Macro Recall Train:  0.340719445147117
Macro Recall Dev:  0.20951207458460222
Macro Recall Test:  0.20953438745340017
Micro Recall Train:  0.25498638081012215
Micro Recall Dev:  0.25606753429475904
Micro Recall Test:  0.26317638791286013
Confusion Matrix Train: 
[[  724   490   748]
 [12077  5071  3630]
 [    9     4     9]]
Confusion Matrix Dev: 
[[ 105   68   99]
 [1506  623  440]
 [   1    1    0]]
Confusion Matrix Test: 
[[ 

### Sentence Transformer Better No PCA

In [33]:
# Load the train/dev/test feature matrices for this section (loader presumably
# comes from the embeddings_loader star import).
# NOTE(review): the generic names `train`, `dev` and `test` are shared with the
# other sentence-transformer sections, so a section's cells must be run together.
train, dev, test = load_sent_trans_better_no_pca()

In [34]:
# Fit a 3-cluster KMeans on the training features only. KMeans is unsupervised:
# fit() ignores its `y` argument, so the labels are no longer passed in.
# NOTE(review): n_init is left at the library default, which changed from 10 to
# 'auto' in scikit-learn 1.4; pin it if results must match across versions.
k_means = KMeans(n_clusters=3, random_state=0).fit(train)

In [35]:
# Assign every sample of each split to its closest fitted cluster.
# NOTE(review): these are cluster indices, not class ids; computeAllScores
# compares them to the encoded labels directly — confirm that is intended.
train_preds, dev_preds, test_preds = (
    k_means.predict(split) for split in (train, dev, test)
)

In [36]:
# Print the train/dev/test report for the clustering fitted in this section.
computeAllScores(
    y_pred_train=train_preds,
    y_pred_dev=dev_preds,
    y_pred_test=test_preds,
)

Accuracy Train:  0.26276249890167824
Accuracy Dev:  0.2655645444952515
Accuracy Test:  0.26879831342234717
F1 Train:  0.16384026720836667
F1 Dev:  0.16543060063977458
F1 Test:  0.16483106838829087
Precision Train:  0.31950737335931884
Precision Dev:  0.31541708431270415
Precision Test:  0.3132002345714507
Recall Train:  0.3390013963269374
Recall Dev:  0.18671767071951395
Recall Test:  0.1874955649826456
Confusion Matrix Train: 
[[  596   546   820]
 [11501  5375  3902]
 [    8     4    10]]
Confusion Matrix Dev: 
[[  81   80  111]
 [1431  674  464]
 [   1    1    0]]
Confusion Matrix Test: 
[[  74   85   91]
 [1429  691  473]
 [   3    0    0]]


### Sentence Transformer Better PCA

In [37]:
# Load the train/dev/test feature matrices for this section (loader presumably
# comes from the embeddings_loader star import).
# NOTE(review): the generic names `train`, `dev` and `test` are shared with the
# other sentence-transformer sections, so a section's cells must be run together.
train, dev, test = load_sent_trans_better_pca()

In [38]:
# Fit a 3-cluster KMeans on the training features only. KMeans is unsupervised:
# fit() ignores its `y` argument, so the labels are no longer passed in.
# NOTE(review): n_init is left at the library default, which changed from 10 to
# 'auto' in scikit-learn 1.4; pin it if results must match across versions.
k_means = KMeans(n_clusters=3, random_state=0).fit(train)

In [39]:
# Assign every sample of each split to its closest fitted cluster.
# NOTE(review): these are cluster indices, not class ids; computeAllScores
# compares them to the encoded labels directly — confirm that is intended.
train_preds, dev_preds, test_preds = (
    k_means.predict(split) for split in (train, dev, test)
)

In [40]:
# Print the train/dev/test report for the clustering fitted in this section.
computeAllScores(
    y_pred_train=train_preds,
    y_pred_dev=dev_preds,
    y_pred_test=test_preds,
)

Accuracy Train:  0.2627185660311045
Accuracy Dev:  0.2655645444952515
Accuracy Test:  0.26879831342234717
F1 Train:  0.1637930676291687
F1 Dev:  0.16543060063977458
F1 Test:  0.16486310277070693
Precision Train:  0.3194769438592779
Precision Dev:  0.31541708431270415
Precision Test:  0.31322201506178254
Recall Train:  0.33867764960549235
Recall Dev:  0.18671767071951395
Recall Test:  0.1874955649826456
Confusion Matrix Train: 
[[  594   546   822]
 [11487  5376  3915]
 [    8     4    10]]
Confusion Matrix Dev: 
[[  81   80  111]
 [1431  674  464]
 [   1    1    0]]
Confusion Matrix Test: 
[[  74   85   91]
 [1427  691  475]
 [   3    0    0]]
