Test Notebook

In [1]:
import nltk
import re
nltk.download("brown")
from wordnet_interface import WordNetInterface
from model import NThresholdModel, ContextualMaoModel,RandomBaseline,Models
import os
from nltk.tokenize import word_tokenize
from embeddings import FasttextModel,WordAssociationEmbeddings, BertEmbeddings,Node2VecEmbeddingsCreator,Node2VecEmbeddings
from data import Sentence, DataSet, Vectors
from swow_interface import SWOWInterface

[nltk_data] Downloading package brown to
[nltk_data]     /home/users1/kashefnd/nltk_data...
[nltk_data]   Package brown is already up-to-date!
  from .autonotebook import tqdm as notebook_tqdm


Sentence extraction functions for the two datasets used in the evaluation

In [1]:
# Edit these two paths so they point at the evaluation datasets before running
# the cells below. Both files are parsed as tab-separated text by the extractor
# functions defined in the following cells.
mohammad_filepath=""  # handed to DataSet(filepath=...) together with mohammad_extractor
knuples_filepath=""  # handed to DataSet(filepath=...) together with knuples_extractor

In [3]:
def knuples_extractor(filepath, use_unsure):
    """Parse the tab-separated phrase dataset into verb/noun ``Sentence`` pairs.

    Every row is expected to provide at least four columns: a two-word phrase
    (first word used as the verb target, second word as the noun target), two
    label columns, and the sentence text. A row is used when the second label
    is not ``"unsure"`` and equals the first label, or when it is ``"unsure"``
    and ``use_unsure`` is set.

    Label encoding:
        * ``use_unsure=False``: figurative -> 0, literal -> 1
        * ``use_unsure=True``:  figurative -> 0, unsure -> 1, literal -> 2

    Args:
        filepath: Path of the tab-separated dataset file.
        use_unsure: Whether rows labelled ``"unsure"`` are kept (as their own
            class) instead of being dropped.

    Returns:
        A list containing, for every accepted row, one ``Sentence`` with
        ``pos="v"`` followed by one with ``pos="n"``.

    Rows that are malformed (fewer than four columns, a phrase with fewer than
    two words, an unknown label) or that make ``Sentence`` raise ``ValueError``
    are skipped and counted; the counts are printed at the end.
    """
    sentences = []
    fail_counter = 0
    not_agree_counter = 0
    i = 0
    with open(filepath) as data:
        for line in data:
            i += 1
            datapoint = line.split("\t")
            if len(datapoint) < 4:
                # Blank or truncated row: count it as bad instead of letting an
                # IndexError abort the whole extraction.
                fail_counter += 1
                continue

            label = datapoint[2]
            is_unsure = label == "unsure"
            accepted = (not is_unsure and datapoint[1] == label) or (
                is_unsure and use_unsure
            )
            if not accepted:
                not_agree_counter += 1
                continue

            try:
                if is_unsure:
                    value = 1
                elif label == "literal":
                    # With the extra "unsure" class, literal moves up to 2.
                    value = 2 if use_unsure else 1
                elif label == "figurative":
                    value = 0
                else:
                    print(f"{label} is not a valid value")
                    raise ValueError(f"{label} is not a valid value")

                words = datapoint[0].split()
                if len(words) < 2:
                    # Both a verb and a noun target are required.
                    raise ValueError(f"{datapoint[0]} is not a two-word phrase")

                verb_sentence = Sentence(
                    sentence=datapoint[3],
                    target=words[0],
                    value=value,
                    phrase=datapoint[0],
                    pos="v",
                )
                noun_sentence = Sentence(
                    sentence=datapoint[3],
                    target=words[1],
                    value=value,
                    phrase=datapoint[0],
                    pos="n",
                )
                # Only add the pair once both sentences were built successfully.
                sentences += [verb_sentence, noun_sentence]
            except ValueError:
                fail_counter += 1
    print(f"Ignored {fail_counter} bad sentences of {i} ")
    print(f'Ignored {not_agree_counter} of {i} because of missing agreement to annotation')
    return sentences

In [None]:
def mohammad_extractor(filepath, use_unsure, confidence_threshold=0.7):
    """Parse the tab-separated verb dataset into ``Sentence`` objects.

    The first line of the file is skipped as a header. Only rows with exactly
    five tab-separated columns are considered; column 0 is used as the target
    verb, column 2 as the sentence text (anything of the form ``<...>`` is
    stripped from it), column 3 as the label and column 4 as a numeric
    confidence.

    Label encoding:
        * ``use_unsure=False``: rows below the confidence threshold are
          dropped; metaphorical -> 0, literal -> 1
        * ``use_unsure=True``: rows below the threshold become class 1;
          metaphorical -> 0, literal -> 2

    Args:
        filepath: Path of the tab-separated dataset file.
        use_unsure: Whether low-confidence rows are kept as their own class.
        confidence_threshold: Minimum confidence for a row's label to be
            trusted. Defaults to 0.7.

    Returns:
        A list with one ``Sentence`` (``pos="v"``) per accepted row.

    Rows with an unknown label, a non-numeric confidence, or for which
    ``Sentence`` raises ``ValueError`` are skipped and counted once each; the
    count is printed at the end.
    """
    sentences = []
    fail_counter = 0
    i = 0
    with open(filepath) as data:
        data.readline()  # skip the header row
        for line in data:
            i += 1
            datapoint = line.split("\t")
            if len(datapoint) != 5:
                continue
            # Everything that can raise ValueError lives inside the try so one
            # bad row is counted instead of aborting the whole extraction.
            try:
                confidence = float(datapoint[4])
                if not (confidence >= confidence_threshold or use_unsure):
                    continue
                if confidence < confidence_threshold:
                    value = 1
                elif datapoint[3] == "literal":
                    # With the extra low-confidence class, literal moves up to 2.
                    value = 2 if use_unsure else 1
                elif datapoint[3] == "metaphorical":
                    value = 0
                else:
                    print(f"{datapoint[3]} is not a valid value")
                    raise ValueError(f"{datapoint[3]} is not a valid value")
                # remove special tokens
                tokens = re.sub(r"<.*?>", "", datapoint[2])
                sentence = Sentence(
                    sentence=tokens, target=datapoint[0], value=value, pos="v"
                )
                sentences.append(sentence)
            except ValueError:
                fail_counter += 1
    print(f"Ignored {fail_counter} of {i}")
    return sentences

Loading Interfaces, Embeddings and Data

In [1]:
print("creating Interfaces")
# WordNet-backed candidate source (built once; it is reused by several models below).
wn = WordNetInterface()

# Location of the complete response file of the SWOW data — edit to match your setup.
# The interface can also be loaded via the parameter strength_file, which can be
# stored via the method write_strengths_to_file.
SWOW_RESPONSE_FILE = "SWOW-EN.complete.20180827.csv"

# SWOW interfaces; the variable names encode the number of responses used (r1 / r12 / r123),
# whether PPMI weighting is enabled (ppmi) and the candidate cap (c2).
swow_r1_c2 = SWOWInterface(
    number_of_responses=1, response_file=SWOW_RESPONSE_FILE, candidate_cap=2
)
swow_r1_ppmi = SWOWInterface(
    number_of_responses=1, response_file=SWOW_RESPONSE_FILE, use_ppmi=True, candidate_cap=0
)
swow_r12_ppmi = SWOWInterface(
    number_of_responses=2, response_file=SWOW_RESPONSE_FILE, use_ppmi=True, candidate_cap=0
)
swow_r123 = SWOWInterface(
    number_of_responses=3, response_file=SWOW_RESPONSE_FILE, candidate_cap=0
)
swow_r123_ppmi = SWOWInterface(
    number_of_responses=3, response_file=SWOW_RESPONSE_FILE, candidate_cap=0, use_ppmi=True
)

RANDOM_SEED = 53  # Seed for creating training and test data, 53 for reproducing study results
print("loading embeddings")
# Fasttext embeddings (optional; define fasttext_dir before enabling)
# ft_embeddings_wn = FasttextModel(load_file=fasttext_dir, fallback_source=wn)
# BERT embeddings
contextual_embeddings = BertEmbeddings(layers=[9, 10, 11, 12])
# Matrix factorization embeddings
# Uncomment to (re)create the embedding file instead of loading it:
# swow_embeddings_r12_ppmi = WordAssociationEmbeddings.create_graph_embeddings(swow=swow_r12_ppmi, index_file="cue_indices_manually_r12.tsv", embedding_file="graph_embeddings/graph_embeddings_manually_ppmi_300_r12.npy", use_only_cues=True, dimensions=300)
matrix_embeddings_r12_ppmi = WordAssociationEmbeddings(
    swow=swow_r12_ppmi,
    index_file="cue_indices_manually_r12.tsv",
    embedding_file="graph_embeddings/graph_embeddings_manually_ppmi_300_r12.npy",
)
# Node2vec embeddings
# takes a long time to train
# n2v_ppmi_r123_creator = Node2VecEmbeddingsCreator(graph=swow_r123_ppmi, is_directed=False, p=0.5, q=0.5)
# n2v_walks = n2v_ppmi_r123_creator.simulate_walks(num_walks=15, walk_length=75)
# n2v_ppmi_r123_creator.create_embeddings(save_file="graph_embeddings/n2v_r123_ppmi_sg.kv", walks=n2v_walks, dimensions=256, window_size=12, sg=True)
# NOTE(review): the original commented-out load call was garbled; the keyword name below is a guess — confirm against Node2VecEmbeddings.
# n2v_ppmi_r123 = Node2VecEmbeddings(load_file="graph_embeddings/n2v_r123_ppmi_sg.kv", swow=swow_r123_ppmi)

print("loading datasets")
knuples_data = DataSet(
    filepath=knuples_filepath,
    extraction_function=knuples_extractor,
    use_unsure=False,
    test_seed=RANDOM_SEED,
    test_split_size=0.2,
)
# Variants that keep the "unsure" rows as an additional class:
# knuples_data_unsure = DataSet(
#     filepath=knuples_filepath, extraction_function=knuples_extractor, use_unsure=True, test_seed=RANDOM_SEED, test_split_size=0.2
# )
# mohammad_data_unsure = DataSet(
#     filepath=mohammad_filepath, extraction_function=mohammad_extractor, use_unsure=True, test_seed=RANDOM_SEED, test_split_size=0.2
# )
mohammad_data = DataSet(
    filepath=mohammad_filepath,
    extraction_function=mohammad_extractor,
    use_unsure=False,
    test_seed=RANDOM_SEED,
    test_split_size=0.2,
)

creating Interfaces


NameError: name 'WordNetInterface' is not defined

The five best models from our study (plus a random baseline for comparison)

In [6]:
# Contextual (BERT) models, all evaluated on the verb targets of the Mohammad data.
baseline_context = ContextualMaoModel(
    data=mohammad_data,
    candidate_source=wn,
    mean_multi_word=True,
    fit_embeddings=contextual_embeddings,
    score_embeddings=contextual_embeddings,
    use_context_vec=True,
    apply_candidate_weight=False,
    restrict_pos=["v"],
    num_classes=2,
)
swow_candidates_target = ContextualMaoModel(
    data=mohammad_data,
    candidate_source=swow_r123,
    mean_multi_word=True,
    fit_embeddings=contextual_embeddings,
    score_embeddings=contextual_embeddings,
    use_context_vec=False,
    apply_candidate_weight=False,
    restrict_pos=["v"],
    num_classes=2,
)
swow_candidates_context = ContextualMaoModel(
    data=mohammad_data,
    candidate_source=swow_r1_c2,
    mean_multi_word=True,
    fit_embeddings=contextual_embeddings,
    score_embeddings=contextual_embeddings,
    use_context_vec=True,
    apply_candidate_weight=True,
    restrict_pos=["v"],
    num_classes=2,
)

# Threshold models scored with the SWOW matrix-factorization embeddings.
swow_embeddings_matrix = NThresholdModel(
    data=mohammad_data,
    candidate_source=wn,
    mean_multi_word=False,
    fit_embeddings=matrix_embeddings_r12_ppmi,
    score_embeddings=matrix_embeddings_r12_ppmi,
    use_output_vec=False,
    apply_candidate_weight=False,
    restrict_pos=["v"],
    num_classes=2,
)
# Node2vec takes a long time to train, you can use Matrix embeddings instead
swow_model_n2v = NThresholdModel(
    data=mohammad_data,
    candidate_source=swow_r1_c2,
    mean_multi_word=False,
    fit_embeddings=matrix_embeddings_r12_ppmi,
    score_embeddings=matrix_embeddings_r12_ppmi,
    use_output_vec=False,
    apply_candidate_weight=False,
    restrict_pos=["v"],
    num_classes=2,
)
# Node2vec variant of the model above (requires the n2v_ppmi_r123 embeddings):
# swow_model_n2v = NThresholdModel(data=mohammad_data, candidate_source=swow_r1_c2, mean_multi_word=False, fit_embeddings=n2v_ppmi_r123, score_embeddings=n2v_ppmi_r123, use_output_vec=False, apply_candidate_weight=False, restrict_pos=["v"], num_classes=2)

# Random baseline for comparison; no POS restriction is applied here.
random_baseline = RandomBaseline(
    data=mohammad_data,
    candidate_source=wn,
    score_embeddings=matrix_embeddings_r12_ppmi,
    restrict_pos=None,
    num_classes=2,
)

In [7]:
# Models to evaluate; `filenames` holds the matching display name for each model
# (same order) and is also used to build the output file names.
models: list[NThresholdModel] = [
    baseline_context,
    swow_candidates_target,
    swow_candidates_context,
    swow_embeddings_matrix,
    swow_model_n2v,
    random_baseline,
]
filenames = [
    "Baseline Context",
    "SWOW Candidates Target",
    "SWOW Candidates Context",
    "test SWOW Embeddings Matrix",
    "test SWOW Both",
    "Random Baseline",
]

Evaluation

In [8]:
# Create the output folders up front so that saving results and plots also
# works on a fresh checkout where they do not exist yet.
for output_dir in ("model_results", "model_distributions", "recall_curves"):
    os.makedirs(output_dir, exist_ok=True)

# Train, evaluate and plot every model except the random baseline, which is
# only included in the comparison curve drawn afterwards.
for model, name in zip(models, filenames):
    if name == "Random Baseline":
        continue
    print(name)
    print("training")
    model.train_thresholds(
        metrics=["macro_f_1"],
        data=model.train_dev_data,
        by_pos=["v"],
        by_phrase=False,
    )
    print("evaluating")
    model.evaluate(
        data=model.test_data,
        save_file=f"model_results/{name}.txt",
        by_pos=["v"],
        by_phrase=False,
    )
    print("drawing distributions")
    model.draw_distribution_per_class(
        save_file=f"model_distributions/{name}.png",
        title=name,
        data=model.test_data,
        by_pos=["v"],
        labels=["metaphorical", "literal"],
    )

# One comparison curve over all models (including the random baseline), using
# the test split of the first model as the shared evaluation data.
print("drawing ROC")
Models.get_recall_curve(
    data=models[0].test_data,
    save_file="recall_curves/" + "_".join(filenames) + ".png",
    models=models,
    graph_labels=filenames,
    by_pos=["v"],
)

Baseline Context
training


100%|██████████| 1014/1014 [03:27<00:00,  4.89it/s]


ignored 0 sentences of 1014


100%|██████████| 746/746 [00:00<00:00, 1631.00it/s]


Best Thresholds: [tensor(0.7301, device='cuda:0')]
Best score:0.525204946607464
evaluating


100%|██████████| 253/253 [00:31<00:00,  7.96it/s]


[[ 20.  54.]
 [ 25. 154.]]
ignored 0 sentences of 253
drawing distributions
SWOW Candidates Target
training


100%|██████████| 1014/1014 [01:58<00:00,  8.59it/s]


ignored 0 sentences of 1014


100%|██████████| 608/608 [00:00<00:00, 1657.12it/s]


Best Thresholds: [tensor(0.7760, device='cuda:0')]
Best score:0.5188194444444445
evaluating


100%|██████████| 253/253 [00:14<00:00, 17.08it/s]


[[ 12.  29.]
 [ 33. 179.]]
ignored 0 sentences of 253
drawing distributions
SWOW Candidates Context
training


100%|██████████| 1014/1014 [02:09<00:00,  7.81it/s]


ignored 0 sentences of 1014


100%|██████████| 802/802 [00:00<00:00, 1626.83it/s]


Best Thresholds: [tensor(0.5412, device='cuda:0')]
Best score:0.5485315623282631
evaluating


100%|██████████| 253/253 [00:29<00:00,  8.71it/s]


[[ 11.  26.]
 [ 34. 182.]]
ignored 0 sentences of 253
drawing distributions
test SWOW Embeddings Matrix
training


100%|██████████| 1014/1014 [00:00<00:00, 2070.16it/s]


ignored 29 sentences of 1014


100%|██████████| 631/631 [00:00<00:00, 26528.13it/s]


Best Thresholds: [0.08510261864089072]
Best score:0.6165800749433372
evaluating


100%|██████████| 253/253 [00:00<00:00, 2202.85it/s]


[[ 20.  58.]
 [ 25. 146.]]
ignored 4 sentences of 253
drawing distributions
test SWOW Both
training


100%|██████████| 1014/1014 [00:00<00:00, 2487.05it/s]


ignored 14 sentences of 1014


100%|██████████| 525/525 [00:00<00:00, 25919.72it/s]


Best Thresholds: [0.2798436058397144]
Best score:0.5342338146250583
evaluating


100%|██████████| 253/253 [00:00<00:00, 2531.90it/s]


[[ 18.  55.]
 [ 27. 151.]]
ignored 2 sentences of 253
drawing distributions
drawing ROC


100%|██████████| 253/253 [00:26<00:00,  9.46it/s]
100%|██████████| 253/253 [00:13<00:00, 19.13it/s]
100%|██████████| 253/253 [00:28<00:00,  8.91it/s]
100%|██████████| 253/253 [00:00<00:00, 1845.02it/s]
100%|██████████| 253/253 [00:00<00:00, 2344.35it/s]
100%|██████████| 253/253 [00:00<00:00, 8018.91it/s]


Examples

In [9]:
# Inspect a single training example with the contextual baseline model.
model = baseline_context
i = 50
sentence = model.train_dev_data[i]

# The sentence text, its target word and the gold label.
print(sentence.sentence, sentence.target, sentence.value)

# The model's best-fitting candidate, its comparison value and the predicted class.
best_candidate = model.best_fit(sentence)
compare_value = model.get_compare_value(sentence)
predicted_class = model.predict(sentence)
print(best_candidate, compare_value, predicted_class)

# The verb candidates the model's candidate source offers for the target word.
candidate_set = model.candidate_source.get_candidate_set(sentence.target, pos=["v"])
print(candidate_set)

the wings of the birds clapped loudly clap 1
beat tensor(0.7303, device='cuda:0') 1
{'applaud', 'clap', 'put', 'gesticulate', 'hit', 'beat', 'spat'}
