<a href="https://colab.research.google.com/github/lwachowiak/Term-Extraction-With-Language-Models/blob/main/Term_Extraction_Sequence_Classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports


In [None]:
# Install the third-party packages imported in the next cell:
# transformers (XLM-R tokenizer/model/scheduler), sacremoses (Moses tokenizer/detokenizer), sentencepiece.
# NOTE(review): no versions are pinned here, so a fresh run may pick up newer, incompatible releases.
!pip install transformers
!pip install sacremoses
!pip install sentencepiece

In [3]:
#torch and transformers for model and training
import torch  
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.utils.data import TensorDataset
from transformers import XLMRobertaTokenizer              
from transformers import XLMRobertaForSequenceClassification
from transformers import AdamW                            
from transformers import get_linear_schedule_with_warmup
import sentencepiece

#sklearn for evaluation
from sklearn import preprocessing                       
from sklearn.metrics import classification_report        
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import ParameterGrid         
from sklearn.model_selection import ParameterSampler      
from sklearn.utils.fixes import loguniform

#nlp preprocessing
from nltk import ngrams                                 
from spacy.pipeline import SentenceSegmenter
from spacy.lang.en import English
from spacy.pipeline import Sentencizer
from sacremoses import MosesTokenizer, MosesDetokenizer


#utilities
import pandas as pd
import glob, os
import time
import datetime
import random
import numpy as np
import matplotlib.pyplot as plt
% matplotlib inline
import seaborn as sns
import pickle

In [None]:
# connect to GPU, falling back to the CPU when no CUDA device is visible
# (torch.cuda.get_device_name raises when CUDA is unavailable, so it is only called on a GPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if device.type == 'cuda':
  print('Connected to GPU:', torch.cuda.get_device_name(0))
else:
  print('No GPU available, running on CPU.')

Connected to GPU: Tesla P100-PCIE-16GB


# Prepare Data

Training Data: corp, wind

Validation Data: equi

Test Data: htfl

In [None]:
#load terms

# folder that contains the unpacked ACTER corpus (one sub-folder per language, then per domain)
ACTER_ROOT = 'ACTER-master/ACTER-master'


def load_term_annotations(lang, domain):
  """Read the term + named-entity annotation list of one ACTER language/domain pair.

  Args:
    lang: language folder name, e.g. "en", "fr" or "nl".
    domain: domain folder name, e.g. "corp", "equi", "htfl" or "wind".

  Returns:
    DataFrame with the columns "Term" and "Label", read from the tab-separated file
    ``<ACTER_ROOT>/<lang>/<domain>/annotations/<domain>_<lang>_terms_nes.ann``.
  """
  path = '{root}/{lang}/{domain}/annotations/{domain}_{lang}_terms_nes.ann'.format(
      root=ACTER_ROOT, lang=lang, domain=domain)
  return pd.read_csv(path, delimiter="\t", names=["Term", "Label"])


#en
df_corp_terms_en=load_term_annotations("en", "corp")
df_equi_terms_en=load_term_annotations("en", "equi")
df_htfl_terms_en=load_term_annotations("en", "htfl")
df_wind_terms_en=load_term_annotations("en", "wind")

#fr
df_corp_terms_fr=load_term_annotations("fr", "corp")
df_equi_terms_fr=load_term_annotations("fr", "equi")
df_htfl_terms_fr=load_term_annotations("fr", "htfl")
df_wind_terms_fr=load_term_annotations("fr", "wind")

#nl
df_corp_terms_nl=load_term_annotations("nl", "corp")
df_equi_terms_nl=load_term_annotations("nl", "equi")
df_htfl_terms_nl=load_term_annotations("nl", "htfl")
df_wind_terms_nl=load_term_annotations("nl", "wind")

# class names of the binary classifier; the list index is the class id (0 = "Random", 1 = "Term")
labels=["Random", "Term"]

In [None]:
# example terms
df_wind_terms_en

Unnamed: 0,Term,Label
0,48/600,Named_Entity
1,4energia,Named_Entity
2,4energy,Named_Entity
3,"ab ""lietuvos energija""",Named_Entity
4,ab lietuvos elektrine,Named_Entity
...,...,...
1529,zhiquan,Named_Entity
1530,çetinkaya,Named_Entity
1531,çeti̇nkaya,Named_Entity
1532,çeşme,Named_Entity


**Functions for preprocessing and creating the training data**

In [None]:
#load all text files from folder into a string
def load_text_corpus(path):
  """Read every ``*.txt`` file of a folder and return the contents as one string.

  Args:
    path: folder that holds the text files; a trailing path separator is optional.

  Returns:
    The file contents concatenated in sorted file-name order, each one preceded by a
    single space. An empty string is returned when the folder has no ``.txt`` file.
  """
  print(glob.glob(path))
  texts = []
  # os.path.join makes the trailing slash optional; sorting makes the corpus order
  # (and therefore every derived data set) reproducible across file systems
  for file in sorted(glob.glob(os.path.join(path, "*.txt"))):
      print(file)
      # explicit encoding: the platform default is not UTF-8 everywhere and would
      # corrupt or reject the accented characters of the French and Dutch texts
      with open(file, encoding="utf-8") as f:
        temp_data = f.read()
      print(len(temp_data))
      texts.append(temp_data)
  # a single join instead of repeated string concatenation inside the loop
  text_data = "".join(" " + temp_data for temp_data in texts)
  print(len(text_data))
  return text_data

In [None]:
#split in sentences and tokenize
def preprocess(text, lang='en'):
  """Split a raw text into sentences and Moses-tokenize every sentence.

  Args:
    text: the raw corpus text.
    lang: language code handed to the Moses tokenizer. Defaults to 'en', which is
      the value that used to be hard-coded.

  Returns:
    List of ``(tokens, sentence)`` tuples: ``tokens`` is the list of Moses tokens and
    ``sentence`` is the original spaCy sentence span.
  """
  #sentenize (from spacy)
  sentencizer = Sentencizer()
  nlp = English()
  nlp.add_pipe(sentencizer)
  # spaCy rejects texts longer than nlp.max_length; the limit protects the parser/NER,
  # neither of which is in this pipeline, so it is raised when the corpus is longer
  nlp.max_length = max(nlp.max_length, len(text) + 1)
  doc = nlp(text)

  #tokenize
  mt = MosesTokenizer(lang=lang)
  # tuple of tokens and original sentence
  sentence_list = [(mt.tokenize(s, return_str=True).split(), s) for s in doc.sents]
  return sentence_list


In [None]:
#input is list of sentences and dataframe containing terms
def create_training_data(sentence_list, df_terms, n):
  """Turn tokenized sentences into labelled (n-gram, context) classification samples.

  Every 1-gram up to n-gram of every sentence becomes one sample. A sample is positive
  (Label 1) when the lower-cased, detokenized n-gram is listed in the "Term" column of
  ``df_terms`` and negative (Label 0) otherwise.

  Args:
    sentence_list: list of ``(tokens, sentence)`` tuples as returned by ``preprocess``.
    df_terms: annotation DataFrame with a "Term" column of lower-cased gold terms.
    n: maximum n-gram length.

  Returns:
    DataFrame with the columns 'n_gram', 'Context', 'Label' and 'Termtype'. 'Termtype'
    is the placeholder "/" for positive and "None" for negative samples.
  """
  columns = ['n_gram', 'Context', 'Label', "Termtype"]

  md = MosesDetokenizer(lang='en')

  # one set lookup per n-gram instead of a scan over the whole annotation array;
  # only the Term column is consulted, the Label column is not a list of terms
  term_set = set(df_terms["Term"])

  print(len(sentence_list))

  # rows are collected in a list and converted once at the end: DataFrame.append copied
  # the full frame for every row and no longer exists in pandas 2
  rows = []

  for count, sen in enumerate(sentence_list, start=1):
    if count%100==0:print(count)

    s=sen[0]  #take first part of tuple, i.e. the tokens
    context=str(sen[1]).strip()  #identical for all n-grams of this sentence

    # 1-gram up to n-gram
    for i in range(1,n+1):
      #look if the n-grams of this sentence are in the annotation dataset
      for n_gram in ngrams(s, i):
        n_gram=md.detokenize(n_gram)
        if n_gram.lower() in term_set:
          #positive sample
          rows.append((n_gram, context, 1, "/"))
        else:
          #negative sample
          rows.append((n_gram, context, 0, "None"))

  return pd.DataFrame(rows, columns=columns)

  

**Create Training Data**

In [None]:
# en
# For every domain: read the annotated texts, split them into tokenized sentences and
# build one labelled sample per n-gram (the last argument, 6, is the maximum n-gram length).

#create training data for all corp texts
corp_text_en=load_text_corpus("ACTER-master/ACTER-master/en/corp/texts/annotated/") # load text
corp_s_list=preprocess(corp_text_en)                                                # preprocess
train_data_corp_en=create_training_data(corp_s_list, df_corp_terms_en, 6)           # create training data

#create training data for all wind texts
wind_text_en=load_text_corpus("ACTER-master/ACTER-master/en/wind/texts/annotated/") # load text
wind_s_list=preprocess(wind_text_en)                                                # preprocess
train_data_wind_en=create_training_data(wind_s_list, df_wind_terms_en, 6)           # create training data

#create validation data from all equi texts (used as validation split further down)
equi_text_en=load_text_corpus("ACTER-master/ACTER-master/en/equi/texts/annotated/") # load text
equi_s_list=preprocess(equi_text_en)                                                # preprocess
train_data_equi_en=create_training_data(equi_s_list, df_equi_terms_en, 6)           # create samples

#create test data from all htfl texts (used as test split further down)
htfl_text_en=load_text_corpus("ACTER-master/ACTER-master/en/htfl/texts/annotated/") # load text
htfl_s_list=preprocess(htfl_text_en)                                                # preprocess
train_data_htfl_en=create_training_data(htfl_s_list, df_htfl_terms_en, 6)           # create samples

In [None]:
#fr
# Same pipeline as for English. The *_s_list names are reused and overwrite the English lists.
# NOTE(review): preprocess() and create_training_data() construct their Moses
# tokenizer/detokenizer with lang='en', so the French texts are processed with English rules.
corp_text_fr=load_text_corpus("ACTER-master/ACTER-master/fr/corp/texts/annotated/") # load text
corp_s_list=preprocess(corp_text_fr)                                                # preprocess
train_data_corp_fr=create_training_data(corp_s_list, df_corp_terms_fr, 6)           # create training data

wind_text_fr=load_text_corpus("ACTER-master/ACTER-master/fr/wind/texts/annotated/") # load text
wind_s_list=preprocess(wind_text_fr)                                                # preprocess
train_data_wind_fr=create_training_data(wind_s_list, df_wind_terms_fr, 6)           # create training data

equi_text_fr=load_text_corpus("ACTER-master/ACTER-master/fr/equi/texts/annotated/") # load text
equi_s_list=preprocess(equi_text_fr)                                                # preprocess
train_data_equi_fr=create_training_data(equi_s_list, df_equi_terms_fr, 6)           # create samples

htfl_text_fr=load_text_corpus("ACTER-master/ACTER-master/fr/htfl/texts/annotated/") # load text
htfl_s_list=preprocess(htfl_text_fr)                                                # preprocess
train_data_htfl_fr=create_training_data(htfl_s_list, df_htfl_terms_fr, 6)           # create samples (French test split)

In [None]:
#nl
# Same pipeline as for English. The *_s_list names are reused and overwrite the previous lists.
# NOTE(review): preprocess() and create_training_data() construct their Moses
# tokenizer/detokenizer with lang='en', so the Dutch texts are processed with English rules.
corp_text_nl=load_text_corpus("ACTER-master/ACTER-master/nl/corp/texts/annotated/") # load text
corp_s_list=preprocess(corp_text_nl)                                                # preprocess
train_data_corp_nl=create_training_data(corp_s_list, df_corp_terms_nl, 6)           # create training data

wind_text_nl=load_text_corpus("ACTER-master/ACTER-master/nl/wind/texts/annotated/") # load text
wind_s_list=preprocess(wind_text_nl)                                                # preprocess
train_data_wind_nl=create_training_data(wind_s_list, df_wind_terms_nl, 6)           # create training data

equi_text_nl=load_text_corpus("ACTER-master/ACTER-master/nl/equi/texts/annotated/") # load text
equi_s_list=preprocess(equi_text_nl)                                                # preprocess
train_data_equi_nl=create_training_data(equi_s_list, df_equi_terms_nl, 6)           # create samples

htfl_text_nl=load_text_corpus("ACTER-master/ACTER-master/nl/htfl/texts/annotated/") # load text
htfl_s_list=preprocess(htfl_text_nl)                                                # preprocess
train_data_htfl_nl=create_training_data(htfl_s_list, df_htfl_terms_nl, 6)           # create samples (Dutch test split)

In [None]:
# samples per label (0 = no term, 1 = term) for the four English domains, one table each
print(*(frame.groupby('Label').count()
        for frame in (train_data_corp_en, train_data_wind_en, train_data_equi_en, train_data_htfl_en)),
      sep="\n")

       n_gram  Context  Termtype
Label                           
0      274139   274139    274139
1        8708     8708      8708
       n_gram  Context  Termtype
Label                           
0      311535   311535    311535
1       10542    10542     10542
       n_gram  Context  Termtype
Label                           
0      298863   298863    298863
1       13891    13891     13891
       n_gram  Context  Termtype
Label                           
0      290334   290334    290334
1       14376    14376     14376


In [None]:
# inspect the samples generated for the English equi domain
# NOTE(review): create_training_data currently writes "/" and "None" into Termtype, so a
# stored output that shows annotation labels there was produced by an earlier version.
train_data_equi_en

Unnamed: 0,n_gram,Context,Label,Termtype
0,Pirouette,Pirouette (dressage)\n\nA Pirouette is a Frenc...,1,Specific_Term
1,(,Pirouette (dressage)\n\nA Pirouette is a Frenc...,0,
2,dressage,Pirouette (dressage)\n\nA Pirouette is a Frenc...,1,Common_Term
3,),Pirouette (dressage)\n\nA Pirouette is a Frenc...,0,
4,A,Pirouette (dressage)\n\nA Pirouette is a Frenc...,1,Specific_Term
...,...,...,...,...
312749,about it when he's done,Stop and let your horse think about it when he...,0,
312750,it when he's done something,Stop and let your horse think about it when he...,0,
312751,when he's done something right,Stop and let your horse think about it when he...,0,
312752,he's done something right.,Stop and let your horse think about it when he...,0,


**Undersample**

In [None]:
#undersample class 0 so the amount of trainingsample is the same as label 1 

def undersample(train_data, random_state=None):
  """Randomly drop negative samples until both classes have the same size.

  Args:
    train_data: DataFrame with a 'Label' column holding 0 (negative) and 1 (positive).
    random_state: optional seed passed to ``DataFrame.sample`` for a reproducible
      selection. The default ``None`` keeps the selection unseeded.

  Returns:
    DataFrame with the selected class-0 rows followed by all class-1 rows. When class 0
    is not the larger class, nothing is dropped.
  """
  print("Before")
  print(train_data.Label.value_counts())

  # Divide by class. The class sizes are taken from these frames: value_counts() is
  # ordered by frequency, so unpacking it assumed class 0 is the majority and failed
  # when only one class was present.
  df_class_0 = train_data[train_data['Label'] == 0]
  df_class_1 = train_data[train_data['Label'] == 1]

  # never ask for more rows than class 0 holds
  n_keep = min(len(df_class_0), len(df_class_1))
  if n_keep:
    df_class_0_under = df_class_0.sample(n_keep, random_state=random_state)
  else:
    df_class_0_under = df_class_0.iloc[:0]
  df_test_under = pd.concat([df_class_0_under, df_class_1], axis=0)

  print("After")
  print(df_test_under.Label.value_counts())

  return df_test_under

In [None]:
# undersample the trainingsdata

#en
# balance the two training domains of each language (corp first, then wind);
# only the training frames are undersampled
train_data_corp_en, train_data_wind_en = [
    undersample(frame) for frame in (train_data_corp_en, train_data_wind_en)
]


#fr
train_data_corp_fr, train_data_wind_fr = [
    undersample(frame) for frame in (train_data_corp_fr, train_data_wind_fr)
]


#nl
train_data_corp_nl, train_data_wind_nl = [
    undersample(frame) for frame in (train_data_corp_nl, train_data_wind_nl)
]

Before
0    274139
1      8708
Name: Label, dtype: int64
After
1    8708
0    8708
Name: Label, dtype: int64
Before
0    311535
1     10542
Name: Label, dtype: int64
After
1    10542
0    10542
Name: Label, dtype: int64
Before
0    325242
1      7443
Name: Label, dtype: int64
After
1    7443
0    7443
Name: Label, dtype: int64
Before
0    356805
1      9293
Name: Label, dtype: int64
After
1    9293
0    9293
Name: Label, dtype: int64
Before
0    283267
1      7071
Name: Label, dtype: int64
After
1    7071
0    7071
Name: Label, dtype: int64
Before
0    287361
1      5582
Name: Label, dtype: int64
After
1    5582
0    5582
Name: Label, dtype: int64


In [None]:
#concat trainingsdata
trainings_data_df = pd.concat([train_data_corp_en, train_data_wind_en,  train_data_corp_fr, train_data_wind_fr, train_data_corp_nl, train_data_wind_nl])

valid_data_df = train_data_equi_en #pd.concat([train_data_equi_en, train_data_equi_fr, train_data_equi_nl ])

test_data_df_en = train_data_htfl_en
test_data_df_fr = train_data_htfl_fr
test_data_df_nl = train_data_htfl_nl

print(len(trainings_data_df))
print(len(valid_data_df))
print(len(test_data_df_en))
print(len(test_data_df_fr))
print(len(test_data_df_nl))

97278
312754
304710
303069
292615


**Tokenizer**

In [None]:
# Load the pretrained tokenizer of the "xlm-roberta-base" checkpoint (the same checkpoint
# name create_model uses for the classifier). tokenizer_xlm and extract_terms read this
# module-level variable.
xlmr_tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=5069051.0, style=ProgressStyle(descript…




In [None]:
def tokenizer_xlm(data, max_len):
  """Encode every sample of ``data`` for the XLM-R classifier.

  The model input of a sample is the string ``"<n_gram>. <Context>"``, encoded with the
  module-level ``xlmr_tokenizer`` and padded/truncated to ``max_len`` tokens.

  Args:
    data: DataFrame with the columns 'n_gram', 'Context' and 'Label'.
    max_len: token length every encoded sample is padded or truncated to.

  Returns:
    Tuple ``(input_ids, attention_masks, labels)`` of tensors with one entry per sample.
  """
  id_rows = []
  mask_rows = []

  # one encoder call per sample; each call yields tensors with a leading batch axis of 1
  for n_gram, context in zip(data['n_gram'], data['Context']):
      encoding = xlmr_tokenizer.encode_plus(n_gram+". "+context,
                                            max_length=max_len,
                                            padding='max_length',
                                            truncation=True,
                                            return_tensors='pt')
      id_rows.append(encoding['input_ids'])
      mask_rows.append(encoding['attention_mask'])

  # stack the per-sample tensors along the batch axis
  input_ids_ = torch.cat(id_rows, dim=0)
  attn_masks_ = torch.cat(mask_rows, dim=0)

  # labels to tensor
  labels_ = torch.tensor(data['Label'].tolist())

  print('Encoder finished. {:,} examples.'.format(len(labels_)))
  return input_ids_, attn_masks_, labels_

In [None]:
#tokenize input for the different training/test sets
max_len=64

input_ids_train, attn_masks_train, labels_all_train = tokenizer_xlm(trainings_data_df, max_len)

input_ids_valid, attn_masks_valid, labels_all_valid = tokenizer_xlm(valid_data_df, max_len)

input_ids_test_en, attn_masks_test_en, labels_test_en = tokenizer_xlm(test_data_df_en, max_len)
input_ids_test_fr, attn_masks_test_fr, labels_test_fr = tokenizer_xlm(test_data_df_fr, max_len)
input_ids_test_nl, attn_masks_test_nl, labels_test_nl = tokenizer_xlm(test_data_df_nl, max_len)

Encoder finished. 97,278 examples.
Encoder finished. 312,754 examples.
Encoder finished. 304,710 examples.
Encoder finished. 303,069 examples.
Encoder finished. 292,615 examples.


In [None]:
# create datasets
train_dataset = TensorDataset(input_ids_train, attn_masks_train, labels_all_train)

valid_dataset = TensorDataset(input_ids_valid, attn_masks_valid, labels_all_valid)

test_dataset_en = TensorDataset(input_ids_test_en, attn_masks_test_en, labels_test_en)
test_dataset_fr = TensorDataset(input_ids_test_fr, attn_masks_test_fr, labels_test_fr)
test_dataset_nl = TensorDataset(input_ids_test_nl, attn_masks_test_nl, labels_test_nl)

In [None]:
# create dataloaders
batch_size = 32

train_dataloader = DataLoader(train_dataset, sampler = RandomSampler(train_dataset), batch_size = batch_size) #random sampling
valid_dataloader = DataLoader(valid_dataset, sampler = SequentialSampler(valid_dataset),batch_size = batch_size ) #sequential sampling

test_dataloader_en = DataLoader(test_dataset_en, sampler = SequentialSampler(test_dataset_en),batch_size = batch_size ) #sequential sampling
test_dataloader_fr = DataLoader(test_dataset_fr, sampler = SequentialSampler(test_dataset_fr),batch_size = batch_size ) #sequential sampling
test_dataloader_nl = DataLoader(test_dataset_nl, sampler = SequentialSampler(test_dataset_nl),batch_size = batch_size ) #sequential sampling

# Model

In [None]:
def create_model(lr, eps, train_dataloader, epochs, device):
  """Create the XLM-R sequence classifier together with its optimizer and LR scheduler.

  Args:
    lr: learning rate of the AdamW optimizer.
    eps: epsilon of the AdamW optimizer.
    train_dataloader: training DataLoader; its length is the number of steps per epoch.
    epochs: number of training epochs, used to compute the total number of steps.
    device: torch device (or device string) the model is moved to.

  Returns:
    Tuple ``(xlmr_model, optimizer, scheduler)``.
  """
  xlmr_model = XLMRobertaForSequenceClassification.from_pretrained("xlm-roberta-base", num_labels=2)
  xlmr_model.to(device)
  # the device name can only be queried for CUDA devices; asking on a CPU run raises
  if torch.device(device).type == 'cuda':
    print('Connected to GPU:', torch.cuda.get_device_name(device))
  optimizer = AdamW(xlmr_model.parameters(),
                  lr = lr,
                  eps = eps
                )
  # the learning rate decays linearly to zero over all training steps, without warm-up
  total_steps = len(train_dataloader) * epochs
  scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps = 0,
                                            num_training_steps = total_steps)
  return xlmr_model, optimizer, scheduler

In [None]:
def format_time(elapsed):
    '''
    Format a duration given in seconds as an h:mm:ss string.

    The duration is rounded to whole seconds first; durations of a day or more get
    the "N day(s), " prefix of datetime.timedelta.
    '''
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

In [None]:
def validate(validation_dataloader, validation_df, xlmr_model, verbose, print_cm, gold_terms=None):
  """Extract terms with the model and score them against a gold term list.

  Args:
    validation_dataloader: unused, kept for interface compatibility.
    validation_df: sample DataFrame ('n_gram', 'Context') the terms are extracted from.
      Only used when ``gold_terms`` is given.
    xlmr_model: the classifier to evaluate.
    verbose: unused, kept for interface compatibility.
    print_cm: unused, kept for interface compatibility.
    gold_terms: iterable of lower-cased gold terms belonging to ``validation_df``.
      When omitted, the function evaluates the English equi data
      (``train_data_equi_en`` against ``df_equi_terms_en``), as it always did.

  Returns:
    Tuple ``(recall, precision, f1)``; a score whose denominator is zero is 0.0.
  """
  # put model in evaluation mode
  xlmr_model.eval()

  if gold_terms is None:
    # historical default: the module-level English equi samples and gold list
    candidates_df = train_data_equi_en
    gold_terms = df_equi_terms_en["Term"]
  else:
    candidates_df = validation_df

  #extract terms and compute scores (gold terms are lower case, so lower-case the extractions)
  extracted_terms = set(item.lower() for item in extract_terms(candidates_df, xlmr_model))
  gold_set = set(gold_terms)
  true_pos = extracted_terms.intersection(gold_set)

  # guard every division: early epochs may extract nothing or hit nothing
  recall = len(true_pos)/len(gold_set) if gold_set else 0.0
  precision = len(true_pos)/len(extracted_terms) if extracted_terms else 0.0
  f1 = 2*(precision*recall)/(precision+recall) if (precision+recall) > 0 else 0.0

  return recall, precision, f1

In [None]:
def extract_terms(validation_df, xlmr_model, batch_size=32):
  """Return the n-grams of ``validation_df`` that the model classifies as terms.

  Every sample is encoded as ``"<n_gram>. <Context>"`` with the module-level
  ``xlmr_tokenizer`` and ``max_len``. A sample counts as a term when the highest logit
  belongs to the class named "Term" in the module-level ``labels`` list.

  Args:
    validation_df: DataFrame with the columns 'n_gram' and 'Context'.
    xlmr_model: the sequence classifier; inputs are moved to the device of its parameters.
    batch_size: number of samples per forward pass.

  Returns:
    Set of the n-gram strings predicted as terms (original casing).
  """
  print(len(validation_df))

  # put model in evaluation mode
  xlmr_model.eval()

  term_class = labels.index("Term")
  model_device = next(xlmr_model.parameters()).device

  n_grams = validation_df['n_gram'].tolist()
  contexts = validation_df['Context'].tolist()
  term_set = set()

  # classify a batch of samples per forward pass instead of one sample at a time
  for start in range(0, len(n_grams), batch_size):
    batch_n_grams = n_grams[start:start+batch_size]
    batch_contexts = contexts[start:start+batch_size]
    sentences = [n_gram+". "+context for n_gram, context in zip(batch_n_grams, batch_contexts)]

    encoded = xlmr_tokenizer(sentences,
                             max_length=max_len,
                             padding='max_length',
                             truncation=True,
                             return_tensors='pt')
    input_ids = encoded['input_ids'].to(model_device)
    attn_mask = encoded['attention_mask'].to(model_device)

    # inference only: no labels are passed, so no loss is computed
    with torch.no_grad():
      output = xlmr_model(input_ids,
                          token_type_ids=None,
                          attention_mask=attn_mask)

    predictions = output.logits.argmax(dim=1).cpu().tolist()
    term_set.update(n_gram for n_gram, pred in zip(batch_n_grams, predictions) if pred == term_class)

  return term_set
      

In [None]:
def train_model(epochs, xlmr_model, train_dataloader, validation_dataloader, validation_df, random_seed, verbose, optimizer, scheduler):
  """Fine-tune the classifier and validate it after every epoch.

  Args:
    epochs: number of passes over ``train_dataloader``.
    xlmr_model: the sequence classifier to train; batches are moved to its device.
    train_dataloader: yields batches of (input ids, attention mask, labels).
    validation_dataloader: passed through to ``validate``.
    validation_df: passed through to ``validate``.
    random_seed: seed for ``random``, numpy and torch (CPU and CUDA).
    verbose: passed through to ``validate``.
    optimizer: optimizer over the model parameters.
    scheduler: learning-rate scheduler, stepped once per batch.

  Returns:
    List with one dict per epoch holding the keys 'epoch', 'Training Loss',
    'precision', 'recall', 'f1' and 'Training Time'.
  """
  seed_val = random_seed

  random.seed(seed_val)
  np.random.seed(seed_val)
  torch.manual_seed(seed_val)
  torch.cuda.manual_seed_all(seed_val)

  # send the batches to wherever the model lives instead of relying on a global device
  model_device = next(xlmr_model.parameters()).device

  # mostly contains scores about how the training went for each epoch
  training_stats = []

  # total training time
  total_t0 = time.time()

  print('\033[1m'+"================ Model Training ================"+'\033[0m')

  # For each epoch...
  for epoch_i in range(0, epochs):

      print("")
      print('\033[1m'+'======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs)+'\033[0m')

      t0 = time.time()

      # summed training loss of the epoch
      total_train_loss = 0

      # training mode: mechanisms like dropout work differently during train and test time
      xlmr_model.train()

      # iterate over batches
      for batch in train_dataloader:

          # unpack the training batch and load it to the model's device
          b_input_ids = batch[0].to(model_device)
          b_input_mask = batch[1].to(model_device)
          b_labels = batch[2].to(model_device)

          # clear gradients before calculating new ones
          xlmr_model.zero_grad()

          # forward pass with current batch
          output = xlmr_model(b_input_ids,
                              token_type_ids=None,
                              attention_mask=b_input_mask,
                              labels=b_labels)

          loss = output.loss

          # add up the loss
          total_train_loss += loss.item()

          # calculate new gradients
          loss.backward()

          # gradient clipping: total gradient norm of at most 1.0
          torch.nn.utils.clip_grad_norm_(xlmr_model.parameters(), 1.0)

          # update the network weights based on the gradients and the optimizer settings
          optimizer.step()

          # lr update
          scheduler.step()

      # avg loss over all batches (an empty dataloader yields 0 instead of a division error)
      avg_train_loss = total_train_loss / max(len(train_dataloader), 1)

      # training time of this epoch
      training_time = format_time(time.time() - t0)

      print("")
      print("  Average training loss: {0:.2f}".format(avg_train_loss))
      print("  Training epoch took: {:}".format(training_time))

      # VALIDATION
      print("evaluate")
      print_cm = (epoch_i == epochs-1)  # request the confusion matrix in the final epoch only
      recall, precision, f1 = validate(validation_dataloader, validation_df, xlmr_model, verbose, print_cm)

      training_stats.append(
          {
              'epoch': epoch_i + 1,
              'Training Loss': avg_train_loss,
              "precision": precision,
              "recall": recall,
              "f1": f1,
              'Training Time': training_time,
          }
      )

      print("Precision", precision)
      print("Recall", recall)
      print("F1", f1)

  print("\n\nTraining complete!")
  print("Total training took {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))

  return training_stats


In [None]:
# hyperparameters of the fine-tuning run
lr=2e-5     # AdamW learning rate
eps=1e-8    # AdamW epsilon
epochs=3    # passes over the training data
# use the GPU when one is visible instead of hard-coding 'cuda'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
xlmr_model, optimizer, scheduler = create_model(lr=lr, eps=eps, train_dataloader=train_dataloader, epochs=epochs, device=device)

In [None]:
# validation_df is the validation split (English equi), not the htfl frame:
# htfl is the held-out test data and must stay out of the training loop
training_stats=train_model(epochs=epochs,
                           xlmr_model=xlmr_model,
                           train_dataloader=train_dataloader,
                           validation_dataloader=valid_dataloader,
                           validation_df=valid_data_df,
                           random_seed=42,
                           verbose=True,
                           optimizer=optimizer,
                           scheduler=scheduler)

# Test Set Evaluation

In [None]:
# run the fine-tuned model over all candidate n-grams of the htfl test domain, per language;
# each result is the set of n-grams classified as terms
extracted_terms_htfl_en=extract_terms(train_data_htfl_en, xlmr_model)
extracted_terms_htfl_fr=extract_terms(train_data_htfl_fr, xlmr_model)
extracted_terms_htfl_nl=extract_terms(train_data_htfl_nl, xlmr_model)

In [None]:
def computeTermEvalMetrics(extracted_terms, gold_df):
  """Print recall, precision and F1 of extracted terms against a gold term list.

  Args:
    extracted_terms: iterable of extracted term strings; they are lower-cased before
      the comparison because the gold standard is lower case.
    gold_df: iterable of gold terms, e.g. the "Term" column of an annotation DataFrame.

  Prints the sizes of the intersection, the gold set and the extracted set followed by
  the three scores. A score whose denominator is zero is reported as 0.0.
  """
  #make lower case cause gold standard is lower case
  extracted_terms = {item.lower() for item in extracted_terms}
  gold_set = set(gold_df)
  true_pos = extracted_terms & gold_set

  # guard every division: an empty extraction or gold set must not abort the evaluation
  recall = len(true_pos)/len(gold_set) if gold_set else 0.0
  precision = len(true_pos)/len(extracted_terms) if extracted_terms else 0.0
  f1 = 2*(precision*recall)/(precision+recall) if (precision+recall) > 0 else 0.0

  print("Intersection",len(true_pos))
  print("Gold",len(gold_set))
  print("Extracted",len(extracted_terms))
  print("Recall:", recall)
  print("Precision:", precision)
  print("F1:", f1)

In [None]:
# scores of the English htfl extractions against the English htfl gold terms
computeTermEvalMetrics(extracted_terms_htfl_en, df_htfl_terms_en["Term"])

In [None]:
# scores of the French htfl extractions against the French htfl gold terms
computeTermEvalMetrics(extracted_terms_htfl_fr, df_htfl_terms_fr["Term"])

In [None]:
# scores of the Dutch htfl extractions against the Dutch htfl gold terms
computeTermEvalMetrics(extracted_terms_htfl_nl, df_htfl_terms_nl["Term"])