# INFO - BERT embeddings ja relatsioonide treening
EstNLTK 1.6 arendusharust saadud [BertTaggeriga](https://github.com/estnltk/estnltk/tree/devel_1.6/estnltk/taggers/embeddings) sõnadele BERT peidetud kihtide vektorite lisamine. Saadud vektoreid kasutatakse ajaseoseid tuvastavate mudelite treenimiseks. Treenimiseks katsetati erinevaid mudeleid ning nende tulemuste järgi valiti sobivaim, mida üritati veel edasi seadistada.

Katsetamiseks võeti BERT-i nelja viimase kihi vektorid kokku liidetuna, nelja viimase kihi vektorite summa, viimase kihi vektori, eelviimase kihi vektori ja kõikide kihtide vektorite summa. Kuna on sündmusi ja ajaväljendeid,  mis koosnevad  mitmest  sõnast, katsetati neile vektorite  valimisel  erinevaid lähenemisi. Sündmuste puhul prooviti: 
- ainult fraasi peasõna vektorit.
- fraasi sõnade vektorite keskmist väärtust
- fraasi sõnade vektorite kaalutatud keskmist väärtust, kus kõrgem kaal on peasõnal
- fraasi  sõnade  vektorite  kaalutatud  keskmist  väärtust,  kus  kaalud  on  sõnaliikide  järgi (suurimast  kaalust  alates:  üheselt  tegusõnaks  liigitatud  sõnad,  nud-ja  tud-vormid, käändsõnad, ülejäänud sõnaliigid)

Mitmesõnaliste ajaväljendite puhul otsustati kasutada ainult väljendi sõnede vektorite keskmist väärtust.

**NB!** Mugavamaks navigeerimiseks on soovitav kasutada Colab keskkonnas sisukorda või muus keskkonnas sarnast *add-on*'i.

# Vajalike teekide installimine

In [1]:
!pip install transformers
!pip install conllu==3.1.1 estnltk==1.6.7b0



# Drive'st lugemiseks

In [2]:
#Failide lugemiseks Colabis
import os
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
os.chdir("/content/drive/My Drive/Colab Notebooks/")

Mounted at /content/drive


In [3]:
drive_path = "Loputoo_Ajaseoste_automaatne_tuvastamine_tekstis/"

# EstNLTK'le EstBERT embeddings lisamine

In [4]:
!cp -R ./Loputoo_Ajaseoste_automaatne_tuvastamine_tekstis/embeddings /usr/local/lib/python3.7/dist-packages/estnltk/taggers/

# Imports

In [5]:
from estnltk import Text, Layer
from estnltk.taggers.embeddings.bert.bert_tagger import BertTagger
from estnltk.converters import text_to_json, json_to_text
import json
import numpy as np
import pickle
import random

# Artiklid

In [6]:
def loe_artiklid():
  drive_path = "Loputoo_Ajaseoste_automaatne_tuvastamine_tekstis/"
  artiklite_kaust = drive_path + "artiklid"

  laetud_artiklid = []

  with os.scandir(artiklite_kaust) as entries:
    for entry in entries:
      if entry.is_file():
        data = None
        with open(entry, 'r') as fp:
          data = json.load(fp)
        laetud_artiklid.append(json_to_text(data))

  print("Lugesin sisse", len(laetud_artiklid), "artiklit.")
  return laetud_artiklid

#laetud_artiklid = loe_artiklid()

# BertTagger ja embeddingute lisamine

In [7]:
def salvesta_embeddingutega(text_object):
  text_json = text_to_json(text_object)
  with open(drive_path + "artiklid_embeddingutega/" + text_object.meta['filename'] + "_embeddings" + '.json', 'w') as fp:
      json.dump(text_json, fp)
  print("Salvestatud", drive_path + "artiklid_embeddingutega/" + text_object.meta['filename'] + '.json')

In [8]:
def lisa_embeddingud(artiklid):
  bert_tagger = BertTagger(bert_location='./Loputoo_Ajaseoste_automaatne_tuvastamine_tekstis/EstBERT', token_level=False)
  bert_tagger_add = BertTagger(bert_location='./Loputoo_Ajaseoste_automaatne_tuvastamine_tekstis/EstBERT', token_level=False, method="add", output_layer="bert_embeddings_add")
  bert_tagger_lastlayer = BertTagger(bert_location='./Loputoo_Ajaseoste_automaatne_tuvastamine_tekstis/EstBERT', token_level=False, bert_layers=[-1], output_layer="bert_embeddings_lastlayer")
  bert_tagger_penultimate = BertTagger(bert_location='./Loputoo_Ajaseoste_automaatne_tuvastamine_tekstis/EstBERT', token_level=False, bert_layers=[-2], output_layer="bert_embeddings_penultimatelayer")
  bert_tagger_all_add = BertTagger(bert_location='./Loputoo_Ajaseoste_automaatne_tuvastamine_tekstis/EstBERT', token_level=False, method="add", bert_layers=[-1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12], output_layer="bert_embeddings_all_add")

  for artikkel in artiklid:
    bert_tagger.tag(artikkel)
    bert_tagger_add.tag(artikkel)
    bert_tagger_lastlayer.tag(artikkel)
    bert_tagger_penultimate.tag(artikkel)
    bert_tagger_all_add.tag(artikkel)

    # Mõned embeddingud on numpy.ndarray'ga. Neid ei saa kergelt json faili salvestada
    eemaldada = None
    for layer in ["bert_embeddings", "bert_embeddings_add", "bert_embeddings_lastlayer", "bert_embeddings_penultimatelayer", "bert_embeddings_all_add"]:
      # Eemalda eelmine kiht
      if eemaldada:
        artikkel.pop_layer(eemaldada)
      uus_kiht = Layer(name=layer + "_fixed", text_object=artikkel, attributes=['token', 'bert_embedding'], ambiguous=True)
      for entry in getattr(artikkel, layer):
        new_token = []
        for tk in entry.token[0]:
          new_token.append(tk)
        embedding_info = entry.bert_embedding
        # Kõik np.ndarray'd tuleb teha listiks
        new_embeddings = []
        for embedding in embedding_info:
          try:
            new_embeddings = embedding.tolist()
          except:
            new_embeddings = embedding
        
        uus_kiht.add_annotation(entry.base_span, token=new_token, bert_embedding=new_embeddings)
      # Lisada kiht
      artikkel.add_layer(uus_kiht)
      eemaldada = layer
    
    artikkel.pop_layer(eemaldada)
    salvesta_embeddingutega(artikkel)

  del bert_tagger
  del bert_tagger_add
  del bert_tagger_lastlayer
  del bert_tagger_penultimate
  del bert_tagger_all_add

#lisa_embeddingud(laetud_artiklid)

In [9]:
def loe_embeddingutega_artiklid():
  drive_path = "Loputoo_Ajaseoste_automaatne_tuvastamine_tekstis/"
  artiklite_kaust = drive_path + "artiklid_embeddingutega"

  laetud_artiklid_embeddingutega = []

  with os.scandir(artiklite_kaust) as entries:
    for entry in entries:
      if entry.is_file():
        data = None
        with open(entry, 'r') as fp:
          data = json.load(fp)
        print(data)
        laetud_artiklid_embeddingutega.append(json_to_text(data))

  print("Lugesin sisse", len(laetud_artiklid_embeddingutega), "artikklit.")
  return laetud_artiklid_embeddingutega

laetud_artiklid_embeddingutega = loe_embeddingutega_artiklid()

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



# Meetodid artiklist sündmuste embeddingute saamiseks

In [11]:
# Sündmuse fraasist peasõna embeddingu võtmine
# ühe sõna embedding'u väljavalimist (valida class märgendiga EVENT)
def main_event_embedding(artikkel, layer_name="bert_embeddings_fixed"):
  embeddings = dict()
  for g_event in artikkel.gold_events:
    if g_event.event_annotation.split(" ")[1] == 'multiword="true"':
      continue
    for embedding in getattr(artikkel, layer_name):
      if embedding.start >= g_event.start and embedding.end <= g_event.end:
        embeddings[g_event.event_ID] = np.asarray(embedding.bert_embedding)
        break
  return embeddings

# Sündmuse fraasi sõnede embeddingute keskmine
# sõnade embedding'ute keskmise arvutamine
def event_embeddingute_keskmised(artikkel, layer_name="bert_embeddings_fixed"):
  embeddings = dict()
  for g_event in artikkel.gold_events:
    for embedding in getattr(artikkel, layer_name):
      if embedding.start >= g_event.start and embedding.end <= g_event.end:
        if g_event.event_ID not in embeddings.keys():
          embeddings[g_event.event_ID] = [embedding.bert_embedding]
        else:
          embeddings[g_event.event_ID].append(embedding.bert_embedding)
  
  embeddings_keskmised = dict()
  for item in embeddings.items():
    keskmine = np.mean(item[1], 0)
    embeddings_keskmised[item[0]] = keskmine
  
  return embeddings_keskmised

# Sündmuse fraasi sõnede embeddingute kaalutatud keskmine
# sõnade embedding'ute kaalutud keskmise arvutamist, kus kõrgem kaal läheb class märgendiga EVENT-ile
def event_embeddingute_kaalutatud_keskmised(artikkel, class_kaal=0.7, muu_kaal=0.3, layer_name="bert_embeddings_fixed"):
  embeddings = dict()
  for g_event in artikkel.gold_events:
    for embedding in getattr(artikkel, layer_name):
      if embedding.start >= g_event.start and embedding.end <= g_event.end:
        if g_event.event_ID not in embeddings.keys():
          embeddings[g_event.event_ID] = [embedding.bert_embedding]
        else:
          embeddings[g_event.event_ID].append(embedding.bert_embedding)
  
  embeddings_keskmised = dict()
  for item in embeddings.items():
    weights = [muu_kaal for i in item[1]]
    # mitmes on CLASS'iga sõne, sellele peab suurema kaalu andma
    mitmes = -1
    for g_event in artikkel.gold_events:
      if g_event.event_ID == item[0]:
        mitmes += 1
        if g_event.event_annotation.split(" ")[1] != 'multiword="true"':
          break
    weights[mitmes] = class_kaal

    keskmine = np.average(item[1], 0, weights)
    embeddings_keskmised[item[0]] = keskmine
  
  return embeddings_keskmised

def leia_sonaliik_ja_tagasta_kaal(artikkel, start, weights):
  pos = -1
  for sone in artikkel.morph_analysis:
    if sone.start == start:
      pos = sone.partofspeech
      if "A" in pos:
        pos = "A"
      else:
        pos = pos[0]
      break

  if pos == "V":
    return weights[0]
  if pos == "A":
    return weights[1]
  if pos in ["S", "A", "C", "U", "O", "N", "H", "P"]:
    return weights[2]
  
  if pos == -1:
    print("Sonaliigis viga!")
  
  return weights[3]
  

# Sündmuse fraasi sõnede embeddingute kaalutatud keskmine sõnaliikide järgi
# sõnade embedding'ute kaalutud keskmise arvutamine, kaalud sõnaliikide prioriteetide järgi
def event_embeddingute_kaalutatud_keskmised_sonaliigid(artikkel, weights=[0.8, 0.7, 0.5, 0.2], layer_name="bert_embeddings_fixed"):
  embeddings = dict()
  starts = dict()
  for g_event in artikkel.gold_events:
    for embedding in getattr(artikkel, layer_name):
      if embedding.start >= g_event.start and embedding.end <= g_event.end:
        if g_event.event_ID not in embeddings.keys():
          embeddings[g_event.event_ID] = [embedding.bert_embedding]
          starts[g_event.event_ID] = [embedding.start]
        else:
          embeddings[g_event.event_ID].append(embedding.bert_embedding)
          starts[g_event.event_ID].append(embedding.start)
  
  embeddings_keskmised = dict()
  for item in embeddings.items():
    kaalud = [0 for i in item[1]]
    mitmes = 0
    for g_event in artikkel.gold_events:
      if g_event.event_ID == item[0] and g_event.start == starts.get(item[0])[mitmes]:
        kaal = leia_sonaliik_ja_tagasta_kaal(artikkel, g_event.start, weights)
        kaalud[mitmes] = kaal
        mitmes += 1
    if artikkel.meta.get("filename") == "aja_pm_2000_10_04.tasak.a029.sol" and item[0] == "e23":
      print("muudan")
      for i in range(len(kaalud)):
        if kaalud[i] == 0:
          kaalud[i] = weights[3]

    if 0 in kaalud:
      print(kaalud)
      print(starts.get(item[0]))
      print(item[1])
      print(artikkel.meta.get("filename"))
    keskmine = np.average(item[1], 0, kaalud)
    embeddings_keskmised[item[0]] = keskmine
  return embeddings_keskmised

# Meetodid artiklist ajaväljendite embeddingute saamiseks

In [12]:
# Sündmuse fraasi sõnede embeddingute keskmine
def timex_embeddingute_keskmised(artikkel, layer_name="bert_embeddings_fixed"):
  embeddings = dict()
  for g_timex in artikkel.gold_timexes_phrases_ambiguous_fixed:
    for embedding in getattr(artikkel, layer_name):
      if embedding.start >= g_timex.start or embedding.end <= g_timex.end:
        # Bert embeddingsis: 19. XI 1997. See koosneb tegelt kahest ajaväljendist 19. XI ja 1997
        for timexID in g_timex.timex_ID:
          if timexID not in embeddings.keys():
            embeddings[timexID] = [embedding.bert_embedding]
          else:
            embeddings[timexID].append(embedding.bert_embedding)
  
  embeddings_keskmised = dict()
  for item in embeddings.items():
    keskmine = np.mean(item[1], 0)
    embeddings_keskmised[item[0]] = keskmine

  return embeddings_keskmised

# Meetodid seostetüüpide embeddingute jaoks

In [13]:
def tlink_event_DCT(eventID):
  return event_embeddings.get(eventID)

def concat_embedding_event_timex(eventID, timexID, event_embeddings, timex_embeddings):
  event = event_embeddings.get(eventID)[0]
  timex = timex_embeddings.get(timexID)[0]
  concat = np.concatenate((event, timex))
  return concat

def concat_embedding_event_event(eventID1, eventID2, event_embeddings):
  event1 = event_embeddings.get(eventID1)[0]
  event2 = event_embeddings.get(eventID2)[0]
  concat = np.concatenate((event1, event2))
  return concat

# Mudel - tlink-event-dct
X - embeddingud  
y - relation type

In [14]:
def loe_relatsioonid():
  data = None
  with open(drive_path + 'relations.pickle', 'rb') as fp:
      #data = json.load(fp)
      data = pickle.load(fp)
  return data
rels = loe_relatsioonid()

In [15]:
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import KFold

In [16]:
artikkel_event = []

def loo_X_ja_y_DCT(embedding_layer_name, event_embeddings_nr, with_form):
  rels = loe_relatsioonid()
  X = []
  y = []
  
  for artikkel in laetud_artiklid_embeddingutega:
    # Võimalikud kihid: bert_embeddings, bert_embeddings_add, bert_embeddings_all_add, bert_embeddings_lastlayer, bert_embeddings_penultimatelayer

    if event_embeddings_nr == 0:
      event_embeddings = main_event_embedding(artikkel, layer_name=embedding_layer_name)
    elif event_embeddings_nr == 1:
      event_embeddings = event_embeddingute_keskmised(artikkel, layer_name=embedding_layer_name)
    elif event_embeddings_nr == 2:
      event_embeddings = event_embeddingute_kaalutatud_keskmised(artikkel, layer_name=embedding_layer_name)
    elif event_embeddings_nr == 3:
      event_embeddings = event_embeddingute_kaalutatud_keskmised_sonaliigid(artikkel, layer_name=embedding_layer_name)
    
    for item in event_embeddings.items():
      event_relations = rels.get((artikkel.meta.get("filename"), item[0]))
      relation_to_DCT = None
      for relation in event_relations:
        if "t0" in relation:
          relation_to_DCT = relation
      if not relation_to_DCT:
        if False:
          print(artikkel.meta.get("filename"))
          print(event_relations)
          print("DCT relatsioon puudub!")
      else:
        X_data = item[1][0]
        if with_form:
          event_form = None
          start_end = None
          for g_event in artikkel.gold_events:
            if g_event.event_ID == item[0] and g_event.event_annotation.split(" ")[1] != 'multiword="true"':
              start_end = (g_event.start, g_event.end)
              break
          for morph in artikkel.morph_analysis:
            if morph.start == start_end[0] and morph.end == start_end[1]:
              event_form = morph.form[0]
              break
          if event_form:
            X_data = np.append(X_data, event_form)
          else:
            X_data = np.append(X_data, "-")

        X.append(X_data)
        y.append(relation_to_DCT[1])
        artikkel_event.append((artikkel.meta.get("filename"), item[0]))
  
  print(len(X))
  print(len(y))
  return X, y

X, y = loo_X_ja_y_DCT("bert_embeddings_fixed", 0, True)

4010
4010


In [17]:
print(len(X) == len(y))
print(y[0])
print(artikkel_event[0])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

def get_proportions(y_values, osakaal):
  
  y_set = set(y_values)
  vastus = "\n"
  for value in y_set:
    kogus = y_values.count(value)
    if osakaal:
      kogus = round(kogus / len(y_values), 4)
    vastus += value + ": " + str(kogus) + "\n"
  return vastus

print(set(y))
print("Train data class proportions", get_proportions(y_train, osakaal=False))
print("Test data class proportions", get_proportions(y_test, osakaal=False))
print("y data class proportions", get_proportions(y, osakaal=False))

True
OVERLAP-OR-AFTER
('aja_ml_2002_47.tasak.a006.sol', 'e1')
{'AFTER', 'BEFORE-OR-OVERLAP', 'OVERLAP-OR-AFTER', 'INCLUDES', 'VAGUE', 'IS_INCLUDED', 'SIMULTANEOUS', 'BEFORE'}
Train data class proportions 
AFTER: 417
BEFORE-OR-OVERLAP: 350
VAGUE: 468
INCLUDES: 805
OVERLAP-OR-AFTER: 170
IS_INCLUDED: 28
SIMULTANEOUS: 1
BEFORE: 1370

Test data class proportions 
AFTER: 46
BEFORE-OR-OVERLAP: 34
VAGUE: 53
INCLUDES: 98
OVERLAP-OR-AFTER: 20
IS_INCLUDED: 4
BEFORE: 146

y data class proportions 
AFTER: 463
BEFORE-OR-OVERLAP: 384
OVERLAP-OR-AFTER: 190
INCLUDES: 903
VAGUE: 521
IS_INCLUDED: 32
SIMULTANEOUS: 1
BEFORE: 1516



In [18]:
# Kommentaarina on accuracy (f1-score micro) väärtus
from sklearn.model_selection import StratifiedKFold
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import RandomOverSampler
from collections import Counter

# --- Katsetavad mudelid ---
#clf = make_pipeline(StandardScaler(), LinearSVC(random_state=0, C=0.025))  # 0.59 - main_event_embedding(bert_embeddings)
#clf = make_pipeline(StandardScaler(), SVC(random_state=0, kernel="linear", C=0.025)) # 0.64 - main_event_embedding(bert_embeddings)
#clf = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=len(set(y)) - 2)) # 0.61 - main_event_embedding(bert_embeddings)
#clf = make_pipeline(StandardScaler(), RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)) #0.50 - main_event_embedding(bert_embeddings)
#clf = make_pipeline(StandardScaler(), MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)) # 0.66 - main_event_embedding(bert_embeddings)

# Meetod, et jooksutada mudeli treenimist mitu korda, iga kord muutes mudeli mingit parameetrit.
def jooksuta_ja_muuda_mitu_korda(X, y):
  top1 = [0,0, None]
  top2 = [0,0, None]
  top3 = [0,0, None]

  alpha_value = 0.2
  c_value = 0.05
  for i in range(25):
    clf = make_pipeline(StandardScaler(), SVC(random_state=0, kernel="linear", C=c_value + i * c_value))
    tulemused = ristvalideeri(clf, X, y)

    f1 = f1_score(tulemused[0], tulemused[1], average="micro")
    if f1 > top1[0]:
      top3 = top2
      top2 = top1

      top1[0] = f1
      top1[1] = alpha_value + alpha_value * i
      top1[2] = classification_report(tulemused[0], tulemused[1])
    elif f1 > top2[0]:
      top3 = top2
      top2[0] = f1
      top2[1] = alpha_value + alpha_value * i
      top2[2] = classification_report(tulemused[0], tulemused[1])
    elif f1 > top3[0]:
      top3[0] = f1
      top3[1] = alpha_value + alpha_value * i
      top3[2] = classification_report(tulemused[0], tulemused[1])

    print(f1_score(tulemused[0], tulemused[1], average="micro"), f1_score(tulemused[0], tulemused[1], average="macro"), f1_score(tulemused[0], tulemused[1], average="weighted"), "Alpha value:", alpha_value + alpha_value * i)

  print(top1)
  print(top2)
  print(top3)
  return top1, top2, top3

def ristvalideeri(mudel, X, y):
  all_preds = []
  all_true = []

  all_acc = []
  all_recall = []
  all_prec = []
  all_f1 = []

  all_vabam_acc = []

  highest_accuracy = 0
  highest_accuracy_report = None

  c = list(zip(X, y))
  random.seed(10)
  random.shuffle(c)
  X, y = zip(*c)

  kf = KFold(n_splits=10, shuffle=True, random_state=2021)
  kf.get_n_splits(X)
  i = 0
  for train_index, test_index in kf.split(X):
    print("split", i)
    X_train_split = [X[i] for i in train_index]
    y_train_split = [y[i] for i in train_index]
    X_test_split = [X[i] for i in test_index]
    y_test_split = [y[i] for i in test_index]

    mudel.fit(X_train_split, y_train_split)
    y_pred = mudel.predict(X_test_split)

    all_preds.extend(y_pred)
    all_true.extend(y_test_split)

    all_acc.append(accuracy_score(y_pred, y_test_split))
    all_recall.append(recall_score(y_pred, y_test_split, average="micro"))
    all_prec.append(precision_score(y_pred, y_test_split, average="micro"))
    all_f1.append(f1_score(y_pred, y_test_split, average="micro"))
    all_vabam_acc.append(vabam_accuracy(y_pred, y_test_split))

    if accuracy_score(y_pred, y_test_split) > highest_accuracy:
      highest_accuracy_report = classification_report(y_pred, y_test_split)
      highest_accuracy = accuracy_score(y_pred, y_test_split)

    i += 1
  return (all_preds, all_true, all_acc, all_recall, all_prec, all_f1, highest_accuracy_report, all_vabam_acc)

# Ristvalideerimine ja tulemuste printimine
def prindi_tulemused(mudel, mudeli_nimi, X, y, kihi_nimi):
  tulemused = ristvalideeri(mudel, X, y)
  print("###", kihi_nimi, "-", round(f1_score(tulemused[0], tulemused[1], average="micro"),2), round(f1_score(tulemused[0], tulemused[1], average="macro"),2), round(f1_score(tulemused[0], tulemused[1], average="weighted"),2))
  print("```")
  print(mudeli_nimi)
  print("Parima alamhulga raport:")
  print(tulemused[6])
  print("--- Ristvalideerimise tulemused ---")
  print("Tulemuste keskmised (micro):")
  print("   Accuracy:  ", np.mean(tulemused[2]), "vabam:", np.mean(tulemused[7]))
  print("   Recall:    ", np.mean(tulemused[3]))
  print("   Precision: ", np.mean(tulemused[4]))
  print("   F1-score:  ", np.mean(tulemused[5]))
  print("Üle kõikide ennustuste")
  print("   Accuracy:", accuracy_score(tulemused[0], tulemused[1]), "vabam:", vabam_accuracy(tulemused[0], tulemused[1]))
  print("   Recall:   ", "micro -", recall_score(tulemused[0], tulemused[1], average="micro"))
  print("              macro -", recall_score(tulemused[0], tulemused[1], average="macro"))
  print("           weighted -", recall_score(tulemused[0], tulemused[1], average="weighted"))
  print("   Precision:   ", "micro -", precision_score(tulemused[0], tulemused[1], average="micro"))
  print("                 macro -", precision_score(tulemused[0], tulemused[1], average="macro"))
  print("              weighted -", precision_score(tulemused[0], tulemused[1], average="weighted"))
  print("   F1-score: ", "micro -", f1_score(tulemused[0], tulemused[1], average="micro"))
  print("              macro -", f1_score(tulemused[0], tulemused[1], average="macro"))
  print("           weighted -", f1_score(tulemused[0], tulemused[1], average="weighted"))
  print("```")
  
  return [mudeli_nimi, round(f1_score(tulemused[0], tulemused[1], average="micro"),2), round(f1_score(tulemused[0], tulemused[1], average="macro"),2), round(f1_score(tulemused[0], tulemused[1], average="weighted"),2)]



Vabama puhul:  
--- õige ennustus = 1  
--- õige 'OVERLAP-OR-AFTER' ja ennustab 'AFTER' = 0.5  
--- õige 'BEFORE-OR-OVERLAP' ja ennustab 'BEFORE' = 0.5  
--- vale ennustus = 0  

In [19]:
def vabam_accuracy(y_pred, y_test):
  y_pred_vabam = []
  for i in range(len(y_pred)):
    if y_pred[i] != y_test[i]:
      if y_pred[i] == 'AFTER' and y_test[i] == 'OVERLAP-OR-AFTER':
        y_pred_vabam.append(0.5)
      elif y_pred[i] == 'BEFORE' and y_test[i] == 'BEFORE-OR-OVERLAP':
        y_pred_vabam.append(0.5)
      else:
        y_pred_vabam.append(0)
    else:
      y_pred_vabam.append(1)

  y_pred_range = []
  for i in range(len(y_pred)):
    if y_pred[i] != y_test[i]:
        y_pred_range.append(0)
    else:
      y_pred_range.append(1)

  return sum(y_pred_vabam) / len(y_pred_vabam)

## Mudelite treenimine erinevate kihtidega ja multiword tüüpidega
Mudelite tulemused salvestatakse hilisemaks uurimiseks

In [None]:
# Mudelid
clf1 = make_pipeline(LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False))  # 0.59 - main_event_embedding(bert_embeddings)
clf2 = make_pipeline(SVC(random_state=0, kernel="linear", C=0.025)) # 0.64 - main_event_embedding(bert_embeddings)
#clf3 = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=len(set(y)) - 2)) # 0.61 - main_event_embedding(bert_embeddings)
#clf4 = make_pipeline(StandardScaler(), RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)) #0.50 - main_event_embedding(bert_embeddings)
clf5 = make_pipeline(StandardScaler(), MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)) # 0.66 - main_event_embedding(bert_embeddings)

nimed = ['LinearSVC(random_state=0, C=0.025)', 'SVC(random_state=0, kernel="linear", C=0.025)', 'MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)']
mudelid = [clf1,clf2,clf5]

kihid = ["bert_embeddings_fixed", "bert_embeddings_add_fixed", "bert_embeddings_all_add_fixed", "bert_embeddings_lastlayer_fixed", "bert_embeddings_penultimatelayer_fixed"]
# Võimalikud kihid: bert_embeddings, bert_embeddings_add, bert_embeddings_all_add, bert_embeddings_lastlayer, bert_embeddings_penultimatelayer
# Võimalikud multiword viisid: 0 - main_event_embedding
#                              1 - event_embeddingute_keskmised
#                              2 - event_embeddingute_kaalutatud_keskmised
#                              3 - event_embeddingute_kaalutatud_keskmised_sonaliigid

top3 = dict()
multiword_tyyp = {0:"main_event_embedding", 1:"event_embeddingute_keskmised", 2:"event_embeddingute_kaalutatud_keskmised", 3:"event_embeddingute_kaalutatud_keskmised_sonaliigid"}
# Iga multiword tüübi puhul
for i in range(4):
  print("--- multiword tüüp:", multiword_tyyp.get(i))
  # Proovi iga kihiga
  tyyp = dict()
  for kiht in kihid:
    print("------ kiht:", kiht)
    X, y = loo_X_ja_y_DCT(kiht, i)
    # Iga mudeliga
    mudelite_skoorid = []
    for j in range(len(mudelid)):
      clf = mudelid[j]
      m_nimi = nimed[j]
      print(m_nimi)
      mudeli_nimi_ja_f1 = prindi_tulemused(clf, m_nimi, X, y, kiht)
      mudelite_skoorid.append(mudeli_nimi_ja_f1)

    mudelite_skoorid.sort(key = lambda x: x[1], reverse=True)
    
    tyyp[kiht] = mudelite_skoorid[:3]
  
  top3[multiword_tyyp.get(i)] = tyyp

  with open(drive_path + 'top3_DCT.json', 'w') as fp:
    json.dump(top3, fp)

print(top3)

--- multiword tüüp: main_event_embedding
------ kiht: bert_embeddings_fixed
aja_pm_2000_10_04.tasak.a047.sol
[['e19', 'BEFORE', 'e20', ''], ['e18', 'OVERLAP-OR-AFTER', 'e19', '']]
POLE DCT RELATIONIT
aja_sloleht_2007_12_17.tasak.a003.sol
[['e56', 'VAGUE', 'e57', '']]
POLE DCT RELATIONIT
LinearSVC(random_state=0, C=0.025)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_fixed - 0.62 0.41 0.63
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.64      0.60      0.62        47
           BEFORE       0.81      0.85      0.83       155
BEFORE-OR-OVERLAP       0.67      0.57      0.61        46
         INCLUDES       0.67      0.65      0.66        98
      IS_INCLUDED       0.00      0.00      0.00         1
 OVERLAP-OR-AFTER       0.15      0.11      0.12        19
            VAGUE       0.32      0.40      0.35        35

         accurac

  _warn_prf(average, modifier, msg_start, len(result))


   F1-score:  micro - 0.6246882793017456
              macro - 0.40813885458478266
           weighted - 0.6273694258233242
```
SVC(random_state=0, kernel="linear", C=0.025)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8


  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_fixed - 0.62 0.41 0.62
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.59      0.53      0.56        51
           BEFORE       0.85      0.74      0.79       164
BEFORE-OR-OVERLAP       0.60      0.69      0.64        39
         INCLUDES       0.61      0.68      0.64        84
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.29      0.38      0.33        13
            VAGUE       0.46      0.50      0.48        50

         accuracy                           0.65       401
        macro avg       0.49      0.50      0.49       401
     weighted avg       0.68      0.65      0.66       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6194513715710723 vabam: 0.6397755610972568
   Recall:     0.6194513715710723
   Precision:  0.6194513715710723
   F1-score:   0.619451371571

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.4139433054455899
           weighted - 0.6238267015325216
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8


  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_fixed - 0.63 0.41 0.64
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.57      0.57      0.57        46
           BEFORE       0.88      0.75      0.81       166
BEFORE-OR-OVERLAP       0.58      0.76      0.66        34
         INCLUDES       0.67      0.66      0.66        96
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.18      0.30      0.22        10
            VAGUE       0.46      0.51      0.49        49

         accuracy                           0.67       401
        macro avg       0.48      0.51      0.49       401
     weighted avg       0.70      0.67      0.68       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6309226932668329 vabam: 0.650997506234414
   Recall:     0.6309226932668329
   Precision:  0.6309226932668329
   F1-score:   0.630

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.40757721773769523
           weighted - 0.6356384841388666
```
------ kiht: bert_embeddings_add_fixed
aja_pm_2000_10_04.tasak.a047.sol
[['e19', 'BEFORE', 'e20', ''], ['e18', 'OVERLAP-OR-AFTER', 'e19', '']]
POLE DCT RELATIONIT
aja_sloleht_2007_12_17.tasak.a003.sol
[['e56', 'VAGUE', 'e57', '']]
POLE DCT RELATIONIT
LinearSVC(random_state=0, C=0.025)
split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1


  _warn_prf(average, modifier, msg_start, len(result))


split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_add_fixed - 0.59 0.38 0.59
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.39      0.40      0.39        43
           BEFORE       0.81      0.80      0.80       166
BEFORE-OR-OVERLAP       0.67      0.53      0.59        49
         INCLUDES       0.57      0.68      0.62        80
      IS_INCLUDED       0.25      0.25      0.25         4
 OVERLAP-OR-AFTER       0.15      0.11      0.13        18
     SIMULTANEOUS       0.00      0.00      0.00         2
            VAGUE       0.32      0.36      0.34        39

         accuracy                           0.61       401
        macro avg       0.39      0.39      0.39       401
     weighted avg       0.61      0.61      0.61       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5870324189526184 vabam: 0.605236

  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_add_fixed - 0.59 0.38 0.59
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.59      0.54      0.56        50
           BEFORE       0.82      0.73      0.77       159
BEFORE-OR-OVERLAP       0.62      0.65      0.64        43
         INCLUDES       0.61      0.63      0.62        90
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.24      0.36      0.29        11
            VAGUE       0.46      0.52      0.49        48

         accuracy                           0.64       401
        macro avg       0.48      0.49      0.48       401
     weighted avg       0.66      0.64      0.65       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5907730673316709 vabam: 0.6125935162094762
   Recall:     0.5907730673316709
   Precision:  0.5907730673316709
   F1-score:   0.59077306

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.3845041048861356
           weighted - 0.5938402606094044
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1


  _warn_prf(average, modifier, msg_start, len(result))


split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_add_fixed - 0.64 0.42 0.65
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.45      0.45      0.45        44
           BEFORE       0.87      0.82      0.84       172
BEFORE-OR-OVERLAP       0.67      0.67      0.67        39
         INCLUDES       0.65      0.74      0.69        84
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.15      0.12      0.13        17
            VAGUE       0.36      0.36      0.36        45

         accuracy                           0.67       401
        macro avg       0.45      0.45      0.45       401
     weighted avg       0.67      0.67      0.67       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6421446384039899 vabam: 0.6620947630922693
   Recall:     0.6421446384

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.41528596853222166
           weighted - 0.6483296850652073
```
------ kiht: bert_embeddings_all_add_fixed
aja_pm_2000_10_04.tasak.a047.sol
[['e19', 'BEFORE', 'e20', ''], ['e18', 'OVERLAP-OR-AFTER', 'e19', '']]
POLE DCT RELATIONIT
aja_sloleht_2007_12_17.tasak.a003.sol
[['e56', 'VAGUE', 'e57', '']]
POLE DCT RELATIONIT
LinearSVC(random_state=0, C=0.025)
split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1


  _warn_prf(average, modifier, msg_start, len(result))


split 2


  _warn_prf(average, modifier, msg_start, len(result))


split 3
split 4
split 5
split 6
split 7


  _warn_prf(average, modifier, msg_start, len(result))


split 8


  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_all_add_fixed - 0.56 0.35 0.55
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.59      0.53      0.56        51
           BEFORE       0.79      0.77      0.78       145
BEFORE-OR-OVERLAP       0.62      0.62      0.62        45
         INCLUDES       0.53      0.59      0.56        85
      IS_INCLUDED       0.00      0.00      0.00         5
 OVERLAP-OR-AFTER       0.12      0.13      0.12        15
     SIMULTANEOUS       0.00      0.00      0.00         8
            VAGUE       0.39      0.45      0.42        47

         accuracy                           0.60       401
        macro avg       0.38      0.39      0.38       401
     weighted avg       0.59      0.60      0.59       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5583541147132169 vabam: 0.5768079800498753
   Recall:     0.5583541147132169
   Prec

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.38663625864987816
           weighted - 0.591478793694201
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_all_add_fixed - 0.63 0.4 0.64
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.42      0.49      0.45        41
           BEFORE       0.88      0.84      0.86       167
BEFORE-OR-OVERLAP       0.71      0.59      0.65        34
         INCLUDES       0.64      0.67      0.65        84
      IS_INCLUDED       0.14      1.00      0.25         1
 OVERLAP-OR-AFTER       0.16      0.23      0.19        13
            VAGUE       0.54      0.46      0.50        61

         accuracy                           0.67       401
        macro avg       0.50      0.61      0.51       401
     weighted avg       0.69      0.67      0.68   

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.4036951951442086
           weighted - 0.6388923695624156
```
------ kiht: bert_embeddings_lastlayer_fixed
aja_pm_2000_10_04.tasak.a047.sol
[['e19', 'BEFORE', 'e20', ''], ['e18', 'OVERLAP-OR-AFTER', 'e19', '']]
POLE DCT RELATIONIT
aja_sloleht_2007_12_17.tasak.a003.sol
[['e56', 'VAGUE', 'e57', '']]
POLE DCT RELATIONIT
LinearSVC(random_state=0, C=0.025)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_lastlayer_fixed - 0.62 0.41 0.62
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.55      0.49      0.52        49
           BEFORE       0.83      0.80      0.82       169
BEFORE-OR-OVERLAP       0.67      0.62      0.64        42
         INCLUDES       0.60      0.65      0.62        88
      IS_INCLUDED       0.00      0.00      0.00         2
 OVERLAP-OR-AFTER       0.08      0.07      0.07        14
           

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.4054890920286416
           weighted - 0.6215453569849911
```
SVC(random_state=0, kernel="linear", C=0.025)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8


  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_lastlayer_fixed - 0.63 0.42 0.63
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.61      0.56      0.58        50
           BEFORE       0.84      0.72      0.77       166
BEFORE-OR-OVERLAP       0.58      0.62      0.60        42
         INCLUDES       0.62      0.67      0.64        87
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.35      0.46      0.40        13
            VAGUE       0.44      0.56      0.49        43

         accuracy                           0.65       401
        macro avg       0.49      0.51      0.50       401
     weighted avg       0.68      0.65      0.66       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6254364089775561 vabam: 0.6456359102244389
   Recall:     0.6254364089775561
   Precision:  0.6254364089775561
   F1-score:   0.62

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.4203529138816024
           weighted - 0.6308732612512368
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_lastlayer_fixed - 0.64 0.42 0.65
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.55      0.45      0.49        53
           BEFORE       0.85      0.83      0.84       167
BEFORE-OR-OVERLAP       0.62      0.77      0.69        31
         INCLUDES       0.63      0.69      0.66        87
      IS_INCLUDED       0.00      0.00      0.00         2
 OVERLAP-OR-AFTER       0.15      0.12      0.13        17
            VAGUE       0.41      0.41      0.41        44

         accuracy                           0.66       401
        macro avg       0.46      0.47      0.46       401
     weighted avg       0.66      0.66      0.66

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.4181890784570042
           weighted - 0.6455352707328904
```
------ kiht: bert_embeddings_penultimatelayer_fixed
aja_pm_2000_10_04.tasak.a047.sol
[['e19', 'BEFORE', 'e20', ''], ['e18', 'OVERLAP-OR-AFTER', 'e19', '']]
POLE DCT RELATIONIT
aja_sloleht_2007_12_17.tasak.a003.sol
[['e56', 'VAGUE', 'e57', '']]
POLE DCT RELATIONIT
LinearSVC(random_state=0, C=0.025)
split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8


  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_penultimatelayer_fixed - 0.63 0.4 0.64
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.61      0.62      0.62        45
           BEFORE       0.82      0.72      0.77       163
BEFORE-OR-OVERLAP       0.53      0.77      0.63        31
         INCLUDES       0.68      0.64      0.66       100
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.35      0.43      0.39        14
            VAGUE       0.46      0.52      0.49        48

         accuracy                           0.66       401
        macro avg       0.49      0.53      0.51       401
     weighted avg       0.68      0.66      0.67       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6321695760598505 vabam: 0.650997506234414
   Recall:     0.6321695760598505
   Precision:  0.6321695760598505
   F1-score:   0.63216957

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.39908861603100476
           weighted - 0.6381638067934172
```
SVC(random_state=0, kernel="linear", C=0.025)
split 0
split 1


  _warn_prf(average, modifier, msg_start, len(result))


split 2
split 3
split 4
split 5
split 6
split 7
split 8


  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_penultimatelayer_fixed - 0.64 0.41 0.65
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.65      0.62      0.64        48
           BEFORE       0.86      0.75      0.80       163
BEFORE-OR-OVERLAP       0.58      0.74      0.65        35
         INCLUDES       0.74      0.69      0.72       101
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.18      0.33      0.23         9
            VAGUE       0.44      0.53      0.48        45

         accuracy                           0.69       401
        macro avg       0.49      0.53      0.50       401
     weighted avg       0.72      0.69      0.70       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6433915211970075 vabam: 0.6652119700748129
   Recall:     0.6433915211970075
   Precision:  0.6433915211970075
   F1-score:

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.40819960070689043
           weighted - 0.6519079979318372
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1


  _warn_prf(average, modifier, msg_start, len(result))


split 2
split 3
split 4
split 5
split 6
split 7
split 8


  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_penultimatelayer_fixed - 0.65 0.43 0.66
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.63      0.69      0.66        42
           BEFORE       0.87      0.76      0.81       163
BEFORE-OR-OVERLAP       0.60      0.82      0.69        33
         INCLUDES       0.65      0.68      0.66        90
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.41      0.50      0.45        14
            VAGUE       0.54      0.49      0.51        59

         accuracy                           0.69       401
        macro avg       0.53      0.56      0.54       401
     weighted avg       0.71      0.69      0.70       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6536159600997506 vabam: 0.6731920199501247
   Recall:     0.6536159600997506
   Precision:  0.6536159600997506
  

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.42668210178871585
           weighted - 0.6588295447927544
```
--- multiword tüüp: event_embeddingute_keskmised
------ kiht: bert_embeddings_fixed
aja_pm_2000_10_04.tasak.a047.sol
[['e19', 'BEFORE', 'e20', ''], ['e18', 'OVERLAP-OR-AFTER', 'e19', '']]
POLE DCT RELATIONIT
aja_sloleht_2007_12_17.tasak.a003.sol
[['e56', 'VAGUE', 'e57', '']]
POLE DCT RELATIONIT
LinearSVC(random_state=0, C=0.025)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_fixed - 0.63 0.4 0.63
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.59      0.57      0.58        46
           BEFORE       0.83      0.84      0.83       159
BEFORE-OR-OVERLAP       0.64      0.57      0.60        44
         INCLUDES       0.71      0.71      0.71        95
      IS_INCLUDED       0.00      0.00      0.00         1
 OVERLAP-OR-AFTER       0.15      0.13   

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.4065791016979743
           weighted - 0.6188569243267831
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_fixed - 0.63 0.42 0.63
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.46      0.56      0.51        41
           BEFORE       0.81      0.79      0.80       148
BEFORE-OR-OVERLAP       0.68      0.58      0.63        48
         INCLUDES       0.75      0.72      0.73       100
      IS_INCLUDED       0.33      1.00      0.50         1
 OVERLAP-OR-AFTER       0.10      0.18      0.13        11
            VAGUE       0.54      0.48      0.51        52

         accuracy                           0.67       401
        macro avg       0.53      0.62      0.54       401
     weighted avg       0.69      0.67      0.68       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6291770573566086 vabam: 0.6487531172069825
   Recall:     0.629177

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.4170005286801924
           weighted - 0.633596919924856
```
------ kiht: bert_embeddings_add_fixed
aja_pm_2000_10_04.tasak.a047.sol
[['e19', 'BEFORE', 'e20', ''], ['e18', 'OVERLAP-OR-AFTER', 'e19', '']]
POLE DCT RELATIONIT
aja_sloleht_2007_12_17.tasak.a003.sol
[['e56', 'VAGUE', 'e57', '']]
POLE DCT RELATIONIT
LinearSVC(random_state=0, C=0.025)
split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1


  _warn_prf(average, modifier, msg_start, len(result))


split 2


  _warn_prf(average, modifier, msg_start, len(result))


split 3
split 4
split 5
split 6
split 7
split 8


  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_add_fixed - 0.58 0.37 0.58
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.61      0.61      0.61        46
           BEFORE       0.79      0.70      0.74       159
BEFORE-OR-OVERLAP       0.56      0.68      0.61        37
         INCLUDES       0.56      0.62      0.59        85
      IS_INCLUDED       0.00      0.00      0.00         3
 OVERLAP-OR-AFTER       0.41      0.39      0.40        18
     SIMULTANEOUS       0.00      0.00      0.00         1
            VAGUE       0.46      0.48      0.47        52

         accuracy                           0.62       401
        macro avg       0.42      0.44      0.43       401
     weighted avg       0.63      0.62      0.63       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5845386533665835 vabam: 0.6017456359102245
   Recall:     0.5845386533665835
   Precisio

  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_add_fixed - 0.59 0.39 0.6
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.59      0.53      0.56        51
           BEFORE       0.81      0.72      0.76       159
BEFORE-OR-OVERLAP       0.62      0.61      0.62        46
         INCLUDES       0.63      0.69      0.66        86
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.29      0.31      0.30        16
            VAGUE       0.41      0.51      0.45        43

         accuracy                           0.64       401
        macro avg       0.48      0.48      0.48       401
     weighted avg       0.66      0.64      0.65       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5935162094763092 vabam: 0.6134663341645885
   Recall:     0.5935162094763092
   Precision:  0.5935162094763092
   F1-score:   0.593516209

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.3909459663020714
           weighted - 0.5977825659028942
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
split 0
split 1


  _warn_prf(average, modifier, msg_start, len(result))


split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_add_fixed - 0.65 0.41 0.65
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      0.52      0.51        42
           BEFORE       0.88      0.83      0.85       172
BEFORE-OR-OVERLAP       0.64      0.66      0.65        38
         INCLUDES       0.64      0.74      0.69        82
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.15      0.11      0.13        18
            VAGUE       0.41      0.37      0.39        49

         accuracy                           0.67       401
        macro avg       0.46      0.46      0.46       401
     weighted avg       0.68      0.67      0.67       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6453865336658353 vabam: 0.6655860349127182
   Recall:     0.6453865336

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.4102804354232667
           weighted - 0.6529152943905908
```
------ kiht: bert_embeddings_all_add_fixed
aja_pm_2000_10_04.tasak.a047.sol
[['e19', 'BEFORE', 'e20', ''], ['e18', 'OVERLAP-OR-AFTER', 'e19', '']]
POLE DCT RELATIONIT
aja_sloleht_2007_12_17.tasak.a003.sol
[['e56', 'VAGUE', 'e57', '']]
POLE DCT RELATIONIT
LinearSVC(random_state=0, C=0.025)
split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1


  _warn_prf(average, modifier, msg_start, len(result))


split 2


  _warn_prf(average, modifier, msg_start, len(result))


split 3
split 4
split 5
split 6
split 7
split 8


  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_all_add_fixed - 0.56 0.35 0.55
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.57      0.51      0.54        51
           BEFORE       0.78      0.76      0.77       147
BEFORE-OR-OVERLAP       0.60      0.68      0.64        40
         INCLUDES       0.59      0.63      0.61        87
      IS_INCLUDED       0.00      0.00      0.00         1
 OVERLAP-OR-AFTER       0.29      0.22      0.25        23
     SIMULTANEOUS       0.00      0.00      0.00         4
            VAGUE       0.39      0.44      0.41        48

         accuracy                           0.61       401
        macro avg       0.40      0.40      0.40       401
     weighted avg       0.61      0.61      0.61       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5583541147132169 vabam: 0.5763092269326684
   Recall:     0.5583541147132169
   Prec

  _warn_prf(average, modifier, msg_start, len(result))


split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_all_add_fixed - 0.59 0.39 0.6
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.47      0.49      0.48        47
           BEFORE       0.82      0.76      0.79       159
BEFORE-OR-OVERLAP       0.62      0.59      0.60        41
         INCLUDES       0.67      0.63      0.65        95
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.24      0.28      0.26        18
            VAGUE       0.34      0.44      0.38        41

         accuracy                           0.63       401
        macro avg       0.45      0.45      0.45       401
     weighted avg       0.65      0.63      0.64       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5932668329177057 vabam: 0.6144638403990025
   Recall:     0.593266832

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.38691205986056076
           weighted - 0.5980484989914261
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1
split 2
split 3
split 4
split 5
split 6


  _warn_prf(average, modifier, msg_start, len(result))


split 7
split 8
split 9
### bert_embeddings_all_add_fixed - 0.64 0.4 0.64
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.42      0.50      0.46        42
           BEFORE       0.88      0.80      0.84       158
BEFORE-OR-OVERLAP       0.61      0.66      0.63        38
         INCLUDES       0.73      0.71      0.72        98
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.15      0.25      0.19        12
            VAGUE       0.59      0.51      0.55        53

         accuracy                           0.68       401
        macro avg       0.48      0.49      0.48       401
     weighted avg       0.71      0.68      0.69       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6364089775561098 vabam: 0.6559850374064838
   Recall:     0.6364089775561098
   Precision:  0.6364089775561

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.39888328742920504
           weighted - 0.6428853345961407
```
------ kiht: bert_embeddings_lastlayer_fixed
aja_pm_2000_10_04.tasak.a047.sol
[['e19', 'BEFORE', 'e20', ''], ['e18', 'OVERLAP-OR-AFTER', 'e19', '']]
POLE DCT RELATIONIT
aja_sloleht_2007_12_17.tasak.a003.sol
[['e56', 'VAGUE', 'e57', '']]
POLE DCT RELATIONIT
LinearSVC(random_state=0, C=0.025)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_lastlayer_fixed - 0.62 0.4 0.62
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.52      0.44      0.48        52
           BEFORE       0.81      0.80      0.81       165
BEFORE-OR-OVERLAP       0.67      0.68      0.68        38
         INCLUDES       0.63      0.69      0.66        87
      IS_INCLUDED       0.00      0.00      0.00         2
 OVERLAP-OR-AFTER       0.15      0.13      0.14        15
           

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.39751078149308794
           weighted - 0.6215004482126056
```
SVC(random_state=0, kernel="linear", C=0.025)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_lastlayer_fixed - 0.62 0.41 0.62
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.57      0.54      0.55        52
           BEFORE       0.88      0.80      0.84       163
BEFORE-OR-OVERLAP       0.54      0.66      0.59        32
         INCLUDES       0.71      0.61      0.66       105
      IS_INCLUDED       0.00      0.00      0.00         1
 OVERLAP-OR-AFTER       0.10      0.20      0.13        10
            VAGUE       0.38      0.53      0.44        38

         accuracy                           0.66       401
        macro avg       0.45      0.48      0.46       401
     weighted avg       0.70      0.66      0.68       401

--- Ris

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.4096337990882809
           weighted - 0.6241369215660364
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_lastlayer_fixed - 0.63 0.42 0.64
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.52      0.48      0.50        48
           BEFORE       0.83      0.81      0.82       165
BEFORE-OR-OVERLAP       0.64      0.68      0.66        37
         INCLUDES       0.66      0.74      0.70        85
      IS_INCLUDED       0.00      0.00      0.00         2
 OVERLAP-OR-AFTER       0.15      0.12      0.13        17
            VAGUE       0.43      0.40      0.42        47

         accuracy                           0.66       401
        macro avg       0.46      0.46      0.46       401
     weighted avg       0.66      0.66      0.66

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.4184500229795739
           weighted - 0.6367229252433098
```
------ kiht: bert_embeddings_penultimatelayer_fixed
aja_pm_2000_10_04.tasak.a047.sol
[['e19', 'BEFORE', 'e20', ''], ['e18', 'OVERLAP-OR-AFTER', 'e19', '']]
POLE DCT RELATIONIT
aja_sloleht_2007_12_17.tasak.a003.sol
[['e56', 'VAGUE', 'e57', '']]
POLE DCT RELATIONIT
LinearSVC(random_state=0, C=0.025)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_penultimatelayer_fixed - 0.63 0.39 0.63
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.55      0.47      0.51        51
           BEFORE       0.85      0.84      0.85       164
BEFORE-OR-OVERLAP       0.62      0.57      0.59        42
         INCLUDES       0.62      0.67      0.64        88
      IS_INCLUDED       0.00      0.00      0.00         1
 OVERLAP-OR-AFTER       0.08      0.08      0.08        

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.3854097479293821
           weighted - 0.6321868739141226
```
SVC(random_state=0, kernel="linear", C=0.025)
split 0
split 1


  _warn_prf(average, modifier, msg_start, len(result))


split 2
split 3
split 4
split 5
split 6
split 7
split 8


  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_penultimatelayer_fixed - 0.64 0.41 0.65
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.63      0.58      0.60        50
           BEFORE       0.85      0.74      0.79       163
BEFORE-OR-OVERLAP       0.58      0.72      0.64        36
         INCLUDES       0.69      0.68      0.68        96
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.18      0.27      0.21        11
            VAGUE       0.43      0.51      0.46        45

         accuracy                           0.67       401
        macro avg       0.48      0.50      0.49       401
     weighted avg       0.70      0.67      0.68       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6403990024937655 vabam: 0.6619700748129677
   Recall:     0.6403990024937655
   Precision:  0.6403990024937655
   F1-score:

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.412481770798416
           weighted - 0.6479236242745902
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1


  _warn_prf(average, modifier, msg_start, len(result))


split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_penultimatelayer_fixed - 0.65 0.41 0.65
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.48      0.48      0.48        44
           BEFORE       0.87      0.85      0.86       166
BEFORE-OR-OVERLAP       0.64      0.74      0.68        34
         INCLUDES       0.69      0.75      0.72        88
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.23      0.20      0.21        15
            VAGUE       0.45      0.37      0.41        54

         accuracy                           0.69       401
        macro avg       0.48      0.48      0.48       401
     weighted avg       0.69      0.69      0.69       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6483790523690774 vabam: 0.6675810473815461
   Recall:    

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.4088657137535836
           weighted - 0.654477135701284
```
--- multiword tüüp: event_embeddingute_kaalutatud_keskmised
------ kiht: bert_embeddings_fixed
aja_pm_2000_10_04.tasak.a047.sol
[['e19', 'BEFORE', 'e20', ''], ['e18', 'OVERLAP-OR-AFTER', 'e19', '']]
POLE DCT RELATIONIT
aja_sloleht_2007_12_17.tasak.a003.sol
[['e56', 'VAGUE', 'e57', '']]
POLE DCT RELATIONIT
LinearSVC(random_state=0, C=0.025)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_fixed - 0.63 0.41 0.63
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.61      0.59      0.60        46
           BEFORE       0.82      0.84      0.83       159
BEFORE-OR-OVERLAP       0.64      0.61      0.62        41
         INCLUDES       0.69      0.66      0.68       100
      IS_INCLUDED       0.00      0.00      0.00         2
 OVERLAP-OR-AFTER       0.15   

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.4086417515357209
           weighted - 0.6262331099805853
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_fixed - 0.62 0.42 0.63
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.55      0.51      0.53        47
           BEFORE       0.81      0.82      0.82       160
BEFORE-OR-OVERLAP       0.69      0.59      0.64        46
         INCLUDES       0.65      0.73      0.69        85
      IS_INCLUDED       0.25      1.00      0.40         1
 OVERLAP-OR-AFTER       0.08      0.07      0.07        15
            VAGUE       0.39      0.36      0.37        47

         accuracy                           0.66       401
        macro avg       0.49      0.58      0.50       401
     weighted avg       0.66      0.66      0.66       401

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.4172121158456935
           weighted - 0.6255699743504212
```
------ kiht: bert_embeddings_add_fixed
aja_pm_2000_10_04.tasak.a047.sol
[['e19', 'BEFORE', 'e20', ''], ['e18', 'OVERLAP-OR-AFTER', 'e19', '']]
POLE DCT RELATIONIT
aja_sloleht_2007_12_17.tasak.a003.sol
[['e56', 'VAGUE', 'e57', '']]
POLE DCT RELATIONIT
LinearSVC(random_state=0, C=0.025)
split 0
split 1


  _warn_prf(average, modifier, msg_start, len(result))


split 2
split 3
split 4
split 5
split 6
split 7
split 8


  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_add_fixed - 0.59 0.37 0.58
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.61      0.58      0.60        48
           BEFORE       0.78      0.71      0.74       156
BEFORE-OR-OVERLAP       0.58      0.63      0.60        41
         INCLUDES       0.52      0.62      0.57        79
      IS_INCLUDED       0.00      0.00      0.00         2
 OVERLAP-OR-AFTER       0.41      0.37      0.39        19
     SIMULTANEOUS       0.00      0.00      0.00         1
            VAGUE       0.44      0.44      0.44        55

         accuracy                           0.61       401
        macro avg       0.42      0.42      0.42       401
     weighted avg       0.62      0.61      0.61       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5860349127182044 vabam: 0.6033665835411471
   Recall:     0.5860349127182044
   Precisio

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.38732377647428795
           weighted - 0.5950088987143708
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
split 0
split 1


  _warn_prf(average, modifier, msg_start, len(result))


split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_add_fixed - 0.64 0.41 0.65
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      0.48      0.49        46
           BEFORE       0.88      0.82      0.85       174
BEFORE-OR-OVERLAP       0.62      0.69      0.65        35
         INCLUDES       0.63      0.71      0.67        85
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.08      0.08      0.08        12
            VAGUE       0.36      0.33      0.34        49

         accuracy                           0.66       401
        macro avg       0.44      0.44      0.44       401
     weighted avg       0.67      0.66      0.67       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6384039900249376 vabam: 0.6579800498753119
   Recall:     0.6384039900

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.40569912629537724
           weighted - 0.6458093879417435
```
------ kiht: bert_embeddings_all_add_fixed
aja_pm_2000_10_04.tasak.a047.sol
[['e19', 'BEFORE', 'e20', ''], ['e18', 'OVERLAP-OR-AFTER', 'e19', '']]
POLE DCT RELATIONIT
aja_sloleht_2007_12_17.tasak.a003.sol
[['e56', 'VAGUE', 'e57', '']]
POLE DCT RELATIONIT
LinearSVC(random_state=0, C=0.025)
split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1
split 2


  _warn_prf(average, modifier, msg_start, len(result))


split 3
split 4
split 5
split 6
split 7
split 8


  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_all_add_fixed - 0.56 0.35 0.55
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.61      0.50      0.55        56
           BEFORE       0.76      0.75      0.76       144
BEFORE-OR-OVERLAP       0.60      0.66      0.63        41
         INCLUDES       0.57      0.62      0.60        87
      IS_INCLUDED       0.00      0.00      0.00         2
 OVERLAP-OR-AFTER       0.24      0.24      0.24        17
     SIMULTANEOUS       0.00      0.00      0.00         6
            VAGUE       0.39      0.44      0.41        48

         accuracy                           0.60       401
        macro avg       0.40      0.40      0.40       401
     weighted avg       0.60      0.60      0.60       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5558603491271821 vabam: 0.5735660847880298
   Recall:     0.5558603491271821
   Prec

  _warn_prf(average, modifier, msg_start, len(result))


split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_all_add_fixed - 0.59 0.39 0.59
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.52      0.50      0.51        52
           BEFORE       0.80      0.74      0.77       156
BEFORE-OR-OVERLAP       0.66      0.61      0.64        44
         INCLUDES       0.61      0.65      0.63        91
      IS_INCLUDED       0.00      0.00      0.00         2
 OVERLAP-OR-AFTER       0.10      0.14      0.12        14
            VAGUE       0.37      0.40      0.39        42

         accuracy                           0.62       401
        macro avg       0.44      0.44      0.44       401
     weighted avg       0.63      0.62      0.62       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5882793017456358 vabam: 0.6102244389027431
   Recall:     0.58827930

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.38847940369547596
           weighted - 0.5930328599250713
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
split 0
split 1


  _warn_prf(average, modifier, msg_start, len(result))


split 2
split 3
split 4
split 5
split 6
split 7
split 8


  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_all_add_fixed - 0.63 0.39 0.64
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.54      0.50      0.52        50
           BEFORE       0.87      0.79      0.83       155
BEFORE-OR-OVERLAP       0.60      0.69      0.64        39
         INCLUDES       0.55      0.64      0.59        81
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.12      0.20      0.15        10
            VAGUE       0.59      0.48      0.53        66

         accuracy                           0.65       401
        macro avg       0.47      0.47      0.47       401
     weighted avg       0.67      0.65      0.66       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6286783042394015 vabam: 0.6491271820448878
   Recall:     0.6286783042394015
   Precision:  0.6286783042394015
   F1-score

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.3924896082514693
           weighted - 0.6359580996315026
```
------ kiht: bert_embeddings_lastlayer_fixed
aja_pm_2000_10_04.tasak.a047.sol
[['e19', 'BEFORE', 'e20', ''], ['e18', 'OVERLAP-OR-AFTER', 'e19', '']]
POLE DCT RELATIONIT
aja_sloleht_2007_12_17.tasak.a003.sol
[['e56', 'VAGUE', 'e57', '']]
POLE DCT RELATIONIT
LinearSVC(random_state=0, C=0.025)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_lastlayer_fixed - 0.62 0.4 0.62
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.52      0.46      0.49        50
           BEFORE       0.82      0.81      0.81       165
BEFORE-OR-OVERLAP       0.67      0.67      0.67        39
         INCLUDES       0.62      0.65      0.63        91
      IS_INCLUDED       0.00      0.00      0.00         2
 OVERLAP-OR-AFTER       0.08      0.08      0.08        12
            

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.4006310746760907
           weighted - 0.6199639402538023
```
SVC(random_state=0, kernel="linear", C=0.025)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8


  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_lastlayer_fixed - 0.62 0.42 0.63
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.59      0.60      0.59        45
           BEFORE       0.84      0.72      0.77       166
BEFORE-OR-OVERLAP       0.58      0.62      0.60        42
         INCLUDES       0.64      0.69      0.66        87
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.41      0.37      0.39        19
            VAGUE       0.46      0.60      0.52        42

         accuracy                           0.66       401
        macro avg       0.50      0.51      0.51       401
     weighted avg       0.68      0.66      0.67       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6214463840399003 vabam: 0.6413965087281797
   Recall:     0.6214463840399003
   Precision:  0.6214463840399003
   F1-score:   0.62

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.41751655766305473
           weighted - 0.6265767243128108
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_lastlayer_fixed - 0.64 0.42 0.65
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.41      0.50      0.45        40
           BEFORE       0.90      0.83      0.86       161
BEFORE-OR-OVERLAP       0.69      0.82      0.75        33
         INCLUDES       0.73      0.61      0.66       109
      IS_INCLUDED       0.00      0.00      0.00         1
 OVERLAP-OR-AFTER       0.14      0.23      0.18        13
            VAGUE       0.40      0.48      0.43        44

         accuracy                           0.67       401
        macro avg       0.47      0.49      0.48       401
     weighted avg       0.71      0.67      0.6

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.4218575767543057
           weighted - 0.6461155852063313
```
------ kiht: bert_embeddings_penultimatelayer_fixed
aja_pm_2000_10_04.tasak.a047.sol
[['e19', 'BEFORE', 'e20', ''], ['e18', 'OVERLAP-OR-AFTER', 'e19', '']]
POLE DCT RELATIONIT
aja_sloleht_2007_12_17.tasak.a003.sol
[['e56', 'VAGUE', 'e57', '']]
POLE DCT RELATIONIT
LinearSVC(random_state=0, C=0.025)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8


  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_penultimatelayer_fixed - 0.63 0.39 0.64
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.63      0.64      0.64        45
           BEFORE       0.82      0.73      0.77       160
BEFORE-OR-OVERLAP       0.53      0.73      0.62        33
         INCLUDES       0.68      0.65      0.67        98
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.35      0.40      0.38        15
            VAGUE       0.44      0.48      0.46        50

         accuracy                           0.66       401
        macro avg       0.50      0.52      0.50       401
     weighted avg       0.68      0.66      0.67       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.629925187032419 vabam: 0.6485037406483791
   Recall:     0.629925187032419
   Precision:  0.629925187032419
   F1-score:   0.629925187

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.3889797432871909
           weighted - 0.6358849834384238
```
SVC(random_state=0, kernel="linear", C=0.025)
split 0
split 1


  _warn_prf(average, modifier, msg_start, len(result))


split 2
split 3
split 4
split 5
split 6
split 7
split 8


  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_penultimatelayer_fixed - 0.64 0.41 0.65
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.63      0.56      0.59        52
           BEFORE       0.85      0.75      0.80       159
BEFORE-OR-OVERLAP       0.58      0.74      0.65        35
         INCLUDES       0.70      0.67      0.68        99
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.18      0.27      0.21        11
            VAGUE       0.44      0.53      0.48        45

         accuracy                           0.67       401
        macro avg       0.48      0.50      0.49       401
     weighted avg       0.70      0.67      0.68       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6438902743142144 vabam: 0.6659600997506235
   Recall:     0.6438902743142144
   Precision:  0.6438902743142144
   F1-score:

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.414343626194014
           weighted - 0.6508037795586813
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1


  _warn_prf(average, modifier, msg_start, len(result))


split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_penultimatelayer_fixed - 0.64 0.41 0.65
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      0.50      0.50        44
           BEFORE       0.86      0.83      0.85       168
BEFORE-OR-OVERLAP       0.67      0.70      0.68        37
         INCLUDES       0.63      0.71      0.67        84
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.15      0.12      0.14        16
            VAGUE       0.45      0.38      0.42        52

         accuracy                           0.67       401
        macro avg       0.47      0.47      0.47       401
     weighted avg       0.68      0.67      0.67       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.643640897755611 vabam: 0.6624688279301746
   Recall:     

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.4084428142717531
           weighted - 0.6489303341272427
```
--- multiword tüüp: event_embeddingute_kaalutatud_keskmised_sonaliigid
------ kiht: bert_embeddings_fixed
muudan
aja_pm_2000_10_04.tasak.a047.sol
[['e19', 'BEFORE', 'e20', ''], ['e18', 'OVERLAP-OR-AFTER', 'e19', '']]
POLE DCT RELATIONIT
aja_sloleht_2007_12_17.tasak.a003.sol
[['e56', 'VAGUE', 'e57', '']]
POLE DCT RELATIONIT
LinearSVC(random_state=0, C=0.025)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_fixed - 0.63 0.41 0.63
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.57      0.54      0.56        46
           BEFORE       0.83      0.84      0.83       159
BEFORE-OR-OVERLAP       0.64      0.58      0.61        43
         INCLUDES       0.69      0.68      0.69        97
      IS_INCLUDED       0.00      0.00      0.00         2
 OVERLAP-OR-

  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_fixed - 0.62 0.41 0.62
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.61      0.52      0.56        54
           BEFORE       0.87      0.74      0.80       166
BEFORE-OR-OVERLAP       0.58      0.68      0.63        38
         INCLUDES       0.61      0.70      0.65        81
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.29      0.36      0.32        14
            VAGUE       0.48      0.54      0.51        48

         accuracy                           0.66       401
        macro avg       0.49      0.51      0.50       401
     weighted avg       0.69      0.66      0.67       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6184538653366584 vabam: 0.6387780548628429
   Recall:     0.6184538653366584
   Precision:  0.6184538653366584
   F1-score:   0.618453865336

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.4089282985063718
           weighted - 0.6227207396186958
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8


  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_fixed - 0.62 0.4 0.62
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.52      0.57      0.55        42
           BEFORE       0.85      0.75      0.79       161
BEFORE-OR-OVERLAP       0.56      0.68      0.61        37
         INCLUDES       0.71      0.63      0.67       107
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.18      0.38      0.24         8
            VAGUE       0.43      0.50      0.46        46

         accuracy                           0.65       401
        macro avg       0.46      0.50      0.47       401
     weighted avg       0.69      0.65      0.67       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6197007481296757 vabam: 0.6389027431421448
   Recall:     0.6197007481296757
   Precision:  0.6197007481296757
   F1-score:   0.619

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.39757106870613423
           weighted - 0.6236377906788707
```
------ kiht: bert_embeddings_add_fixed
muudan
aja_pm_2000_10_04.tasak.a047.sol
[['e19', 'BEFORE', 'e20', ''], ['e18', 'OVERLAP-OR-AFTER', 'e19', '']]
POLE DCT RELATIONIT
aja_sloleht_2007_12_17.tasak.a003.sol
[['e56', 'VAGUE', 'e57', '']]
POLE DCT RELATIONIT
LinearSVC(random_state=0, C=0.025)
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8


  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_add_fixed - 0.58 0.37 0.58
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.63      0.62      0.62        47
           BEFORE       0.80      0.73      0.76       157
BEFORE-OR-OVERLAP       0.56      0.66      0.60        38
         INCLUDES       0.55      0.63      0.59        82
      IS_INCLUDED       0.00      0.00      0.00         2
 OVERLAP-OR-AFTER       0.29      0.31      0.30        16
     SIMULTANEOUS       0.00      0.00      0.00         2
            VAGUE       0.48      0.46      0.47        57

         accuracy                           0.63       401
        macro avg       0.41      0.43      0.42       401
     weighted avg       0.63      0.63      0.63       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5827930174563591 vabam: 0.6003740648379053
   Recall:     0.5827930174563591
   Precisio

  _warn_prf(average, modifier, msg_start, len(result))


split 9
### bert_embeddings_add_fixed - 0.59 0.38 0.59
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.57      0.53      0.55        49
           BEFORE       0.80      0.74      0.77       155
BEFORE-OR-OVERLAP       0.62      0.58      0.60        48
         INCLUDES       0.57      0.63      0.60        86
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.29      0.31      0.30        16
            VAGUE       0.43      0.49      0.46        47

         accuracy                           0.62       401
        macro avg       0.47      0.47      0.47       401
     weighted avg       0.64      0.62      0.63       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5907730673316708 vabam: 0.6110972568578553
   Recall:     0.5907730673316708
   Precision:  0.5907730673316708
   F1-score:   0.59077306

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.38485449362612917
           weighted - 0.5942642531765027
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
split 0
split 1


  _warn_prf(average, modifier, msg_start, len(result))


split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9
### bert_embeddings_add_fixed - 0.64 0.41 0.65
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.44      0.54      0.48        39
           BEFORE       0.86      0.82      0.84       166
BEFORE-OR-OVERLAP       0.75      0.68      0.71        31
         INCLUDES       0.64      0.64      0.64        88
      IS_INCLUDED       0.29      0.67      0.40         3
 OVERLAP-OR-AFTER       0.16      0.21      0.18        14
            VAGUE       0.50      0.43      0.46        60

         accuracy                           0.66       401
        macro avg       0.52      0.57      0.53       401
     weighted avg       0.68      0.66      0.67       401

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.6396508728179552 vabam: 0.6593516209476309
   Recall:     0.6396508728

  _warn_prf(average, modifier, msg_start, len(result))


              macro - 0.41255385942956674
           weighted - 0.6476634103561463
```
------ kiht: bert_embeddings_all_add_fixed
muudan
aja_pm_2000_10_04.tasak.a047.sol
[['e19', 'BEFORE', 'e20', ''], ['e18', 'OVERLAP-OR-AFTER', 'e19', '']]
POLE DCT RELATIONIT
aja_sloleht_2007_12_17.tasak.a003.sol
[['e56', 'VAGUE', 'e57', '']]
POLE DCT RELATIONIT
LinearSVC(random_state=0, C=0.025)
split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1


  _warn_prf(average, modifier, msg_start, len(result))


split 2


  _warn_prf(average, modifier, msg_start, len(result))


split 3


In [None]:
import json
with open(drive_path + 'top3_DCT.json', 'r') as fp:
    top3 = json.load(fp)

In [None]:
for key in top3.keys():
  print("------------------------ multiword_tüüp:", key)
  for kiht in top3.get(key).keys():
    print("------ kiht:", kiht)
    for mudeli_tulemus in top3.get(key).get(kiht):
      print("### ", mudeli_tulemus[1], mudeli_tulemus[2], mudeli_tulemus[3], "-", mudeli_tulemus[0])
  print()

------------------------ multiword_tüüp: main_event_embedding
------ kiht: bert_embeddings_fixed
###  0.63 0.41 0.64 - MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
###  0.62 0.41 0.63 - LinearSVC(random_state=0, C=0.025)
###  0.62 0.41 0.62 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_add_fixed
###  0.64 0.42 0.65 - MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
###  0.59 0.38 0.59 - LinearSVC(random_state=0, C=0.025)
###  0.59 0.38 0.59 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_all_add_fixed
###  0.63 0.4 0.64 - MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
###  0.59 0.39 0.59 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.56 0.35 0.55 - LinearSVC(random_state=0, C=0.025)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.64 0.42 0.65 - MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
###  0.63 0.42 0.63 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.62 0.41 0.62 - 

## Top3 mudelid erinevate kombinatsioonidega



```
------------------------ multiword_tüüp: main_event_embedding
------ kiht: bert_embeddings_fixed
###  0.63 0.42 0.63 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.63 0.4 0.63 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.62 0.41 0.63 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_add_fixed
###  0.64 0.41 0.64 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.59 0.39 0.59 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.59 0.39 0.6 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_all_add_fixed
###  0.63 0.41 0.64 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.59 0.39 0.59 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.55 0.35 0.54 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.63 0.41 0.64 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.62 0.4 0.62 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.62 0.41 0.62 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.65 0.43 0.66 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.64 0.41 0.65 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.63 0.4 0.64 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)

------------------------ multiword_tüüp: event_embeddingute_keskmised
------ kiht: bert_embeddings_fixed
###  0.63 0.42 0.63 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.62 0.41 0.62 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.62 0.41 0.63 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
------ kiht: bert_embeddings_add_fixed
###  0.63 0.4 0.64 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.59 0.39 0.59 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.58 0.38 0.58 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_all_add_fixed
###  0.64 0.4 0.64 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.6 0.39 0.6 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.56 0.35 0.55 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.63 0.42 0.64 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.62 0.4 0.62 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.62 0.41 0.63 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.65 0.41 0.65 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.64 0.42 0.65 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.63 0.39 0.63 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)

------------------------ multiword_tüüp: event_embeddingute_kaalutatud_keskmised
------ kiht: bert_embeddings_fixed
###  0.63 0.42 0.63 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.63 0.42 0.64 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.62 0.41 0.63 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_add_fixed
###  0.64 0.41 0.65 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.6 0.4 0.6 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.59 0.38 0.58 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_all_add_fixed
###  0.64 0.4 0.64 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.59 0.38 0.59 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.55 0.35 0.55 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.64 0.41 0.64 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.62 0.41 0.62 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.62 0.42 0.63 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.64 0.42 0.65 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.64 0.41 0.65 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.63 0.39 0.64 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)

------------------------ multiword_tüüp: event_embeddingute_kaalutatud_keskmised_sonaliigid
------ kiht: bert_embeddings_fixed
###  0.63 0.42 0.63 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.63 0.42 0.64 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.62 0.41 0.62 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_add_fixed
###  0.64 0.4 0.65 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.59 0.38 0.6 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.58 0.38 0.58 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_all_add_fixed
###  0.64 0.41 0.64 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.59 0.39 0.6 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.56 0.36 0.55 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.64 0.42 0.64 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.62 0.42 0.62 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.62 0.41 0.63 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.64 0.41 0.65 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.64 0.41 0.65 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.63 0.39 0.63 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
```



## Valitud mudel

In [None]:
X, y = loo_X_ja_y_DCT("bert_embeddings_penultimatelayer_fixed", 0, False)
clf5 = make_pipeline(StandardScaler(), MLPClassifier(alpha=2.8, solver="lbfgs", max_iter=5000, random_state=0))
#clf5 = make_pipeline(StandardScaler(), SVC(random_state=0, kernel="linear", C=0.025, class_weight="balanced"))
tulemused = ristvalideeri(clf5, X, y)
#tulemused = prindi_tulemused(clf5, 'MLPClassifier(alpha=2.8, solver="lbfgs", max_iter=5000, random_state=0)', X, y, "bert_embeddings_penultimatelayer_fixed")

4010
4010
4010
split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9


In [None]:
report = classification_report(tulemused[0], tulemused[1])
print(report)
print("Micro")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="micro"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="micro"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="micro"))
print("Macro")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="macro"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="macro"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="macro"))
print("Weighted")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="weighted"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="weighted"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="weighted"))

print("TempEval-2 järgi (õigete vastuste arv / kogu vastuste arv)")
oigeid = 0
for i in range(len(tulemused[0])):
  if tulemused[0][i] == tulemused[1][i]:
    oigeid += 1
print("Skoor:", str(oigeid / len(tulemused[0])))

4010
9
32
0
1
                   precision    recall  f1-score   support

            AFTER       0.51      0.49      0.50       480
           BEFORE       0.85      0.82      0.84      1586
BEFORE-OR-OVERLAP       0.60      0.73      0.66       318
         INCLUDES       0.67      0.63      0.65       963
      IS_INCLUDED       0.16      0.56      0.24         9
 OVERLAP-OR-AFTER       0.16      0.22      0.19       140
     SIMULTANEOUS       0.00      0.00      0.00         0
            VAGUE       0.44      0.45      0.45       514

         accuracy                           0.66      4010
        macro avg       0.43      0.49      0.44      4010
     weighted avg       0.67      0.66      0.66      4010

Micro
- recall: 0.6581047381546135
- precsision: 0.6581047381546135
- f1-score: 0.6581047381546135
Macro


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


- recall: 0.4867538809185814
- precsision: 0.42542095091786153
- f1-score: 0.440641372531819
Weighted
- recall: 0.6581047381546135
- precsision: 0.6716742182407544
- f1-score: 0.6636658436725187
TempEval-2 järgi (õigete vastuste arv / kogu vastuste arv)
Skoor: 0.6581047381546135


# Mudel - tlink-event-timex

In [20]:
def loo_X_ja_y_eventtimex(embedding_layer_name, event_embeddings_nr): 
  rels = loe_relatsioonid()
  artikkel_event = []
  X = []
  y = []

  for artikkel in laetud_artiklid_embeddingutega:
    leitud_suhteid = 0
    # Võimalikud kihid: bert_embeddings_fixed, bert_embeddings_add_fixed, bert_embeddings_all_add_fixed, bert_embeddings_lastlayer_fixed, bert_embeddings_penultimatelayer_fixed

    if event_embeddings_nr == 0:
      event_embeddings = main_event_embedding(artikkel, layer_name=embedding_layer_name)
    elif event_embeddings_nr == 1:
      event_embeddings = event_embeddingute_keskmised(artikkel, layer_name=embedding_layer_name)
    elif event_embeddings_nr == 2:
      event_embeddings = event_embeddingute_kaalutatud_keskmised(artikkel, layer_name=embedding_layer_name)
    elif event_embeddings_nr == 3:
      event_embeddings = event_embeddingute_kaalutatud_keskmised_sonaliigid(artikkel, layer_name=embedding_layer_name)

    timex_embeddings = timex_embeddingute_keskmised(artikkel, layer_name=embedding_layer_name)
    for item in event_embeddings.items():
      event_relations = rels.get((artikkel.meta.get("filename"), item[0]))
      leitud = []
      if not event_relations:
        print(artikkel.meta.get("filename"),item[0])
      for relation in event_relations:
        # käi relation läbi ja vaata, kas sisaldab ajaväljendi ID-d (t*)
        if (relation[2] != "t0" and relation[2][0] == "t"):
          timex_ID = None
          if relation[0][0] == "t":
            timex_ID = relation[0]
          else:
            timex_ID = relation[2]
          event_timex_embedding = concat_embedding_event_timex(item[0], timex_ID, event_embeddings, timex_embeddings)
          leitud.append((event_timex_embedding, relation[1]))
      if leitud == []:
        prindi = False
        if prindi:
          print(artikkel.meta.get("filename"))
          print(event_relations)
          print("POLE event_timex RELATIONIT")
      else:
        for leid in leitud:
          X.append(leid[0])
          y.append(leid[1])
          leitud_suhteid += 1
        artikkel_event.append((artikkel.meta.get("filename"), item[0]))

  return X, y

X, y = loo_X_ja_y_eventtimex("bert_embeddings_fixed", 0)

## Kuldmärgendite ja embeddingute arvude kontroll

In [22]:
gt_arv = 0
embt_arv = 0
ge_arv = 0
embe_arv = 0
for art in laetud_artiklid_embeddingutega:
  embs = timex_embeddingute_keskmised(art)
  embt_arv += len(embs.keys())
  gt_arv += len(art.gold_timexes_phrases_ambiguous_fixed)

  eventIDs = set()
  for event in art.gold_events:
    eventIDs.add(event.event_ID)
  ge_arv += len(eventIDs)
  e_main = len(main_event_embedding(art).keys())
  e_kesk = len(event_embeddingute_keskmised(art).keys())
  e_kesk_kaal = len(event_embeddingute_kaalutatud_keskmised(art).keys())
  e_kesk_kaal_son = len(event_embeddingute_kaalutatud_keskmised_sonaliigid(art).keys())

  if not e_main == e_kesk == e_kesk_kaal == e_kesk_kaal_son:
    print(e_main, e_kesk, e_kesk_kaal, e_kesk_kaal_son)
    print(len(art.gold_events))
    break
  else:
    embe_arv += e_main
  
  if ge_arv != embe_arv:
    print(art.meta.get("filename"))
    print(e_main, e_kesk, e_kesk_kaal, e_kesk_kaal_son)
    print(len(art.gold_events))
    break

print("-")
print(gt_arv, embt_arv) # lugemisel tuleb erinevus aja_pm_2000_10_04.tasak.a015.sol-s oleva "1656-1661" tõttu. Kõik on siiski korras.
print(ge_arv, embe_arv)

muudan
-
627 628
4012 4012


## Mudelite treenimine erinevate kihtidega ja multiword tüüpidega

In [None]:
# Mudelid
clf1 = make_pipeline(StandardScaler(), LinearSVC(random_state=0, C=0.025))  # 0.59 - main_event_embedding(bert_embeddings)
clf2 = make_pipeline(StandardScaler(), SVC(random_state=0, kernel="linear", C=0.025)) # 0.64 - main_event_embedding(bert_embeddings)
clf3 = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=len(set(y)) - 2)) # 0.61 - main_event_embedding(bert_embeddings)
clf4 = make_pipeline(StandardScaler(), RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)) #0.50 - main_event_embedding(bert_embeddings)
clf5 = make_pipeline(StandardScaler(), MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)) # 0.66 - main_event_embedding(bert_embeddings)

nimed = ['LinearSVC(random_state=0, C=0.025)', 'SVC(random_state=0, kernel="linear", C=0.025)', 'KNeighborsClassifier(n_neighbors=len(set(y)) - 2)', 'RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)', 'MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)']
mudelid = [clf1,clf2,clf3,clf4,clf5]

kihid = ["bert_embeddings_fixed", "bert_embeddings_add_fixed", "bert_embeddings_all_add_fixed", "bert_embeddings_lastlayer_fixed", "bert_embeddings_penultimatelayer_fixed"]
# Võimalikud kihid: bert_embeddings, bert_embeddings_add, bert_embeddings_all_add, bert_embeddings_lastlayer, bert_embeddings_penultimatelayer
# Võimalikud multiword viisid: 0 - main_event_embedding
#                              1 - event_embeddingute_keskmised
#                              2 - event_embeddingute_kaalutatud_keskmised
#                              3 - event_embeddingute_kaalutatud_keskmised_sonaliigid


top3 = dict()
multiword_tyyp = {0:"main_event_embedding", 1:"event_embeddingute_keskmised", 2:"event_embeddingute_kaalutatud_keskmised", 3:"event_embeddingute_kaalutatud_keskmised_sonaliigid"}
# Iga multiword tüübi puhul
for i in range(4):
  print("--- multiword tüüp:", multiword_tyyp.get(i))
  # Proovi iga kihiga
  tyyp = dict()
  for kiht in kihid:
    print("------ kiht:", kiht)
    X, y = loo_X_ja_y_eventtimex(kiht, i)
    # Iga mudeliga
    mudelite_skoorid = []
    for j in range(len(mudelid)):
      clf = mudelid[j]
      m_nimi = nimed[j]
      mudeli_nimi_ja_f1 = prindi_tulemused(clf, m_nimi, X, y, kiht)
      mudelite_skoorid.append(mudeli_nimi_ja_f1)

    mudelite_skoorid.sort(key = lambda x: x[1], reverse=True)
    
    tyyp[kiht] = mudelite_skoorid[:3]
  
  top3[multiword_tyyp.get(i)] = tyyp

print(top3)

--- multiword tüüp: main_event_embedding
------ kiht: bert_embeddings_fixed
559
559


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.53 0.27 0.52
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      0.50      0.50         2
BEFORE-OR-OVERLAP       0.60      0.38      0.46         8
         INCLUDES       0.00      0.00      0.00         2
      IS_INCLUDED       0.70      0.78      0.74        27
 OVERLAP-OR-AFTER       0.33      1.00      0.50         1
     SIMULTANEOUS       0.53      0.50      0.52        16

         accuracy                           0.61        56
        macro avg       0.44      0.53      0.45        56
     weighted avg       0.60      0.61      0.60        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5258116883116882 vabam: 0.5258116883116882
   Recall:     0.5258116883116882
   Precision:  0.5258116883116882
   F1-score:   0.5258116883116882
Üle kõikide ennustuste
   Accuracy: 0.5259391771019678 vabam: 0.525939177

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.55 0.31 0.56
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       1.00      1.00      1.00         1
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.57      0.57      0.57         7
         INCLUDES       0.00      0.00      0.00         1
      IS_INCLUDED       0.75      0.69      0.72        35
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.43      0.50      0.46        12

         accuracy                           0.62        56
        macro avg       0.39      0.39      0.39        56
     weighted avg       0.65      0.62      0.64        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5490584415584415 vabam: 0.5499512987012987
   Recall:     0.5490584415584415
   Precision:  0.5490584415584415
   F1-score:   0.5490584415584415
Üle

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.47 0.2 0.51
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.67      1.00      0.80         2
BEFORE-OR-OVERLAP       0.17      0.20      0.18         5
         INCLUDES       1.00      1.00      1.00         1
      IS_INCLUDED       0.75      0.60      0.67        35
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.36      0.38      0.37        13

         accuracy                           0.54        56
        macro avg       0.49      0.53      0.50        56
     weighted avg       0.61      0.54      0.57        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4704220779220779 vabam: 0.4704220779220779
   Recall:     0.4704220779220779
   Precision:  0.4704220779220779
   F1-score:   0.4704220779220779
Üle kõikide ennustuste
   Accuracy: 0.47048300536672627 vab

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.52 0.11 0.63
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         0
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       1.00      0.60      0.75        53
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.00      0.00      0.00         3

         accuracy                           0.57        56
        macro avg       0.14      0.09      0.11        56
     weighted avg       0.95      0.57      0.71        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5204545454545454 vabam: 0.5204545454545454
   Recall:     0.5204545454545454
   Precision:  0.52045454545454

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.58 0.31 0.6
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         6
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.84      0.69      0.76        39
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.50      0.73      0.59        11

         accuracy                           0.62        56
        macro avg       0.19      0.20      0.19        56
     weighted avg       0.69      0.62      0.65        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5848376623376623 vabam: 0.5857305194805195
   Recall:     0.5848376623376623
   Precision:  0.5848376623376623
   F1-score:   0.58483766233

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.52 0.23 0.53
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      1.00      0.67         1
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.33      0.25      0.29         4
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.66      0.70      0.68        27
 OVERLAP-OR-AFTER       0.75      1.00      0.86         3
     SIMULTANEOUS       0.73      0.52      0.61        21

         accuracy                           0.62        56
        macro avg       0.42      0.50      0.44        56
     weighted avg       0.66      0.62      0.63        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5239935064935064 vabam: 0.5239935064935064
   Recall:     0.5239935064935064
   Precision:  0.5239935064935064
   F1-score:   0.5239935064935064
Üle kõikid

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.54 0.31 0.55
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      0.50      0.50         2
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.33      0.17      0.22         6
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.72      0.70      0.71        30
 OVERLAP-OR-AFTER       0.50      0.67      0.57         3
     SIMULTANEOUS       0.60      0.60      0.60        15

         accuracy                           0.61        56
        macro avg       0.38      0.38      0.37        56
     weighted avg       0.63      0.61      0.61        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5401298701298701 vabam: 0.5401298701298701
   Recall:     0.5401298701298701
   Precision:  0.5401298701298701
   F1-score:   0.5401298701298701

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.47 0.18 0.51
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.67      1.00      0.80         2
BEFORE-OR-OVERLAP       0.17      0.25      0.20         4
         INCLUDES       1.00      1.00      1.00         1
      IS_INCLUDED       0.75      0.60      0.67        35
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.36      0.38      0.37        13

         accuracy                           0.54        56
        macro avg       0.49      0.54      0.51        56
     weighted avg       0.61      0.54      0.56        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4722402597402596 vabam: 0.4722402597402596
   Recall:     0.4722402597402596
   Precision:  0.4722402597402596
   F1-score:   0.4722402597402596
Üle kõikide ennustuste
   Accuracy: 0.4722719141323792

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.53 0.13 0.63
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         0
      IS_INCLUDED       0.97      0.61      0.75        51
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.29      0.80      0.42         5

         accuracy                           0.62        56
        macro avg       0.21      0.23      0.19        56
     weighted avg       0.91      0.62      0.72        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5348051948051948 vabam: 0.5348051948051948
   Recall:     0.5348051948051948
   Precision:  0.5348051948051948
   F1-score:   0.5348051948051948
Üle kõikide ennust

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.58 0.31 0.6
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         6
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.84      0.69      0.76        39
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.50      0.73      0.59        11

         accuracy                           0.62        56
        macro avg       0.19      0.20      0.19        56
     weighted avg       0.69      0.62      0.65        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.583051948051948 vabam: 0.5839448051948052
   Recall:     0.583051948051948
   Precision:  0.583051948051948
   F1-score:   0.5830519480

  _warn_prf(average, modifier, msg_start, len(result))


559
559


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.51 0.22 0.52
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      0.50      0.50         2
BEFORE-OR-OVERLAP       0.00      0.00      0.00         2
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.80      0.71      0.75        34
 OVERLAP-OR-AFTER       0.33      1.00      0.50         1
     SIMULTANEOUS       0.60      0.53      0.56        17

         accuracy                           0.62        56
        macro avg       0.37      0.46      0.39        56
     weighted avg       0.69      0.62      0.65        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5132142857142857 vabam: 0.5141071428571429
   Recall:     0.5132142857142857
   Precision:  0.5132142857142857
   F1-score:   0.5132142857142857
Üle kõikide ennustuste
   Accuracy: 0.5134168157423972 vabam: 0.5

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.51 0.27 0.51
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         9
         INCLUDES       0.00      0.00      0.00         1
      IS_INCLUDED       0.75      0.67      0.71        36
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.44      0.70      0.54        10

         accuracy                           0.55        56
        macro avg       0.17      0.20      0.18        56
     weighted avg       0.56      0.55      0.55        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5060064935064935 vabam: 0.5068993506493507
   Recall:     0.5060064935064935
   Precision:  0.5060064935064935
   F1-score:   0.506006493506

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.46 0.18 0.5
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         1
BEFORE-OR-OVERLAP       0.11      0.50      0.18         2
         INCLUDES       0.00      0.00      0.00         1
      IS_INCLUDED       0.79      0.57      0.67        40
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.31      0.42      0.36        12

         accuracy                           0.52        56
        macro avg       0.20      0.25      0.20        56
     weighted avg       0.64      0.52      0.56        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.46142857142857147 vabam: 0.46142857142857147
   Recall:     0.46142857142857147
   Precision:  0.46142857142857147
   F1-score:   0.46142857142857147
Üle kõikide ennustuste
   Accuracy: 0.46153846

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.52 0.11 0.63
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         0
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.97      0.61      0.75        51
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.19      0.60      0.29         5

         accuracy                           0.61        56
        macro avg       0.17      0.17      0.15        56
     weighted avg       0.90      0.61      0.71        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.520422077922078 vabam: 0.520422077922078
   Recall:     0.520422077922078
   Precision:  0.520422077

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.56 0.3 0.58
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.14      1.00      0.25         1
      IS_INCLUDED       0.84      0.68      0.75        38
 OVERLAP-OR-AFTER       0.00      0.00      0.00         2
     SIMULTANEOUS       0.53      0.53      0.53        15

         accuracy                           0.62        56
        macro avg       0.25      0.37      0.26        56
     weighted avg       0.71      0.62      0.66        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5633766233766234 vabam: 0.5642694805194806
   Recall:     0.5633766233766234
   Precision:  0.5633766233766234
   F1-score:   0.5633766233766234
Üle kõikide ennustuste
   Accuracy: 0.5635062

  _warn_prf(average, modifier, msg_start, len(result))


559
559


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.52 0.27 0.53
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      0.50      0.50         2
BEFORE-OR-OVERLAP       0.60      0.43      0.50         7
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.73      0.69      0.71        32
 OVERLAP-OR-AFTER       0.33      0.50      0.40         2
     SIMULTANEOUS       0.47      0.54      0.50        13

         accuracy                           0.61        56
        macro avg       0.44      0.44      0.43        56
     weighted avg       0.63      0.61      0.62        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5240259740259741 vabam: 0.5240259740259741
   Recall:     0.5240259740259741
   Precision:  0.5240259740259741
   F1-score:   0.5240259740259741
Üle kõikide ennustuste
   Accuracy: 0.5241502683363148 vabam: 0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.54 0.29 0.54
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

BEFORE-OR-OVERLAP       0.44      0.57      0.50         7
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.83      0.69      0.75        35
 OVERLAP-OR-AFTER       0.00      0.00      0.00         3
     SIMULTANEOUS       0.31      0.50      0.38        10
            VAGUE       0.00      0.00      0.00         1

         accuracy                           0.59        56
        macro avg       0.26      0.29      0.27        56
     weighted avg       0.63      0.59      0.60        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5365909090909091 vabam: 0.5374837662337663
   Recall:     0.5365909090909091
   Precision:  0.5365909090909091
   F1-score:   0.5365909090909091
Üle kõikide ennustuste
   Accuracy: 0.53667262969588

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.47 0.18 0.5
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       1.00      0.50      0.67         2
BEFORE-OR-OVERLAP       0.00      0.00      0.00         9
      IS_INCLUDED       0.80      0.65      0.72        37
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.25      0.57      0.35         7

         accuracy                           0.52        56
        macro avg       0.41      0.34      0.35        56
     weighted avg       0.60      0.52      0.54        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4669480519480519 vabam: 0.4669480519480519
   Recall:     0.4669480519480519
   Precision:  0.4669480519480519
   F1-score:   0.4669480519480519
Üle kõikide ennustuste
   Accuracy: 0.4669051878354204 vabam: 0.4669051878354204
   Recall:    micro - 0.466

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.52 0.11 0.63
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

BEFORE-OR-OVERLAP       0.11      1.00      0.20         1
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.97      0.56      0.71        50
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.19      0.60      0.29         5

         accuracy                           0.57        56
        macro avg       0.25      0.43      0.24        56
     weighted avg       0.88      0.57      0.66        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5223051948051948 vabam: 0.5223051948051948
   Recall:     0.5223051948051948
   Precision:  0.5223051948051948
   F1-score:   0.5223051948051948
Üle kõikide ennustuste
   Accuracy: 0.5223613595706619 vabam: 0.5223613

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.57 0.3 0.58
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         5
         INCLUDES       1.00      1.00      1.00         1
      IS_INCLUDED       0.84      0.73      0.78        37
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.50      0.62      0.55        13

         accuracy                           0.64        56
        macro avg       0.33      0.34      0.33        56
     weighted avg       0.69      0.64      0.66        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5651948051948052 vabam: 0.5660876623376623
   Recall:     0.5651948051948052
   Precision:  0.5651948051948052
   F1-score:   0.5

  _warn_prf(average, modifier, msg_start, len(result))


559
559


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.51 0.23 0.52
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      1.00      0.67         1
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.33      0.17      0.22         6
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.69      0.77      0.73        26
 OVERLAP-OR-AFTER       0.50      0.67      0.57         3
     SIMULTANEOUS       0.73      0.55      0.63        20

         accuracy                           0.62        56
        macro avg       0.39      0.45      0.40        56
     weighted avg       0.65      0.62      0.63        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5132467532467532 vabam: 0.5132467532467532
   Recall:     0.5132467532467532
   Precision:  0.5132467532467532
   F1-score:   0.51324675324675

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.52 0.25 0.52
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      0.50      0.50         2
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         4
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.76      0.71      0.73        31
 OVERLAP-OR-AFTER       0.50      0.67      0.57         3
     SIMULTANEOUS       0.60      0.56      0.58        16

         accuracy                           0.61        56
        macro avg       0.34      0.35      0.34        56
     weighted avg       0.64      0.61      0.62        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5186363636363636 vabam: 0.5186363636363636
   Recall:     0.5186363636363636
   Precision:  0.5186363636363636
   F1-score:   0.518

  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.46 0.15 0.5
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

BEFORE-OR-OVERLAP       0.11      0.50      0.18         2
         INCLUDES       0.00      0.00      0.00         2
      IS_INCLUDED       0.79      0.55      0.65        42
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.31      0.50      0.38        10

         accuracy                           0.52        56
        macro avg       0.24      0.31      0.24        56
     weighted avg       0.65      0.52      0.56        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4579545454545454 vabam: 0.4579545454545454
   Recall:     0.4579545454545454
   Precision:  0.4579545454545454
   F1-score:   0.4579545454545454
Üle kõikide ennustuste
   Accuracy: 0.45796064400715564 vabam: 0.45796064400715564
   Recall:    micr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.53 0.12 0.64
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         0
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.97      0.61      0.75        51
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.19      0.60      0.29         5

         accuracy                           0.61        56
        macro avg       0.17      0.17      0.15        56
     weighted avg       0.90      0.61      0.71        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5276298701298701 vabam: 0.5276298701298701
   Recall:     0.5276298701298701
   Precision: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.57 0.3 0.59
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         5
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.88      0.72      0.79        39
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.50      0.67      0.57        12

         accuracy                           0.64        56
        macro avg       0.20      0.20      0.19        56
     weighted avg       0.72      0.64      0.67        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.570487012987013 vabam: 0.57137987012987
   Recall:     0.570487012987013
   Precision:  0.570487012987013
   F1-score:   0

  _warn_prf(average, modifier, msg_start, len(result))


559
559


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.52 0.3 0.52
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.40      0.25      0.31         8
      IS_INCLUDED       0.77      0.79      0.78        29
 OVERLAP-OR-AFTER       0.25      0.50      0.33         2
     SIMULTANEOUS       0.50      0.47      0.48        17

         accuracy                           0.61        56
        macro avg       0.38      0.40      0.38        56
     weighted avg       0.61      0.61      0.61        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5240909090909092 vabam: 0.5240909090909092
   Recall:     0.5240909090909092
   Precision:  0.5240909090909092
   F1-score:   0.5240909090909092
Üle kõikide ennustuste
   Accuracy: 0.5241502683363148 vabam: 0.5241502683363148
   Recall:    micro - 0.5241502683363148
           

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.53 0.27 0.54
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.33      0.17      0.22         6
         INCLUDES       1.00      1.00      1.00         1
      IS_INCLUDED       0.84      0.68      0.75        40
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.44      0.88      0.58         8

         accuracy                           0.64        56
        macro avg       0.37      0.39      0.37        56
     weighted avg       0.72      0.64      0.66        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5257792207792208 vabam: 0.5266720779220779
   Recall:     0.5257792207792208
   Precision:  0.5257792207792208
   F1-score:   0.5257792207792208
Üle

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.47 0.19 0.52
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       1.00      0.50      0.67         2
BEFORE-OR-OVERLAP       0.00      0.00      0.00         9
      IS_INCLUDED       0.83      0.66      0.74        38
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.25      0.67      0.36         6

         accuracy                           0.54        56
        macro avg       0.42      0.36      0.35        56
     weighted avg       0.63      0.54      0.56        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.47042207792207796 vabam: 0.47042207792207796
   Recall:     0.47042207792207796
   Precision:  0.47042207792207796
   F1-score:   0.47042207792207796
Üle kõikide ennustuste
   Accuracy: 0.47048300536672627 vabam: 0.47048300536672627
   Recall:    micro - 0.47048

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.55 0.13 0.65
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         0
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.97      0.61      0.75        51
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.19      0.60      0.29         5

         accuracy                           0.61        56
        macro avg       0.17      0.17      0.15        56
     weighted avg       0.90      0.61      0.71        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5455194805194805 vabam: 0.5455194805194805
   Recall:     0.5455194805194805
   Precision:  0.54551948051948

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.57 0.28 0.59
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         5
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.88      0.68      0.77        41
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.44      0.70      0.54        10

         accuracy                           0.62        56
        macro avg       0.19      0.20      0.19        56
     weighted avg       0.72      0.62      0.66        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5722727272727273 vabam: 0.5731655844155844
   Recall:     0.5722727272727273
   Precision:  0.5722727272727273
   F1-score:   0.5722727272

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.53 0.24 0.54
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         2
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.57      0.57      0.57         7
         INCLUDES       0.00      0.00      0.00         1
      IS_INCLUDED       0.69      0.79      0.73        28
 OVERLAP-OR-AFTER       1.00      0.50      0.67         2
     SIMULTANEOUS       0.50      0.44      0.47        16

         accuracy                           0.61        56
        macro avg       0.39      0.33      0.35        56
     weighted avg       0.59      0.61      0.60        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5312012987012987 vabam: 0.5312012987012987
   Recall:     0.5312012987012987
   Precision:  0.5312012987012987
   F1-score:   0.5312012987012987
Üle kõikid

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.53 0.27 0.54
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.33      0.12      0.18         8
         INCLUDES       1.00      1.00      1.00         1
      IS_INCLUDED       0.81      0.70      0.75        37
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.44      0.78      0.56         9

         accuracy                           0.62        56
        macro avg       0.37      0.37      0.36        56
     weighted avg       0.67      0.62      0.63        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5311363636363637 vabam: 0.5320292207792209
   Recall:     0.5311363636363637
   Precision:  0.5311363636363637
   F1-score:   0.5311363636363637

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.46 0.17 0.51
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       1.00      0.50      0.67         2
BEFORE-OR-OVERLAP       0.20      0.10      0.13        10
      IS_INCLUDED       0.83      0.66      0.74        38
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.25      0.80      0.38         5

         accuracy                           0.55        56
        macro avg       0.46      0.41      0.38        56
     weighted avg       0.66      0.55      0.58        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.46334415584415584 vabam: 0.46334415584415584
   Recall:     0.46334415584415584
   Precision:  0.46334415584415584
   F1-score:   0.46334415584415584
Üle kõikide ennustuste
   Accuracy: 0.46332737030411447 vabam: 0.46332737030411447
   Recall:    micro - 0.4

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.53 0.12 0.63
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         1
      IS_INCLUDED       0.97      0.66      0.78        47
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.29      0.50      0.36         8

         accuracy                           0.62        56
        macro avg       0.21      0.19      0.19        56
     weighted avg       0.85      0.62      0.71        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.529448051948052 vabam: 0.529448051948052
   Recall:     0.529448051948052
   Precision:  0.529448051948052
   F1-score:   0.529448051948052
Üle kõikide ennustuste


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.57 0.31 0.58
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         5
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.88      0.68      0.77        41
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.44      0.70      0.54        10

         accuracy                           0.62        56
        macro avg       0.19      0.20      0.19        56
     weighted avg       0.72      0.62      0.66        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5705194805194805 vabam: 0.5705194805194805
   Recall:     0.5705194805194805
   Precision:  0.5705194805194805
   F1-score:   0.570519

  _warn_prf(average, modifier, msg_start, len(result))


559
559


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.53 0.26 0.53
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         1
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.29      0.67      0.40         3
         INCLUDES       0.00      0.00      0.00         1
      IS_INCLUDED       0.81      0.68      0.74        37
     SIMULTANEOUS       0.47      0.50      0.48        14

         accuracy                           0.61        56
        macro avg       0.26      0.31      0.27        56
     weighted avg       0.66      0.61      0.63        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5274675324675324 vabam: 0.5283603896103896
   Recall:     0.5274675324675324
   Precision:  0.5274675324675324
   F1-score:   0.5274675324675324
Üle kõikide ennustuste
   Accuracy: 0.5277280858676208 vabam: 0.5

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.5 0.25 0.51
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      0.33      0.40         3
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         6
         INCLUDES       0.50      1.00      0.67         1
      IS_INCLUDED       0.72      0.72      0.72        29
 OVERLAP-OR-AFTER       0.25      0.50      0.33         2
     SIMULTANEOUS       0.60      0.60      0.60        15

         accuracy                           0.59        56
        macro avg       0.37      0.45      0.39        56
     weighted avg       0.58      0.59      0.58        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5042207792207792 vabam: 0.5060064935064934
   Recall:     0.5042207792207792
   Precision:  0.5042207792207792
   F1-score:   0.5042207792207

  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.46 0.16 0.51
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       1.00      0.50      0.67         2
BEFORE-OR-OVERLAP       0.00      0.00      0.00         5
      IS_INCLUDED       0.87      0.62      0.72        42
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.19      0.50      0.27         6

         accuracy                           0.54        56
        macro avg       0.41      0.32      0.33        56
     weighted avg       0.71      0.54      0.59        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4632142857142857 vabam: 0.4632142857142857
   Recall:     0.4632142857142857
   Precision:  0.4632142857142857
   F1-score:   0.4632142857142857
Üle kõikide ennustuste
   Accuracy: 0.46332737030411447 vabam: 0.46332737030411447
   Recall:    micro - 0.46

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.53 0.13 0.62
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.14      1.00      0.25         1
      IS_INCLUDED       1.00      0.62      0.77        50
     SIMULTANEOUS       0.27      0.80      0.40         5

         accuracy                           0.64        56
        macro avg       0.28      0.48      0.28        56
     weighted avg       0.92      0.64      0.72        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5257792207792208 vabam: 0.5257792207792208
   Recall:     0.5257792207792208
   Precision:  0.5257792207792208
   F1-score:   0.5257792207792208
Üle kõikide ennustuste
   Accuracy: 0.5259391771019678 vabam: 0.525939177

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.55 0.29 0.57
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      1.00      0.67         1
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         3
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.83      0.73      0.77        33
 OVERLAP-OR-AFTER       0.50      0.67      0.57         3
     SIMULTANEOUS       0.67      0.62      0.65        16

         accuracy                           0.66        56
        macro avg       0.36      0.43      0.38        56
     weighted avg       0.71      0.66      0.68        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5544155844155845 vabam: 0.5553084415584416
   Recall:     0.5544155844155845
   Precision:  0.5544155844155845
   F1-score:   0.55

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.53 0.28 0.54
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      1.00      0.67         1
           BEFORE       0.00      0.00      0.00         1
BEFORE-OR-OVERLAP       0.50      0.44      0.47         9
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.70      0.68      0.69        28
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.69      0.56      0.62        16
            VAGUE       0.00      0.00      0.00         0

         accuracy                           0.59        56
        macro avg       0.30      0.34      0.31        56
     weighted avg       0.64      0.59      0.61        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5329545454545455 vabam: 0.5338474025974026
   Recall:     0.5329545454545455
   Precision:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.53 0.29 0.53
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.33      0.50      0.40         2
BEFORE-OR-OVERLAP       0.67      0.50      0.57         8
         INCLUDES       1.00      1.00      1.00         1
      IS_INCLUDED       0.57      0.73      0.64        22
 OVERLAP-OR-AFTER       0.25      0.50      0.33         2
     SIMULTANEOUS       0.79      0.52      0.63        21

         accuracy                           0.61        56
        macro avg       0.60      0.63      0.60        56
     weighted avg       0.65      0.61      0.61        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5258116883116883 vabam: 0.5258116883116883
   Recall:     0.5258116883116883
   Precision:  0.5258116883116883
   F1-score:   0.5258116883116883
Üle kõikide ennustuste
   Accuracy: 0.52593917710196

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.45 0.17 0.48
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.33      0.33      0.33         3
BEFORE-OR-OVERLAP       0.33      0.25      0.29         8
         INCLUDES       1.00      1.00      1.00         1
      IS_INCLUDED       0.68      0.61      0.64        31
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.36      0.38      0.37        13

         accuracy                           0.50        56
        macro avg       0.45      0.43      0.44        56
     weighted avg       0.54      0.50      0.52        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4490584415584416 vabam: 0.4490584415584416
   Recall:     0.4490584415584416
   Precision:  0.4490584415584416
   F1-score:   0.4490584415584416
Üle kõikide ennustuste
   Accuracy: 0.4490161001

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.52 0.12 0.62
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         0
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.94      0.58      0.71        52
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.12      0.50      0.20         4

         accuracy                           0.57        56
        macro avg       0.15      0.15      0.13        56
     weighted avg       0.88      0.57      0.68        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5168506493506493 vabam: 0.5168506493506493
   Recall:     0.5168506493506493
   Precision:  0.5168

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.56 0.3 0.57
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         2
         INCLUDES       1.00      1.00      1.00         1
      IS_INCLUDED       0.91      0.71      0.79        41
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.44      0.58      0.50        12

         accuracy                           0.66        56
        macro avg       0.33      0.33      0.33        56
     weighted avg       0.78      0.66      0.71        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5562337662337662 vabam: 0.5562337662337662
   Recall:     0.5562337662337662
   Precision:  0.5562337662337662
   F1-score:   0.5

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.5 0.22 0.5
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         2
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.57      0.44      0.50         9
         INCLUDES       0.00      0.00      0.00         1
      IS_INCLUDED       0.62      0.77      0.69        26
 OVERLAP-OR-AFTER       1.00      0.50      0.67         2
     SIMULTANEOUS       0.50      0.44      0.47        16

         accuracy                           0.57        56
        macro avg       0.39      0.31      0.33        56
     weighted avg       0.56      0.57      0.56        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5007467532467531 vabam: 0.5016558441558441
   Recall:     0.5007467532467531
   Precision:  0.5007467532467531
   F1-score:   0.5007467532467531

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.51 0.25 0.51
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      0.25      0.33         4
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         3
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.72      0.70      0.71        30
 OVERLAP-OR-AFTER       0.50      0.67      0.57         3
     SIMULTANEOUS       0.67      0.62      0.65        16

         accuracy                           0.61        56
        macro avg       0.34      0.32      0.32        56
     weighted avg       0.64      0.61      0.62        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5060714285714286 vabam: 0.5069642857142858
   Recall:     0.5060714285714286
   Precision:  0.5060714285714286
   F1-score:   0.506

  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.46 0.17 0.51
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       1.00      0.50      0.67         2
BEFORE-OR-OVERLAP       0.00      0.00      0.00         9
      IS_INCLUDED       0.83      0.64      0.72        39
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.19      0.60      0.29         5

         accuracy                           0.52        56
        macro avg       0.40      0.35      0.34        56
     weighted avg       0.63      0.52      0.55        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4597402597402597 vabam: 0.4597402597402597
   Recall:     0.4597402597402597
   Precision:  0.4597402597402597
   F1-score:   0.4597402597402597
Üle kõikide ennustuste
   Accuracy: 0.4597495527728086 vabam: 0.4597495527728086
   Recall:    micro

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.53 0.12 0.63
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         0
      IS_INCLUDED       0.97      0.59      0.73        49
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.31      0.71      0.43         7

         accuracy                           0.61        56
        macro avg       0.26      0.26      0.23        56
     weighted avg       0.88      0.61      0.70        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5257142857142857 vabam: 0.5257142857142857
   Recall:     0.5257142857142857
   Precision:  0.5257142857142857
   F1-score:   0.5257142857142857
Üle kõikide ennustuste
   Accuracy: 0.5259391771019678 vabam: 0.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.55 0.29 0.57
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         5
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.88      0.70      0.78        40
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.44      0.64      0.52        11

         accuracy                           0.62        56
        macro avg       0.19      0.19      0.19        56
     weighted avg       0.71      0.62      0.66        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5544155844155845 vabam: 0.5544155844155845
   Recall:     0.5544155844155845
   Precision:  0.5544155844155845
   F1-scor

  _warn_prf(average, modifier, msg_start, len(result))


559
559


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.52 0.29 0.52
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.40      0.25      0.31         8
         INCLUDES       0.00      0.00      0.00         1
      IS_INCLUDED       0.77      0.82      0.79        28
 OVERLAP-OR-AFTER       0.25      1.00      0.40         1
     SIMULTANEOUS       0.62      0.56      0.59        18

         accuracy                           0.64        56
        macro avg       0.34      0.44      0.35        56
     weighted avg       0.65      0.64      0.64        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.524025974025974 vabam: 0.524025974025974
   Recall:     0.524025974025974
   Precision:  0.524025974025974
   F1-score:   0.524025974025974
Üle kõikide ennustuste
   Accuracy: 0.5241502683363148 vabam: 0.52415026833631

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.54 0.28 0.55
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         5
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.84      0.66      0.74        41
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.50      0.89      0.64         9

         accuracy                           0.62        56
        macro avg       0.19      0.22      0.20        56
     weighted avg       0.70      0.62      0.64        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5401298701298701 vabam: 0.5401298701298701
   Recall:     0.5401298701298701
   Precision:  0.5401298701298701
   F1-score:   0.5401298701298701
Üle

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.47 0.19 0.51
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       1.00      0.50      0.67         2
BEFORE-OR-OVERLAP       0.00      0.00      0.00         9
      IS_INCLUDED       0.83      0.66      0.74        38
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.25      0.67      0.36         6

         accuracy                           0.54        56
        macro avg       0.42      0.36      0.35        56
     weighted avg       0.63      0.54      0.56        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.46685064935064935 vabam: 0.46685064935064935
   Recall:     0.46685064935064935
   Precision:  0.46685064935064935
   F1-score:   0.46685064935064935
Üle kõikide ennustuste
   Accuracy: 0.4669051878354204 vabam: 0.4669051878354204
   Recall:    micro - 0.4669051

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.55 0.13 0.66
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         0
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.97      0.58      0.73        53
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.12      0.67      0.21         3

         accuracy                           0.59        56
        macro avg       0.16      0.18      0.13        56
     weighted avg       0.92      0.59      0.70        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5509090909090909 vabam: 0.5509090909090909
   Recall:     0.5509090909090909
   Precision:  0.55090909090909

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.58 0.3 0.6
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         5
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.91      0.69      0.78        42
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.44      0.78      0.56         9

         accuracy                           0.64        56
        macro avg       0.19      0.21      0.19        56
     weighted avg       0.75      0.64      0.68        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5776298701298701 vabam: 0.5785227272727272
   Recall:     0.5776298701298701
   Precision:  0.5776298701298701
   F1-score:   0.577629870129

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.53 0.24 0.53
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      1.00      0.67         1
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.33      0.20      0.25         5
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.66      0.70      0.68        27
 OVERLAP-OR-AFTER       0.75      1.00      0.86         3
     SIMULTANEOUS       0.73      0.55      0.63        20

         accuracy                           0.62        56
        macro avg       0.42      0.49      0.44        56
     weighted avg       0.66      0.62      0.63        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5257792207792208 vabam: 0.5257792207792208
   Recall:     0.5257792207792208
   Precision:  0.5257792207792208
   F1-score:   0.5257792207792208
Üle kõikid

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.54 0.3 0.55
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.33      0.14      0.20         7
         INCLUDES       1.00      1.00      1.00         1
      IS_INCLUDED       0.81      0.70      0.75        37
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.56      0.90      0.69        10

         accuracy                           0.66        56
        macro avg       0.39      0.39      0.38        56
     weighted avg       0.70      0.66      0.66        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5437337662337662 vabam: 0.5437337662337662
   Recall:     0.5437337662337662
   Precision:  0.5437337662337662
   F1-score:   0.5437337662337662


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.47 0.18 0.52
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       1.00      0.50      0.67         2
BEFORE-OR-OVERLAP       0.20      0.10      0.13        10
      IS_INCLUDED       0.83      0.66      0.74        38
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.25      0.80      0.38         5

         accuracy                           0.55        56
        macro avg       0.46      0.41      0.38        56
     weighted avg       0.66      0.55      0.58        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4722727272727273 vabam: 0.4722727272727273
   Recall:     0.4722727272727273
   Precision:  0.4722727272727273
   F1-score:   0.4722727272727273
Üle kõikide ennustuste
   Accuracy: 0.47227191413237923 vabam: 0.47227191413237923
   Recall:    micro - 0.472271

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.53 0.13 0.63
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         0
      IS_INCLUDED       0.97      0.58      0.72        50
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.31      0.83      0.45         6

         accuracy                           0.61        56
        macro avg       0.26      0.28      0.24        56
     weighted avg       0.90      0.61      0.70        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5329870129870129 vabam: 0.5329870129870129
   Recall:     0.5329870129870129
   Precision:  0.5329870129870129
   F1-score:   0.5329870129870129
Üle kõikide ennustuste
   Accuracy: 0.5330948121645797 vabam: 0.5330948121645

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.58 0.31 0.6
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       1.00      1.00      1.00         1
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.57      0.50      0.53         8
         INCLUDES       0.00      0.00      0.00         1
      IS_INCLUDED       0.75      0.75      0.75        32
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.50      0.54      0.52        13

         accuracy                           0.64        56
        macro avg       0.40      0.40      0.40        56
     weighted avg       0.64      0.64      0.64        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5812337662337663 vabam: 0.5821266233766235
   Recall:     0.5812337662337663
   Precision:  0.5812337662337663
   F1-score:   0.5812337

  _warn_prf(average, modifier, msg_start, len(result))


559
559


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.52 0.24 0.53
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         1
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.29      1.00      0.44         2
         INCLUDES       0.00      0.00      0.00         1
      IS_INCLUDED       0.84      0.68      0.75        38
     SIMULTANEOUS       0.47      0.50      0.48        14

         accuracy                           0.62        56
        macro avg       0.27      0.36      0.28        56
     weighted avg       0.70      0.62      0.65        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5239285714285715 vabam: 0.5248214285714285
   Recall:     0.5239285714285715
   Precision:  0.5239285714285715
   F1-score:   0.5239285714285715
Üle kõikide ennustuste
   Accuracy: 0.5241502683363148 vabam: 0.5

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.5 0.24 0.5
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      0.33      0.40         3
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         6
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.72      0.70      0.71        30
 OVERLAP-OR-AFTER       0.25      0.50      0.33         2
     SIMULTANEOUS       0.60      0.60      0.60        15

         accuracy                           0.57        56
        macro avg       0.30      0.30      0.29        56
     weighted avg       0.58      0.57      0.58        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.49525974025974023 vabam: 0.49615259740259743
   Recall:     0.49525974025974023
   Precision:  0.49525974025974023
   F1-score:   0.4952597402

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.47 0.18 0.52
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       1.00      0.50      0.67         2
BEFORE-OR-OVERLAP       0.00      0.00      0.00         5
      IS_INCLUDED       0.87      0.62      0.72        42
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.19      0.50      0.27         6

         accuracy                           0.54        56
        macro avg       0.41      0.32      0.33        56
     weighted avg       0.71      0.54      0.59        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.47042207792207796 vabam: 0.47042207792207796
   Recall:     0.47042207792207796
   Precision:  0.47042207792207796
   F1-score:   0.47042207792207796
Üle kõikide ennustuste
   Accuracy: 0.47048300536672627 vabam: 0.47048300536672627
   Recall:    micro -

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.52 0.12 0.63
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         0
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       1.00      0.60      0.75        53
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.12      0.67      0.21         3

         accuracy                           0.61        56
        macro avg       0.16      0.18      0.14        56
     weighted avg       0.95      0.61      0.72        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5239935064935065 vabam: 0.5239935064935065
   Recall:     0.5239935064935065
   Precision:  0.523993

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.56 0.3 0.57
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      1.00      0.67         1
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         5
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.76      0.71      0.73        31
 OVERLAP-OR-AFTER       0.50      1.00      0.67         2
     SIMULTANEOUS       0.73      0.65      0.69        17

         accuracy                           0.64        56
        macro avg       0.36      0.48      0.39        56
     weighted avg       0.67      0.64      0.65        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5598051948051948 vabam: 0.5606980519480519
   Recall:     0.5598051948051948
   Precision:  0.5598051948051948
   F1-score:   0.559

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.53 0.27 0.53
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      1.00      0.67         1
           BEFORE       0.00      0.00      0.00         1
BEFORE-OR-OVERLAP       0.50      0.50      0.50         8
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.74      0.65      0.69        31
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.62      0.57      0.59        14
            VAGUE       0.00      0.00      0.00         0

         accuracy                           0.59        56
        macro avg       0.29      0.34      0.31        56
     weighted avg       0.64      0.59      0.61        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5258116883116883 vabam: 0.5267045454545454
   Recall:     0.5258116883116883
   Precision:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.53 0.29 0.54
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.33      0.50      0.40         2
BEFORE-OR-OVERLAP       0.50      0.50      0.50         6
         INCLUDES       1.00      1.00      1.00         1
      IS_INCLUDED       0.64      0.72      0.68        25
 OVERLAP-OR-AFTER       0.25      0.50      0.33         2
     SIMULTANEOUS       0.71      0.50      0.59        20

         accuracy                           0.61        56
        macro avg       0.57      0.62      0.58        56
     weighted avg       0.63      0.61      0.61        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5347402597402596 vabam: 0.5347402597402596
   Recall:     0.5347402597402596
   Precision:  0.5347402597402596
   F1-score:   0.5347402597402596
Üle kõikide ennustuste
   Accuracy: 0.53488372093023

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.45 0.17 0.49
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       1.00      0.50      0.67         2
BEFORE-OR-OVERLAP       0.00      0.00      0.00         9
      IS_INCLUDED       0.77      0.64      0.70        36
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.25      0.50      0.33         8

         accuracy                           0.50        56
        macro avg       0.40      0.33      0.34        56
     weighted avg       0.56      0.50      0.52        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4508441558441558 vabam: 0.4508441558441558
   Recall:     0.4508441558441558
   Precision:  0.4508441558441558
   F1-score:   0.4508441558441558
Üle kõikide ennustuste
   Accuracy: 0.45080500894454384 vabam: 0.45080500894454384
   Recall:    micro - 0.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.51 0.12 0.61
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         0
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.91      0.63      0.74        46
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.38      0.60      0.46        10

         accuracy                           0.62        56
        macro avg       0.18      0.18      0.17        56
     weighted avg       0.81      0.62      0.69        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5079545454545454 vabam: 0.5079545454545454
   Recall:     0.5079545454545454
   Precision:  0.5079

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.56 0.3 0.58
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         3
         INCLUDES       1.00      1.00      1.00         1
      IS_INCLUDED       0.88      0.70      0.78        40
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.44      0.58      0.50        12

         accuracy                           0.64        56
        macro avg       0.33      0.33      0.33        56
     weighted avg       0.74      0.64      0.68        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5633766233766234 vabam: 0.5642694805194806
   Recall:     0.5633766233766234
   Precision:  0.5633766233766234
   F1-score:   0.5

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.5 0.22 0.5
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         1
BEFORE-OR-OVERLAP       0.00      0.00      0.00         6
      IS_INCLUDED       0.73      0.73      0.73        30
 OVERLAP-OR-AFTER       0.25      1.00      0.40         1
     SIMULTANEOUS       0.50      0.44      0.47        18

         accuracy                           0.55        56
        macro avg       0.30      0.44      0.32        56
     weighted avg       0.56      0.55      0.55        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.49899350649350654 vabam: 0.49899350649350654
   Recall:     0.49899350649350654
   Precision:  0.49899350649350654
   F1-score:   0.49899350649350654
Üle kõikide ennustuste
   Accuracy: 0.4991055456171735 vabam: 0.4991055456171735
   Recall:    micro - 0.4991055

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.52 0.27 0.52
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      0.25      0.33         4
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         3
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.72      0.70      0.71        30
 OVERLAP-OR-AFTER       0.75      0.75      0.75         4
     SIMULTANEOUS       0.60      0.60      0.60        15

         accuracy                           0.61        56
        macro avg       0.37      0.33      0.34        56
     weighted avg       0.64      0.61      0.62        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5168506493506494 vabam: 0.5168506493506494
   Recall:     0.5168506493506494
   Precision:  0.5168506493506494
   F1-score:   0.516

  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.47 0.18 0.51
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       1.00      0.50      0.67         2
BEFORE-OR-OVERLAP       0.00      0.00      0.00         9
      IS_INCLUDED       0.83      0.66      0.74        38
 OVERLAP-OR-AFTER       0.00      0.00      0.00         2
     SIMULTANEOUS       0.19      0.60      0.29         5

         accuracy                           0.52        56
        macro avg       0.40      0.35      0.34        56
     weighted avg       0.62      0.52      0.55        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.46688311688311684 vabam: 0.46688311688311684
   Recall:     0.46688311688311684
   Precision:  0.46688311688311684
   F1-score:   0.46688311688311684
Üle kõikide ennustuste
   Accuracy: 0.4669051878354204 vabam: 0.4669051878354204
   Recall:    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.53 0.13 0.63
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         0
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.97      0.62      0.76        50
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.19      0.50      0.27         6

         accuracy                           0.61        56
        macro avg       0.17      0.16      0.15        56
     weighted avg       0.89      0.61      0.70        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5311038961038961 vabam: 0.5311038961038961
   Recall:     0.5311038961038961
   Precision: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.57 0.29 0.59
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         6
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.91      0.71      0.79        41
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.44      0.78      0.56         9

         accuracy                           0.64        56
        macro avg       0.19      0.21      0.19        56
     weighted avg       0.73      0.64      0.67        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5687012987012987 vabam: 0.5695941558441558
   Recall:     0.5687012987012987
   Precision:  0.5687012987012987
   F1-scor

  _warn_prf(average, modifier, msg_start, len(result))


muudan
559
559


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.53 0.3 0.53
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.40      0.22      0.29         9
      IS_INCLUDED       0.77      0.82      0.79        28
 OVERLAP-OR-AFTER       0.25      1.00      0.40         1
     SIMULTANEOUS       0.62      0.56      0.59        18

         accuracy                           0.64        56
        macro avg       0.41      0.52      0.41        56
     weighted avg       0.65      0.64      0.64        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5276298701298702 vabam: 0.5276298701298702
   Recall:     0.5276298701298702
   Precision:  0.5276298701298702
   F1-score:   0.5276298701298702
Üle kõikide ennustuste
   Accuracy: 0.5277280858676208 vabam: 0.5277280858676208
   Recall:    micro - 0.5277280858676208
           

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.53 0.27 0.54
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.33      0.14      0.20         7
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.84      0.68      0.75        40
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.44      0.88      0.58         8

         accuracy                           0.62        56
        macro avg       0.23      0.24      0.22        56
     weighted avg       0.71      0.62      0.64        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5329545454545455 vabam: 0.5329545454545455
   Recall:     0.5329545454545455
   Precision:  0.5329545454545455
   F1-score:   0.5329545454545455
Üle

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.47 0.2 0.52
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      0.25      0.33         4
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         1
         INCLUDES       0.50      1.00      0.67         1
      IS_INCLUDED       0.90      0.57      0.69        46
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.13      0.50      0.21         4

         accuracy                           0.54        56
        macro avg       0.29      0.33      0.27        56
     weighted avg       0.79      0.54      0.62        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.47042207792207796 vabam: 0.47042207792207796
   Recall:     0.47042207792207796
   Precision:  0.47042207792207796
   F1-score:   0.4704220779220

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.53 0.13 0.63
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.14      1.00      0.25         1
      IS_INCLUDED       0.94      0.62      0.75        48
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.29      0.57      0.38         7

         accuracy                           0.62        56
        macro avg       0.23      0.37      0.23        56
     weighted avg       0.84      0.62      0.69        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5330194805194804 vabam: 0.5330194805194804
   Recall:     0.5330194805194804
   Precision:  0.5330194805194804
   F1-score:   0.5330194805194804
Üle kõikide ennustuste

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.58 0.3 0.6
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         5
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.91      0.69      0.78        42
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.44      0.78      0.56         9

         accuracy                           0.64        56
        macro avg       0.19      0.21      0.19        56
     weighted avg       0.75      0.64      0.68        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5812337662337661 vabam: 0.5812337662337661
   Recall:     0.5812337662337661
   Precision:  0.5812337662337661
   F1-score:   0.581233766233

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.52 0.24 0.53
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      1.00      0.67         1
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.33      0.20      0.25         5
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.62      0.72      0.67        25
 OVERLAP-OR-AFTER       0.75      0.75      0.75         4
     SIMULTANEOUS       0.73      0.52      0.61        21

         accuracy                           0.61        56
        macro avg       0.42      0.46      0.42        56
     weighted avg       0.64      0.61      0.61        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5222077922077921 vabam: 0.5222077922077921
   Recall:     0.5222077922077921
   Precision:  0.5222077922077921
   F1-score:   0.5222077922077921
Üle kõikid

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.54 0.28 0.55
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.33      0.14      0.20         7
         INCLUDES       1.00      1.00      1.00         1
      IS_INCLUDED       0.81      0.70      0.75        37
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.56      0.90      0.69        10

         accuracy                           0.66        56
        macro avg       0.39      0.39      0.38        56
     weighted avg       0.70      0.66      0.66        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5364935064935066 vabam: 0.5364935064935066
   Recall:     0.5364935064935066
   Precision:  0.5364935064935066
   F1-score:   0.5364935064935066

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.47 0.18 0.51
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       1.00      0.50      0.67         2
BEFORE-OR-OVERLAP       0.20      0.10      0.13        10
      IS_INCLUDED       0.83      0.66      0.74        38
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.25      0.80      0.38         5

         accuracy                           0.55        56
        macro avg       0.46      0.41      0.38        56
     weighted avg       0.66      0.55      0.58        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.46870129870129873 vabam: 0.46870129870129873
   Recall:     0.46870129870129873
   Precision:  0.46870129870129873
   F1-score:   0.46870129870129873
Üle kõikide ennustuste
   Accuracy: 0.46869409660107336 vabam: 0.46869409660107336
   Recall:    micro - 0.4

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.52 0.12 0.63
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.14      1.00      0.25         1
      IS_INCLUDED       1.00      0.65      0.79        49
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.29      0.67      0.40         6

         accuracy                           0.66        56
        macro avg       0.24      0.39      0.24        56
     weighted avg       0.91      0.66      0.74        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5240584415584416 vabam: 0.5240584415584416
   Recall:     0.5240584415584416
   Precision:  0.5240584415584416
   F1-score:   0.5240584415584416
Üle kõikide ennust

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.58 0.32 0.59
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         5
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.91      0.69      0.78        42
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.44      0.78      0.56         9

         accuracy                           0.64        56
        macro avg       0.19      0.21      0.19        56
     weighted avg       0.75      0.64      0.68        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5758441558441558 vabam: 0.576737012987013
   Recall:     0.5758441558441558
   Precision:  0.5758441558441558
   F1-score:   0.5758441

  _warn_prf(average, modifier, msg_start, len(result))


muudan
559
559


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.53 0.25 0.53
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         1
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.29      1.00      0.44         2
         INCLUDES       0.00      0.00      0.00         1
      IS_INCLUDED       0.84      0.68      0.75        38
     SIMULTANEOUS       0.47      0.50      0.48        14

         accuracy                           0.62        56
        macro avg       0.27      0.36      0.28        56
     weighted avg       0.70      0.62      0.65        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5256818181818181 vabam: 0.5265746753246753
   Recall:     0.5256818181818181
   Precision:  0.5256818181818181
   F1-score:   0.5256818181818181
Üle kõikide ennustuste
   Accuracy: 0.5259391771019678 vabam: 0.5

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.5 0.26 0.51
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      0.33      0.40         3
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         6
         INCLUDES       0.50      1.00      0.67         1
      IS_INCLUDED       0.72      0.72      0.72        29
 OVERLAP-OR-AFTER       0.25      0.50      0.33         2
     SIMULTANEOUS       0.60      0.60      0.60        15

         accuracy                           0.59        56
        macro avg       0.37      0.45      0.39        56
     weighted avg       0.58      0.59      0.58        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5024675324675325 vabam: 0.5033603896103896
   Recall:     0.5024675324675325
   Precision:  0.5024675324675325
   F1-score:   0.5024675324675

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.47 0.18 0.52
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       1.00      0.50      0.67         2
BEFORE-OR-OVERLAP       0.00      0.00      0.00         5
      IS_INCLUDED       0.87      0.62      0.72        42
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.19      0.50      0.27         6

         accuracy                           0.54        56
        macro avg       0.41      0.32      0.33        56
     weighted avg       0.71      0.54      0.59        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.46863636363636363 vabam: 0.46863636363636363
   Recall:     0.46863636363636363
   Precision:  0.46863636363636363
   F1-score:   0.46863636363636363
Üle kõikide ennustuste
   Accuracy: 0.46869409660107336 vabam: 0.46869409660107336
   Recall:    micro -

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.53 0.12 0.63
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         0
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.97      0.60      0.74        52
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.12      0.50      0.20         4

         accuracy                           0.59        56
        macro avg       0.16      0.16      0.13        56
     weighted avg       0.91      0.59      0.70        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5258441558441558 vabam: 0.5258441558441558
   Recall:     0.5258441558441558
   Precision:  0.525844

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.55 0.3 0.57
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      1.00      0.67         1
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         4
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.79      0.72      0.75        32
 OVERLAP-OR-AFTER       0.50      1.00      0.67         2
     SIMULTANEOUS       0.73      0.65      0.69        17

         accuracy                           0.66        56
        macro avg       0.36      0.48      0.40        56
     weighted avg       0.70      0.66      0.68        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5544480519480519 vabam: 0.555340909090909
   Recall:     0.5544480519480519
   Precision:  0.5544480519480519
   F1-score:   0.5544

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.53 0.28 0.53
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      1.00      0.67         1
           BEFORE       0.00      0.00      0.00         1
BEFORE-OR-OVERLAP       0.50      0.50      0.50         8
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.70      0.66      0.68        29
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.69      0.56      0.62        16
            VAGUE       0.00      0.00      0.00         0

         accuracy                           0.59        56
        macro avg       0.30      0.34      0.31        56
     weighted avg       0.64      0.59      0.61        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5294155844155843 vabam: 0.5303084415584416
   Recall:     0.5294155844155843
   Precision:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.52 0.29 0.53
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.33      0.50      0.40         2
BEFORE-OR-OVERLAP       0.67      0.40      0.50        10
         INCLUDES       1.00      1.00      1.00         1
      IS_INCLUDED       0.54      0.75      0.63        20
 OVERLAP-OR-AFTER       0.50      0.67      0.57         3
     SIMULTANEOUS       0.71      0.50      0.59        20

         accuracy                           0.59        56
        macro avg       0.62      0.64      0.61        56
     weighted avg       0.62      0.59      0.59        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.524025974025974 vabam: 0.524025974025974
   Recall:     0.524025974025974
   Precision:  0.524025974025974
   F1-score:   0.524025974025974
Üle kõikide ennustuste
   Accuracy: 0.5241502683363148 va

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.45 0.17 0.48
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       1.00      0.50      0.67         2
BEFORE-OR-OVERLAP       0.00      0.00      0.00         9
      IS_INCLUDED       0.77      0.64      0.70        36
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.25      0.50      0.33         8

         accuracy                           0.50        56
        macro avg       0.40      0.33      0.34        56
     weighted avg       0.56      0.50      0.52        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.44548701298701304 vabam: 0.44548701298701304
   Recall:     0.44548701298701304
   Precision:  0.44548701298701304
   F1-score:   0.44548701298701304
Üle kõikide ennustuste
   Accuracy: 0.44543828264758495 vabam: 0.44543828264758495
   Recall:    micro

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.52 0.12 0.62
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.14      1.00      0.25         1
      IS_INCLUDED       1.00      0.63      0.77        51
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.14      0.50      0.22         4

         accuracy                           0.62        56
        macro avg       0.21      0.35      0.21        56
     weighted avg       0.92      0.62      0.72        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5222077922077923 vabam: 0.5222077922077923
   Recall:     0.5222077922077923
   Precision:  0.5222077922077923
   F1-score:   0.5222077922077923
Üle kõikide 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.56 0.29 0.57
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         3
         INCLUDES       1.00      1.00      1.00         1
      IS_INCLUDED       0.84      0.69      0.76        39
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.44      0.54      0.48        13

         accuracy                           0.62        56
        macro avg       0.33      0.32      0.32        56
     weighted avg       0.71      0.62      0.66        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.557987012987013 vabam: 0.557987012987013
   Recall:     0.557987012987013
   Precision:  0.557987012987013
   F1-score:   0.5579

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.5 0.22 0.5
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         2
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.57      0.44      0.50         9
         INCLUDES       0.00      0.00      0.00         1
      IS_INCLUDED       0.62      0.77      0.69        26
 OVERLAP-OR-AFTER       1.00      0.50      0.67         2
     SIMULTANEOUS       0.50      0.44      0.47        16

         accuracy                           0.57        56
        macro avg       0.39      0.31      0.33        56
     weighted avg       0.56      0.57      0.56        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5007142857142857 vabam: 0.5016233766233766
   Recall:     0.5007142857142857
   Precision:  0.5007142857142857
   F1-score:   0.5007142857142857

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.51 0.26 0.52
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.50      0.25      0.33         4
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         2
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.72      0.70      0.71        30
 OVERLAP-OR-AFTER       0.75      0.75      0.75         4
     SIMULTANEOUS       0.67      0.62      0.65        16

         accuracy                           0.62        56
        macro avg       0.38      0.33      0.35        56
     weighted avg       0.67      0.62      0.64        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5096428571428573 vabam: 0.5096428571428573
   Recall:     0.5096428571428573
   Precision:  0.5096428571428573
   F1-score:   0.509

  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.47 0.17 0.51
```
KNeighborsClassifier(n_neighbors=len(set(y)) - 2)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       1.00      0.50      0.67         2
BEFORE-OR-OVERLAP       0.00      0.00      0.00         9
      IS_INCLUDED       0.83      0.64      0.72        39
 OVERLAP-OR-AFTER       0.00      0.00      0.00         1
     SIMULTANEOUS       0.19      0.60      0.29         5

         accuracy                           0.52        56
        macro avg       0.40      0.35      0.34        56
     weighted avg       0.63      0.52      0.55        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4650974025974025 vabam: 0.4650974025974025
   Recall:     0.4650974025974025
   Precision:  0.4650974025974025
   F1-score:   0.4650974025974025
Üle kõikide ennustuste
   Accuracy: 0.46511627906976744 vabam: 0.46511627906976744
   Recall:    mic

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.53 0.12 0.64
```
RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.29      1.00      0.44         2
      IS_INCLUDED       0.97      0.66      0.78        47
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.29      0.57      0.38         7

         accuracy                           0.66        56
        macro avg       0.26      0.37      0.27        56
     weighted avg       0.86      0.66      0.72        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5328896103896104 vabam: 0.5328896103896104
   Recall:     0.5328896103896104
   Precision:  0.5328896103896104
   F1-score:   0.5328896103896104
Üle k

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.57 0.29 0.59
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.00      0.00      0.00         6
         INCLUDES       0.00      0.00      0.00         0
      IS_INCLUDED       0.91      0.72      0.81        40
 OVERLAP-OR-AFTER       0.00      0.00      0.00         0
     SIMULTANEOUS       0.44      0.70      0.54        10

         accuracy                           0.64        56
        macro avg       0.19      0.20      0.19        56
     weighted avg       0.73      0.64      0.67        56

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.5723051948051948 vabam: 0.5723051948051948
   Recall:     0.5723051948051948
   Precision:  0.5723051948051948
   F1-scor

  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
for key in top3.keys():
  print("------------------------ multiword_tüüp:", key)
  for kiht in top3.get(key).keys():
    print("------ kiht:", kiht)
    for mudeli_tulemus in top3.get(key).get(kiht):
      print("### ", mudeli_tulemus[1], mudeli_tulemus[2], mudeli_tulemus[3], "-", mudeli_tulemus[0])

------------------------ multiword_tüüp: main_event_embedding
------ kiht: bert_embeddings_fixed
###  0.58 0.31 0.6 - MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
###  0.55 0.31 0.56 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.53 0.27 0.52 - LinearSVC(random_state=0, C=0.025)
------ kiht: bert_embeddings_add_fixed
###  0.58 0.31 0.6 - MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
###  0.54 0.31 0.55 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.53 0.13 0.63 - RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
------ kiht: bert_embeddings_all_add_fixed
###  0.56 0.3 0.58 - MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
###  0.52 0.11 0.63 - RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)
###  0.51 0.22 0.52 - LinearSVC(random_state=0, C=0.025)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.57 0.3 0.58 - MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
##

## Top3 mudelid erinevate kombinatsioonidega



```
------------------------ multiword_tüüp: main_event_embedding
------ kiht: bert_embeddings_fixed
###  0.58 0.29 0.6 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.55 0.3 0.56 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.52 0.27 0.52 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_add_fixed
###  0.58 0.31 0.6 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.54 0.31 0.55 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.52 0.24 0.53 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_all_add_fixed
###  0.57 0.3 0.58 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.51 0.23 0.52 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.51 0.27 0.51 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.57 0.3 0.58 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.53 0.3 0.54 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.52 0.27 0.52 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.57 0.3 0.58 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.52 0.25 0.53 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.51 0.23 0.52 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------------------------ multiword_tüüp: event_embeddingute_keskmised
------ kiht: bert_embeddings_fixed
###  0.58 0.3 0.6 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.53 0.27 0.54 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.52 0.29 0.52 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_add_fixed
###  0.56 0.3 0.57 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.53 0.24 0.54 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.53 0.27 0.54 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_all_add_fixed
###  0.56 0.29 0.57 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.52 0.25 0.53 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.51 0.26 0.51 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.55 0.29 0.57 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.53 0.28 0.53 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.53 0.29 0.53 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.55 0.29 0.57 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.51 0.25 0.52 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.5 0.23 0.51 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------------------------ multiword_tüüp: event_embeddingute_kaalutatud_keskmised
------ kiht: bert_embeddings_fixed
###  0.57 0.29 0.59 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.54 0.27 0.55 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.52 0.28 0.52 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_add_fixed
###  0.58 0.32 0.6 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.54 0.29 0.55 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.53 0.24 0.53 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_all_add_fixed
###  0.56 0.29 0.58 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.52 0.23 0.52 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.49 0.24 0.5 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.57 0.28 0.58 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.53 0.28 0.53 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.53 0.3 0.54 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.56 0.29 0.58 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.52 0.26 0.53 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.5 0.22 0.5 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------------------------ multiword_tüüp: event_embeddingute_kaalutatud_keskmised_sonaliigid
------ kiht: bert_embeddings_fixed
###  0.58 0.3 0.6 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.53 0.29 0.53 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.53 0.27 0.55 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_add_fixed
###  0.57 0.3 0.58 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.54 0.28 0.55 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.52 0.24 0.53 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_all_add_fixed
###  0.55 0.29 0.57 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.52 0.25 0.53 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.5 0.27 0.5 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.56 0.29 0.57 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.53 0.28 0.53 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.52 0.29 0.53 - SVC(random_state=0, kernel="linear", C=0.025)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.57 0.29 0.59 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.52 0.26 0.52 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.51 0.23 0.51 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
```



## Valitud mudel

In [None]:
X, y = loo_X_ja_y_eventtimex("bert_embeddings_add_fixed", 0)
clf5 = make_pipeline(StandardScaler(), MLPClassifier(alpha=3, solver="sgd", max_iter=10000, random_state=0))
#clf5 = make_pipeline(StandardScaler(), SVC(random_state=0, kernel="linear", C=0.025, class_weight="balanced"))
tulemused = ristvalideeri(clf5, X, y)
#tulemused = prindi_tulemused(clf5, 'MLPClassifier(alpha=3, solver="sgd", max_iter=10000, random_state=0)', X, y, "bert_embeddings_penultimatelayer_fixed")

559
559
559
split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1


  _warn_prf(average, modifier, msg_start, len(result))


split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9


In [None]:
report = classification_report(tulemused[0], tulemused[1])
print(report)
print("Micro")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="micro"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="micro"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="micro"))
print("Macro")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="macro"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="macro"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="macro"))
print("Weighted")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="weighted"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="weighted"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="weighted"))

print("TempEval-2 järgi (õigete vastuste arv / kogu vastuste arv)")
oigeid = 0
for i in range(len(tulemused[0])):
  if tulemused[0][i] == tulemused[1][i]:
    oigeid += 1
print("Skoor:", str(oigeid / len(tulemused[0])))

559
341
293
138
148
                   precision    recall  f1-score   support

            AFTER       0.21      0.30      0.25        10
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.33      0.36      0.34        59
         INCLUDES       0.25      0.67      0.36         3
      IS_INCLUDED       0.80      0.69      0.74       341
 OVERLAP-OR-AFTER       0.15      0.50      0.24         8
     SIMULTANEOUS       0.48      0.51      0.50       138
            VAGUE       0.00      0.00      0.00         0

         accuracy                           0.60       559
        macro avg       0.28      0.38      0.30       559
     weighted avg       0.65      0.60      0.62       559

Micro
- recall: 0.5992844364937389
- precsision: 0.5992844364937389
- f1-score: 0.5992844364937389
Macro
- recall: 0.37791357905966744
- precsision: 0.27872871793520254
- f1-score: 0.3034833274840051
Weighted
- recall: 0.5992844364937389
- precsision: 0.648170781275056

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# Mudel - tlink-main-events

loe_main_relatsioonid() meetodiga saadud sõnastikust saab (failinimi, ID) kaudu kätte kõik relatsioonid, kus selle ID'ga sündmus/ajaväljend on.  
See tähendab, et kui "ajaleht2" sündmused "e2" ja "e3" on suhtes, siis ("ajaleht2", "e2") ja ("ajaleht2", "e3") sisaldavad samasugust relatsiooni.  
Korduvused tuleb õppimise jaoks eemaldada.

In [31]:
def loe_main_relatsioonid():
  data = None
  with open(drive_path + 'relations_only_main.pickle', 'rb') as fp:
      #data = json.load(fp)
      data = pickle.load(fp)
  return data
main_rels = loe_main_relatsioonid()

koik = []
for key in main_rels:
  koik.extend(main_rels.get(key))
print("Iga evendi kõik relatsioonid:", len(koik))

def main_relatsioonid_korduvusteta():
  main_rels = loe_main_relatsioonid()
  # korduvused välja
  uus_dict = {}
  #kasutatud_rels = []
  for key in main_rels:
    relations = main_rels.get(key)
    sobivad = []
    for rel in relations:
      if rel[0] == key[1]:
        sobivad.append(rel)
    uus_dict[key] = sobivad
  return uus_dict

uus_dict = main_relatsioonid_korduvusteta()

koik_uus = []
for key in uus_dict:
  koik_uus.extend(uus_dict.get(key))
print("Relatsioonide kordused väljas:", len(koik_uus))


names = []
for art in laetud_artiklid_embeddingutega:
  names.append(art.meta.get("filename"))

kor = []
for key in uus_dict:
  if key[0] in names:
    kor.extend(uus_dict.get(key))
print("Korras artiklite omi:", len(kor))


Iga evendi kõik relatsioonid: 5104
Relatsioonide kordused väljas: 2552
Korras artiklite omi: 2552


In [32]:
def loo_X_ja_y_main_events (embedding_layer_name, event_embeddings_nr): 
  main_relatsioonid = main_relatsioonid_korduvusteta()
  artikkel_event = []
  X = []
  y = []

  for artikkel in laetud_artiklid_embeddingutega:
    leitud_suhteid = 0
    # Võimalikud kihid: bert_embeddings, bert_embeddings_add, bert_embeddings_all_add, bert_embeddings_lastlayer, bert_embeddings_penultimatelayer

    if event_embeddings_nr == 0:
      event_embeddings = main_event_embedding(artikkel, layer_name=embedding_layer_name)
    elif event_embeddings_nr == 1:
      event_embeddings = event_embeddingute_keskmised(artikkel, layer_name=embedding_layer_name)
    elif event_embeddings_nr == 2:
      event_embeddings = event_embeddingute_kaalutatud_keskmised(artikkel, layer_name=embedding_layer_name)
    elif event_embeddings_nr == 3:
      event_embeddings = event_embeddingute_kaalutatud_keskmised_sonaliigid(artikkel, layer_name=embedding_layer_name)

    for item in event_embeddings.items():
      event_relations = main_relatsioonid.get((artikkel.meta.get("filename"), item[0]))
      leitud = []
      if not event_relations:
        continue
      for relation in event_relations:
        event_event_embedding = concat_embedding_event_event(item[0], relation[2], event_embeddings)
        leitud.append((event_event_embedding, relation[1]))
      if leitud == []:
        prindi = False
        if prindi:
          print(artikkel.meta.get("filename"))
          print(event_relations)
          print("POLE event_timex RELATIONIT")
      else:
        for leid in leitud:
          X.append(leid[0])
          y.append(leid[1])
          leitud_suhteid += 1
        artikkel_event.append((artikkel.meta.get("filename"), item[0]))
  return X, y

X, y = loo_X_ja_y_main_events("bert_embeddings_fixed", 0)

## Mudelite treenimine erinevate kihtidega ja multiword tüüpidega

In [None]:
# Mudelid
clf1 = make_pipeline(StandardScaler(), LinearSVC(random_state=0, C=0.025, max_iter=2000, dual=False))  # 0.59 - main_event_embedding(bert_embeddings)
clf2 = make_pipeline(StandardScaler(), SVC(random_state=0, kernel="linear", C=0.025)) # 0.64 - main_event_embedding(bert_embeddings)
#clf3 = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=len(set(y)) - 2)) # 0.61 - main_event_embedding(bert_embeddings)
#clf4 = make_pipeline(StandardScaler(), RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)) #0.50 - main_event_embedding(bert_embeddings)
clf5 = make_pipeline(StandardScaler(), MLPClassifier(alpha=0.2, max_iter=2000, random_state=0)) # 0.66 - main_event_embedding(bert_embeddings)

nimed = ['LinearSVC(random_state=0, C=0.025)', 'SVC(random_state=0, kernel="linear", C=0.025)', 'MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)']
mudelid = [clf1,clf2,clf5]

kihid = ["bert_embeddings_fixed", "bert_embeddings_add_fixed", "bert_embeddings_all_add_fixed", "bert_embeddings_lastlayer_fixed", "bert_embeddings_penultimatelayer_fixed"]
# Võimalikud kihid: bert_embeddings, bert_embeddings_add, bert_embeddings_all_add, bert_embeddings_lastlayer, bert_embeddings_penultimatelayer
# Võimalikud multiword viisid: 0 - main_event_embedding
#                              1 - event_embeddingute_keskmised
#                              2 - event_embeddingute_kaalutatud_keskmised
#                              3 - event_embeddingute_kaalutatud_keskmised_sonaliigid

top3 = dict()
multiword_tyyp = {0:"main_event_embedding", 1:"event_embeddingute_keskmised", 2:"event_embeddingute_kaalutatud_keskmised", 3:"event_embeddingute_kaalutatud_keskmised_sonaliigid"}
# Iga multiword tüübi puhul
for i in range(4):
  print("--- multiword tüüp:", multiword_tyyp.get(i))
  # Proovi iga kihiga
  tyyp = dict()
  for kiht in kihid:
    print("------ kiht:", kiht)
    X, y = loo_X_ja_y_main_events(kiht, i)
    # Iga mudeliga
    mudelite_skoorid = []
    for j in range(len(mudelid)):
      clf = mudelid[j]
      m_nimi = nimed[j]
      mudeli_nimi_ja_f1 = prindi_tulemused(clf, m_nimi, X, y, kiht)
      mudelite_skoorid.append(mudeli_nimi_ja_f1)

    mudelite_skoorid.sort(key = lambda x: x[1], reverse=True)
    
    tyyp[kiht] = mudelite_skoorid[:3]
  
  top3[multiword_tyyp.get(i)] = tyyp

  with open(drive_path + 'top3_main_events.json', 'w') as fp:
    json.dump(top3, fp)

print(top3)

--- multiword tüüp: main_event_embedding
------ kiht: bert_embeddings_fixed
2552
2552


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.47 0.38 0.47
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.53      0.54      0.54        35
           BEFORE       0.71      0.64      0.68        70
BEFORE-OR-OVERLAP       0.37      0.39      0.38        18
         IDENTITY       0.00      0.00      0.00         0
         INCLUDES       0.36      0.34      0.35        29
      IS_INCLUDED       0.53      0.50      0.51        36
 OVERLAP-OR-AFTER       0.18      0.33      0.24         6
     SIMULTANEOUS       0.42      0.23      0.29        22
            VAGUE       0.47      0.62      0.53        39

         accuracy                           0.51       255
        macro avg       0.40      0.40      0.39       255
     weighted avg       0.52      0.51      0.51       255

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4670986519607843 vabam: 0.483356311274509

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.49 0.41 0.5
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.68      0.61      0.64        46
           BEFORE       0.69      0.57      0.62        61
BEFORE-OR-OVERLAP       0.45      0.40      0.43        25
         IDENTITY       0.00      0.00      0.00         0
         INCLUDES       0.56      0.57      0.56        35
      IS_INCLUDED       0.50      0.53      0.52        30
 OVERLAP-OR-AFTER       0.38      0.38      0.38        13
     SIMULTANEOUS       0.18      0.31      0.23        13
            VAGUE       0.56      0.62      0.59        32

         accuracy                           0.54       255
        macro avg       0.44      0.44      0.44       255
     weighted avg       0.57      0.54      0.55       255

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.49178615196078435 vabam: 0.5123

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_fixed - 0.46 0.37 0.46
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.63      0.70      0.67        37
           BEFORE       0.63      0.55      0.59        58
BEFORE-OR-OVERLAP       0.32      0.30      0.31        23
         IDENTITY       0.00      0.00      0.00         0
         INCLUDES       0.50      0.60      0.55        30
      IS_INCLUDED       0.56      0.56      0.56        32
 OVERLAP-OR-AFTER       0.38      0.33      0.36        15
     SIMULTANEOUS       0.32      0.39      0.35        18
            VAGUE       0.56      0.48      0.51        42

         accuracy                           0.52       255
        macro avg       0.43      0.44      0.43       255
     weighted avg       0.53      0.52      0.52       255

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4604273897058824 vab

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.44 0.35 0.45
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.56      0.55      0.55        42
           BEFORE       0.63      0.67      0.65        48
BEFORE-OR-OVERLAP       0.36      0.35      0.36        23
         IDENTITY       0.00      0.00      0.00         1
         INCLUDES       0.44      0.46      0.45        35
      IS_INCLUDED       0.69      0.61      0.65        36
 OVERLAP-OR-AFTER       0.15      0.17      0.16        12
     SIMULTANEOUS       0.18      0.21      0.20        19
            VAGUE       0.58      0.54      0.56        39

         accuracy                           0.50       255
        macro avg       0.40      0.39      0.40       255
     weighted avg       0.51      0.50      0.51       255

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.44437653186274517 vabam: 0.4602443321

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.48 0.39 0.48
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.66      0.55      0.60        49
           BEFORE       0.65      0.53      0.58        62
BEFORE-OR-OVERLAP       0.41      0.47      0.44        19
         IDENTITY       0.00      0.00      0.00         1
         INCLUDES       0.58      0.58      0.58        36
      IS_INCLUDED       0.50      0.50      0.50        32
 OVERLAP-OR-AFTER       0.31      0.33      0.32        12
     SIMULTANEOUS       0.23      0.31      0.26        16
            VAGUE       0.50      0.64      0.56        28

         accuracy                           0.52       255
        macro avg       0.43      0.44      0.43       255
     weighted avg       0.54      0.52      0.53       255

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4765073529411765 vabam: 0.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_add_fixed - 0.48 0.41 0.49
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.63      0.60      0.62        43
           BEFORE       0.73      0.58      0.64        64
BEFORE-OR-OVERLAP       0.45      0.53      0.49        19
         IDENTITY       0.00      0.00      0.00         0
         INCLUDES       0.58      0.66      0.62        32
      IS_INCLUDED       0.56      0.53      0.55        34
 OVERLAP-OR-AFTER       0.31      0.40      0.35        10
     SIMULTANEOUS       0.23      0.31      0.26        16
            VAGUE       0.64      0.62      0.63        37

         accuracy                           0.56       255
        macro avg       0.46      0.47      0.46       255
     weighted avg       0.59      0.56      0.57       255

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4847257965686274

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.45 0.38 0.45
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.67      0.55      0.60        40
           BEFORE       0.58      0.53      0.55        59
BEFORE-OR-OVERLAP       0.32      0.40      0.35        15
         IDENTITY       0.50      0.50      0.50         2
         INCLUDES       0.44      0.59      0.51        27
      IS_INCLUDED       0.46      0.60      0.52        30
 OVERLAP-OR-AFTER       0.13      0.22      0.17         9
     SIMULTANEOUS       0.48      0.34      0.40        32
            VAGUE       0.60      0.51      0.55        41

         accuracy                           0.50       255
        macro avg       0.46      0.47      0.46       255
     weighted avg       0.52      0.50      0.51       255

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4522120098039216 vabam: 0.4684719

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.47 0.4 0.48
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.58      0.53      0.55        40
           BEFORE       0.68      0.55      0.61        78
BEFORE-OR-OVERLAP       0.37      0.27      0.31        26
         IDENTITY       0.00      0.00      0.00         1
         INCLUDES       0.54      0.58      0.56        26
      IS_INCLUDED       0.56      0.59      0.58        32
 OVERLAP-OR-AFTER       0.36      0.44      0.40         9
     SIMULTANEOUS       0.25      0.21      0.23        14
            VAGUE       0.39      0.69      0.50        29

         accuracy                           0.52       255
        macro avg       0.41      0.43      0.42       255
     weighted avg       0.53      0.52      0.52       255

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.47415900735294125 vabam

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_all_add_fixed - 0.47 0.4 0.48
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.45      0.48      0.47        29
           BEFORE       0.75      0.73      0.74        67
BEFORE-OR-OVERLAP       0.38      0.69      0.49        16
         INCLUDES       0.47      0.42      0.45        40
      IS_INCLUDED       0.62      0.39      0.48        33
 OVERLAP-OR-AFTER       0.58      0.64      0.61        11
     SIMULTANEOUS       0.36      0.41      0.38        22
            VAGUE       0.56      0.54      0.55        37

         accuracy                           0.55       255
        macro avg       0.52      0.54      0.52       255
     weighted avg       0.56      0.55      0.55       255

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4745235906862746 vabam: 0.49156939338235295
   Recall:     0.47452359068

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.44 0.36 0.44
```
LinearSVC(random_state=0, C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.59      0.60      0.59        40
           BEFORE       0.63      0.70      0.66        46
BEFORE-OR-OVERLAP       0.41      0.35      0.38        26
         IDENTITY       0.00      0.00      0.00         1
         INCLUDES       0.42      0.45      0.43        33
      IS_INCLUDED       0.69      0.56      0.62        39
 OVERLAP-OR-AFTER       0.23      0.38      0.29         8
     SIMULTANEOUS       0.09      0.11      0.10        19
            VAGUE       0.67      0.56      0.61        43

         accuracy                           0.51       255
        macro avg       0.41      0.41      0.41       255
     weighted avg       0.53      0.51      0.52       255

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.441609987745098 vabam: 0.457479

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.48 0.39 0.49
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.73      0.60      0.66        50
           BEFORE       0.76      0.59      0.67        66
BEFORE-OR-OVERLAP       0.41      0.43      0.42        21
         IDENTITY       0.00      0.00      0.00         0
         INCLUDES       0.50      0.51      0.51        35
      IS_INCLUDED       0.53      0.55      0.54        31
 OVERLAP-OR-AFTER       0.31      0.31      0.31        13
     SIMULTANEOUS       0.18      0.36      0.24        11
            VAGUE       0.50      0.64      0.56        28

         accuracy                           0.55       255
        macro avg       0.44      0.44      0.43       255
     weighted avg       0.59      0.55      0.56       255

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.48354319852941174 va

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_lastlayer_fixed - 0.46 0.37 0.47
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.69      0.64      0.67        39
           BEFORE       0.68      0.63      0.66        68
BEFORE-OR-OVERLAP       0.37      0.28      0.32        25
         IDENTITY       0.00      0.00      0.00         0
         INCLUDES       0.64      0.53      0.58        34
      IS_INCLUDED       0.47      0.62      0.53        26
 OVERLAP-OR-AFTER       0.27      0.43      0.33         7
     SIMULTANEOUS       0.50      0.24      0.32        25
            VAGUE       0.43      0.71      0.54        31

         accuracy                           0.55       255
        macro avg       0.45      0.45      0.44       255
     weighted avg       0.57      0.55      0.55       255

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4647349877

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.48 0.38 0.48
```
SVC(random_state=0, kernel="linear", C=0.025)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.76      0.51      0.61        49
           BEFORE       0.62      0.63      0.63        52
BEFORE-OR-OVERLAP       0.21      0.29      0.24        14
         IDENTITY       0.00      0.00      0.00         2
         INCLUDES       0.50      0.56      0.53        32
      IS_INCLUDED       0.56      0.71      0.63        31
 OVERLAP-OR-AFTER       0.20      0.20      0.20        15
     SIMULTANEOUS       0.35      0.35      0.35        23
            VAGUE       0.57      0.54      0.56        37

         accuracy                           0.52       255
        macro avg       0.42      0.42      0.42       255
     weighted avg       0.54      0.52      0.53       255

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4773023897058

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### bert_embeddings_penultimatelayer_fixed - 0.47 0.4 0.48
```
MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
Parima alamhulga raport:
                   precision    recall  f1-score   support

            AFTER       0.61      0.69      0.65        32
           BEFORE       0.65      0.60      0.63        68
BEFORE-OR-OVERLAP       0.42      0.35      0.38        23
         IDENTITY       0.00      0.00      0.00         1
         INCLUDES       0.54      0.41      0.46        37
      IS_INCLUDED       0.65      0.65      0.65        34
 OVERLAP-OR-AFTER       0.27      0.30      0.29        10
     SIMULTANEOUS       0.42      0.28      0.33        18
            VAGUE       0.45      0.72      0.55        32

         accuracy                           0.55       255
        macro avg       0.45      0.44      0.44       255
     weighted avg       0.55      0.55      0.54       255

--- Ristvalideerimise tulemused ---
Tulemuste keskmised (micro):
   Accuracy:   0.4745

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
with open(drive_path + 'top3_main_events.json', 'r') as fp:
    top3 = json.load(fp)

In [None]:
for key in top3.keys():
  print("------------------------ multiword_tüüp:", key)
  for kiht in top3.get(key).keys():
    print("------ kiht:", kiht)
    for mudeli_tulemus in top3.get(key).get(kiht):
      print("### ", mudeli_tulemus[1], mudeli_tulemus[2], mudeli_tulemus[3], "-", mudeli_tulemus[0])
  print()

------------------------ multiword_tüüp: main_event_embedding
------ kiht: bert_embeddings_fixed
###  0.49 0.41 0.5 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.47 0.38 0.47 - LinearSVC(random_state=0, C=0.025)
###  0.46 0.37 0.46 - MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
------ kiht: bert_embeddings_add_fixed
###  0.48 0.39 0.48 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.48 0.41 0.49 - MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
###  0.44 0.35 0.45 - LinearSVC(random_state=0, C=0.025)
------ kiht: bert_embeddings_all_add_fixed
###  0.47 0.4 0.48 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.47 0.4 0.48 - MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
###  0.45 0.38 0.45 - LinearSVC(random_state=0, C=0.025)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.48 0.39 0.49 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.46 0.37 0.47 - MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)
###  0.44 0.36 0.44 - Li

## Top3 mudelid erinvate kombinatsioonidega



```
------------------------ multiword_tüüp: main_event_embedding
------ kiht: bert_embeddings_fixed
###  0.49 0.41 0.5 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.47 0.38 0.47 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.47 0.38 0.47 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
------ kiht: bert_embeddings_add_fixed
###  0.5 0.42 0.51 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.47 0.39 0.48 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.45 0.36 0.45 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_all_add_fixed
###  0.47 0.39 0.47 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.47 0.37 0.47 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.45 0.37 0.45 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.49 0.39 0.49 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.48 0.39 0.49 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.44 0.35 0.44 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.49 0.41 0.5 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.48 0.4 0.49 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.44 0.35 0.44 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)

------------------------ multiword_tüüp: event_embeddingute_keskmised
------ kiht: bert_embeddings_fixed
###  0.49 0.4 0.49 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.46 0.37 0.46 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.46 0.38 0.47 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
------ kiht: bert_embeddings_add_fixed
###  0.48 0.4 0.49 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.47 0.39 0.48 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.44 0.35 0.45 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_all_add_fixed
###  0.47 0.39 0.48 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.47 0.4 0.47 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.43 0.36 0.43 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.48 0.4 0.48 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.47 0.38 0.48 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.44 0.35 0.44 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.49 0.4 0.49 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.47 0.39 0.48 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.44 0.35 0.44 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)

------------------------ multiword_tüüp: event_embeddingute_kaalutatud_keskmised
------ kiht: bert_embeddings_fixed
###  0.49 0.41 0.5 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.48 0.4 0.48 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.47 0.38 0.47 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_add_fixed
###  0.48 0.4 0.48 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.47 0.39 0.47 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.44 0.35 0.45 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_all_add_fixed
###  0.49 0.41 0.5 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.47 0.39 0.48 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.44 0.37 0.44 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.48 0.38 0.48 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.48 0.39 0.49 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.43 0.35 0.43 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.47 0.39 0.48 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.47 0.39 0.48 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.43 0.34 0.43 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)

------------------------ multiword_tüüp: event_embeddingute_kaalutatud_keskmised_sonaliigid
------ kiht: bert_embeddings_fixed
###  0.49 0.4 0.5 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.47 0.37 0.47 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
###  0.47 0.38 0.47 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
------ kiht: bert_embeddings_add_fixed
###  0.49 0.4 0.49 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.47 0.39 0.48 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.45 0.35 0.45 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_all_add_fixed
###  0.47 0.39 0.48 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.47 0.39 0.48 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.44 0.36 0.44 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.48 0.38 0.48 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.47 0.38 0.48 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.44 0.35 0.44 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.47 0.38 0.47 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.47 0.37 0.47 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.43 0.35 0.43 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
```



## Valitud mudel

In [None]:
X, y = loo_X_ja_y_main_events("bert_embeddings_add_fixed", 0)
clf5 = make_pipeline(StandardScaler(), MLPClassifier(alpha=2.8, solver="lbfgs", max_iter=10000, random_state=0))
#clf5 = make_pipeline(StandardScaler(), SVC(random_state=0, kernel="linear", C=0.025, class_weight="balanced"))
tulemused = ristvalideeri(clf5, X, y)
#tulemused = prindi_tulemused(clf5, 'MLPClassifier(alpha=2.8, solver="lbfgs", max_iter=10000, random_state=0)', X, y, "bert_embeddings_penultimatelayer_fixed")

2552
2552
2552
split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1
split 2


  _warn_prf(average, modifier, msg_start, len(result))


split 3
split 4
split 5
split 6
split 7
split 8
split 9


In [None]:
report = classification_report(tulemused[0], tulemused[1])
print(report)
print("Micro")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="micro"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="micro"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="micro"))
print("Macro")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="macro"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="macro"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="macro"))
print("Weighted")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="weighted"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="weighted"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="weighted"))

print("TempEval-2 järgi (õigete vastuste arv / kogu vastuste arv)")
oigeid = 0
for i in range(len(tulemused[0])):
  if tulemused[0][i] == tulemused[1][i]:
    oigeid += 1
print("Skoor:", str(oigeid / len(tulemused[0])))

2552
336
321
211
228
                   precision    recall  f1-score   support

            AFTER       0.60      0.55      0.57       405
           BEFORE       0.67      0.59      0.63       637
BEFORE-OR-OVERLAP       0.28      0.38      0.32       145
         IDENTITY       0.09      1.00      0.17         1
         INCLUDES       0.48      0.50      0.49       315
      IS_INCLUDED       0.53      0.51      0.52       336
 OVERLAP-OR-AFTER       0.20      0.26      0.23       120
     SIMULTANEOUS       0.35      0.37      0.36       211
            VAGUE       0.56      0.57      0.57       382

         accuracy                           0.51      2552
        macro avg       0.42      0.52      0.43      2552
     weighted avg       0.53      0.51      0.52      2552

Micro
- recall: 0.5117554858934169
- precsision: 0.5117554858934169
- f1-score: 0.5117554858934169
Macro
- recall: 0.524471734955591
- precsision: 0.41771222580227113
- f1-score: 0.42722828056311407
Weighted
-

# Mudel - tlink-subordinate-events

In [27]:
def loe_sub_relatsioonid():
  data = None
  with open(drive_path + 'relations_only_sub.pickle', 'rb') as fp:
      #data = json.load(fp)
      data = pickle.load(fp)
  return data
sub_rels = loe_sub_relatsioonid()

koik = []
for key in sub_rels:
  koik.extend(sub_rels.get(key))
print("Iga evendi kõik relatsioonid:", len(koik))

def sub_relatsioonid_korduvusteta():
  sub_rels = loe_sub_relatsioonid()
  # korduvused välja
  uus_dict = {}
  for key in sub_rels:
    relations = sub_rels.get(key)
    sobivad = []
    for rel in relations:
      if rel[0] == key[1]:
        sobivad.append(rel)
    uus_dict[key] = sobivad
  return uus_dict

uus_dict = sub_relatsioonid_korduvusteta()

koik_uus = []
for key in uus_dict:
  koik_uus.extend(uus_dict.get(key))
print("Relatsioonide kordused väljas:", len(koik_uus))


names = []
for art in laetud_artiklid_embeddingutega:
  names.append(art.meta.get("filename"))

kor = []
for key in uus_dict:
  if key[0] in names:
    kor.extend(uus_dict.get(key))
print("Korras artiklite omi:", len(kor))

Iga evendi kõik relatsioonid: 6254
Relatsioonide kordused väljas: 3127
Korras artiklite omi: 3127


In [30]:
def loo_X_ja_y_sub_events(embedding_layer_name, event_embeddings_nr): 
  main_relatsioonid = sub_relatsioonid_korduvusteta()
  artikkel_event = []
  X = []
  y = []

  for artikkel in laetud_artiklid_embeddingutega:
    leitud_suhteid = 0
    # Võimalikud kihid: bert_embeddings, bert_embeddings_add, bert_embeddings_all_add, bert_embeddings_lastlayer, bert_embeddings_penultimatelayer

    if event_embeddings_nr == 0:
      event_embeddings = main_event_embedding(artikkel, layer_name=embedding_layer_name)
    elif event_embeddings_nr == 1:
      event_embeddings = event_embeddingute_keskmised(artikkel, layer_name=embedding_layer_name)
    elif event_embeddings_nr == 2:
      event_embeddings = event_embeddingute_kaalutatud_keskmised(artikkel, layer_name=embedding_layer_name)
    elif event_embeddings_nr == 3:
      event_embeddings = event_embeddingute_kaalutatud_keskmised_sonaliigid(artikkel, layer_name=embedding_layer_name)

    for item in event_embeddings.items():
      event_relations = main_relatsioonid.get((artikkel.meta.get("filename"), item[0]))
      leitud = []
      if not event_relations:
        continue
      for relation in event_relations:
        event_event_embedding = concat_embedding_event_event(item[0], relation[2], event_embeddings)
        leitud.append((event_event_embedding, relation[1]))
      if leitud == []:
        prindi = False
        if prindi:
          print(artikkel.meta.get("filename"))
          print(event_relations)
          print("POLE event_event RELATIONIT")
      else:
        for leid in leitud:
          X.append(leid[0])
          y.append(leid[1])
          leitud_suhteid += 1
        artikkel_event.append((artikkel.meta.get("filename"), item[0]))

  return X, y

X, y = loo_X_ja_y_sub_events("bert_embeddings_fixed", 0)

## Mudelite treenimine erinevate kihtidega ja multiword tüüpidega

In [None]:
# Mudelid
clf1 = make_pipeline(LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=20000, dual=False))  # 0.59 - main_event_embedding(bert_embeddings)
clf2 = make_pipeline(SVC(random_state=0, kernel="linear", C=0.025)) # 0.64 - main_event_embedding(bert_embeddings)
#clf3 = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=len(set(y)) - 2)) # 0.61 - main_event_embedding(bert_embeddings)
#clf4 = make_pipeline(StandardScaler(), RandomForestClassifier(max_depth=5, n_estimators=10, max_features=20, random_state=0)) #0.50 - main_event_embedding(bert_embeddings)
clf5 = make_pipeline(StandardScaler(), MLPClassifier(alpha=0.2, max_iter=2000, random_state=0)) # 0.66 - main_event_embedding(bert_embeddings)

nimed = ['LinearSVC(random_state=0, C=0.025)', 'SVC(random_state=0, kernel="linear", C=0.025)', 'MLPClassifier(alpha=0.2, max_iter=1000, random_state=0)']
mudelid = [clf1,clf2,clf5]

kihid = ["bert_embeddings_fixed", "bert_embeddings_add_fixed", "bert_embeddings_all_add_fixed", "bert_embeddings_lastlayer_fixed", "bert_embeddings_penultimatelayer_fixed"]
# Võimalikud kihid: bert_embeddings, bert_embeddings_add, bert_embeddings_all_add, bert_embeddings_lastlayer, bert_embeddings_penultimatelayer
# Võimalikud multiword viisid: 0 - main_event_embedding
#                              1 - event_embeddingute_keskmised
#                              2 - event_embeddingute_kaalutatud_keskmised
#                              3 - event_embeddingute_kaalutatud_keskmised_sonaliigid

top3 = dict()
multiword_tyyp = {0:"main_event_embedding", 1:"event_embeddingute_keskmised", 2:"event_embeddingute_kaalutatud_keskmised", 3:"event_embeddingute_kaalutatud_keskmised_sonaliigid"}
# Iga multiword tüübi puhul
for i in range(4):
  print("--- multiword tüüp:", multiword_tyyp.get(i))
  # Proovi iga kihiga
  tyyp = dict()
  for kiht in kihid:
    print("------ kiht:", kiht)
    X, y = loo_X_ja_y_sub_events(kiht, i)
    # Iga mudeliga
    mudelite_skoorid = []
    for j in range(len(mudelid)):
      clf = mudelid[j]
      m_nimi = nimed[j]
      print(m_nimi)
      mudeli_nimi_ja_f1 = prindi_tulemused(clf, m_nimi, X, y, kiht)
      mudelite_skoorid.append(mudeli_nimi_ja_f1)

    mudelite_skoorid.sort(key = lambda x: x[1], reverse=True)
    
    tyyp[kiht] = mudelite_skoorid[:3]
  
  top3[multiword_tyyp.get(i)] = tyyp

  with open(drive_path + 'top3_main_events.json', 'w') as fp:
    json.dump(top3, fp)

print(top3)

--- multiword tüüp: main_event_embedding
------ kiht: bert_embeddings_fixed
3127
3127
LinearSVC(random_state=0, C=0.025)
split 0


KeyboardInterrupt: ignored

In [None]:
for key in top3.keys():
  print("------------------------ multiword_tüüp:", key)
  for kiht in top3.get(key).keys():
    print("------ kiht:", kiht)
    for mudeli_tulemus in top3.get(key).get(kiht):
      print("### ", mudeli_tulemus[1], mudeli_tulemus[2], mudeli_tulemus[3], "-", mudeli_tulemus[0])
  print()

## Top3 mudelid erinevate kombinatsioonidega



```
------------------------ multiword_tüüp: main_event_embedding
------ kiht: bert_embeddings_fixed
###  0.44 0.36 0.45 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.42 0.35 0.43 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.39 0.32 0.39 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_add_fixed
###  0.44 0.36 0.44 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.42 0.35 0.43 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.38 0.31 0.38 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_all_add_fixed
###  0.44 0.36 0.44 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.43 0.35 0.44 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.37 0.31 0.37 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.43 0.35 0.43 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.4 0.33 0.41 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.37 0.31 0.37 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.43 0.35 0.44 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.41 0.34 0.42 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.38 0.31 0.38 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)

------------------------ multiword_tüüp: event_embeddingute_keskmised
------ kiht: bert_embeddings_fixed
###  0.44 0.37 0.45 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.42 0.34 0.42 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.39 0.33 0.4 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_add_fixed
###  0.44 0.36 0.45 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.43 0.35 0.43 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.38 0.31 0.38 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_all_add_fixed
###  0.43 0.35 0.43 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.43 0.35 0.43 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.38 0.31 0.38 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.43 0.35 0.43 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.4 0.33 0.41 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.36 0.3 0.36 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.45 0.37 0.46 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.43 0.35 0.43 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.38 0.32 0.38 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)

------------------------ multiword_tüüp: event_embeddingute_kaalutatud_keskmised
------ kiht: bert_embeddings_fixed
###  0.44 0.36 0.45 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.42 0.35 0.43 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.4 0.33 0.4 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_add_fixed
###  0.44 0.37 0.45 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.42 0.34 0.43 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.37 0.31 0.37 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_all_add_fixed
###  0.44 0.36 0.44 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.43 0.36 0.44 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.37 0.31 0.37 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.43 0.35 0.44 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.4 0.33 0.41 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.36 0.3 0.36 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.45 0.37 0.45 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.42 0.34 0.42 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.38 0.32 0.38 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)

------------------------ multiword_tüüp: event_embeddingute_kaalutatud_keskmised_sonaliigid
------ kiht: bert_embeddings_fixed
###  0.44 0.36 0.45 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.41 0.34 0.41 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.39 0.32 0.4 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_add_fixed
###  0.44 0.36 0.44 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.42 0.35 0.43 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.37 0.31 0.37 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_all_add_fixed
###  0.43 0.35 0.43 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.43 0.36 0.43 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.38 0.31 0.38 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.43 0.35 0.43 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.4 0.33 0.4 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.36 0.3 0.36 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.45 0.37 0.45 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.42 0.34 0.42 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.38 0.32 0.38 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
```



## Valitud mudel

In [None]:
X, y = loo_X_ja_y_sub_events("bert_embeddings_penultimatelayer_fixed", 1)
clf5 = make_pipeline(StandardScaler(), MLPClassifier(alpha=1, solver="lbfgs", max_iter=10000, random_state=0))
#clf5 = make_pipeline(StandardScaler(), SVC(random_state=0, kernel="linear", C=1))
tulemused = ristvalideeri(clf5, X, y)
#tulemused = prindi_tulemused(clf5, 'MLPClassifier(alpha=1, solver="lbfgs", max_iter=10000, random_state=0)', X, y, "bert_embeddings_penultimatelayer_fixed")

3127
3127
3127
split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1
split 2
split 3
split 4
split 5
split 6
split 7


  _warn_prf(average, modifier, msg_start, len(result))


split 8
split 9


In [None]:
report = classification_report(tulemused[0], tulemused[1])
print(report)
print("Micro")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="micro"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="micro"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="micro"))
print("Macro")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="macro"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="macro"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="macro"))
print("Weighted")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="weighted"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="weighted"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="weighted"))

print("TempEval-2 järgi (õigete vastuste arv / kogu vastuste arv)")
oigeid = 0
for i in range(len(tulemused[0])):
  if tulemused[0][i] == tulemused[1][i]:
    oigeid += 1
print("Skoor:", str(oigeid / len(tulemused[0])))

3127
278
304
283
318
                   precision    recall  f1-score   support

            AFTER       0.56      0.54      0.55       399
           BEFORE       0.62      0.55      0.58       728
BEFORE-OR-OVERLAP       0.38      0.41      0.39       395
         IDENTITY       0.00      0.00      0.00         0
         INCLUDES       0.43      0.44      0.43       355
      IS_INCLUDED       0.41      0.45      0.43       278
 OVERLAP-OR-AFTER       0.23      0.28      0.25       120
     SIMULTANEOUS       0.36      0.41      0.39       283
            VAGUE       0.43      0.41      0.42       569

         accuracy                           0.46      3127
        macro avg       0.38      0.39      0.38      3127
     weighted avg       0.47      0.46      0.46      3127

Micro
- recall: 0.4595458906299968
- precsision: 0.4595458906299968
- f1-score: 0.45954589062999684
Macro
- recall: 0.3859220308387335
- precsision: 0.3799646927619876
- f1-score: 0.38221937080010343
Weighted


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


- recall: 0.4595458906299968
- precsision: 0.4679755893418784
- f1-score: 0.4629301383525842
TempEval-2 järgi (õigete vastuste arv / kogu vastuste arv)
Skoor: 0.4595458906299968


# Hilisemad katsetused

## Mudel - kõik koos

In [None]:
# Eraldan igast suhterühmast testimiseks andmeid, et katsetada eraldi nende tulemusi.
X_train_data = []
y_train_data = []

layer = "bert_embeddings_penultimatelayer_fixed"
viis = 0

X, y = loo_X_ja_y_DCT(layer, viis, False)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
X_train_data.extend(X_train)
y_train_data.extend(y_train)
event_dct_data = (X_test, y_test)
X, y = loo_X_ja_y_eventtimex(layer, viis)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)
X_train_data.extend(X_train)
y_train_data.extend(y_train)
event_timex_data = (X_test, y_test)
X, y = loo_X_ja_y_main_events(layer, viis)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)
X_train_data.extend(X_train)
y_train_data.extend(y_train)
main_event_data = (X_test, y_test)
X, y = loo_X_ja_y_sub_events(layer, viis)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)
X_train_data.extend(X_train)
y_train_data.extend(y_train)
sub_event_data = (X_test, y_test)

test_data = [event_dct_data, event_timex_data, main_event_data, sub_event_data]
print(len(X_train_data), len(y_train_data))

4010
4010
559
559
2552
2552
3127
3127
9222 9222


In [None]:
X_train_data_padded = []
lenghts = set()
for emb in X_train_data:
  emblist = emb.tolist()
  if len(emblist) != 1536:
    korda = 1536 - len(emblist)
    for i in range(korda):
      emblist.append(0.0)
  X_train_data_padded.append(emblist)
  lenghts.add(len(emb))
print(lenghts)
lenghts = set()
for emb in X_train_data_padded:
  lenghts.add(len(emb))
print(lenghts)
#X_train_data = list(X_train_data)
#y_train_data = list(y_train_data)
proportions = get_proportions(y_train_data, osakaal=False)
print("Treenimisel relatsioonitüüpid:", proportions)
all_y = []
for ryhm in test_data:
  all_y.extend(ryhm[1])
proportions = get_proportions(all_y, osakaal=False)
print("Testimisel relatsioonitüüpid:", proportions)

{768, 1536}
{1536}
Treenimisel relatsioonitüüpid: 
BEFORE-OR-OVERLAP: 957
INCLUDES: 1432
VAGUE: 1314
SIMULTANEOUS: 625
IDENTITY: 14
BEFORE: 2453
AFTER: 1113
IS_INCLUDED: 855
OVERLAP-OR-AFTER: 459

Testimisel relatsioonitüüpid: 
INCLUDES: 168
BEFORE-OR-OVERLAP: 102
VAGUE: 147
SIMULTANEOUS: 70
IDENTITY: 1
BEFORE: 268
AFTER: 123
IS_INCLUDED: 95
OVERLAP-OR-AFTER: 52



In [None]:
print(len(tulemused[0]))
print(tulemused[0].count("IS_INCLUDED"))
print(tulemused[1].count("IS_INCLUDED"))
print(tulemused[0].count("SIMULTANEOUS"))
print(tulemused[1].count("SIMULTANEOUS"))
report = classification_report(tulemused[0], tulemused[1])
print(report)
print("Micro")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="micro"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="micro"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="micro"))
print("Macro")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="macro"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="macro"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="macro"))
print("Weighted")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="weighted"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="weighted"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="weighted"))

print("TempEval-2 järgi (õigete vastuste arv / kogu vastuste arv)")
oigeid = 0
for i in range(len(tulemused[0])):
  if tulemused[0][i] == tulemused[1][i]:
    oigeid += 1
print("Skoor:", str(oigeid / len(tulemused[0])))

9222
891
855
585
625
                   precision    recall  f1-score   support

            AFTER       0.54      0.53      0.53      1139
           BEFORE       0.74      0.70      0.72      2578
BEFORE-OR-OVERLAP       0.42      0.45      0.44       888
         IDENTITY       0.00      0.00      0.00         1
         INCLUDES       0.57      0.55      0.56      1468
      IS_INCLUDED       0.53      0.51      0.52       891
 OVERLAP-OR-AFTER       0.14      0.19      0.16       336
     SIMULTANEOUS       0.34      0.36      0.35       585
            VAGUE       0.45      0.44      0.45      1336

         accuracy                           0.54      9222
        macro avg       0.41      0.42      0.41      9222
     weighted avg       0.55      0.54      0.54      9222

Micro
- recall: 0.5373021036651485
- precsision: 0.5373021036651485
- f1-score: 0.5373021036651485
Macro
- recall: 0.41583446193019835
- precsision: 0.4141750407297642
- f1-score: 0.4143673192848268
Weighted
-

In [None]:
ryhma_nimi = {0:"event_dct_data", 1:"event_timex_data", 2:"main_event_data", 3:"sub_event_data"}
ryhma_nr = 0
for ryhm in test_data:
  print("Rühm:", ryhma_nimi.get(ryhma_nr))

  X_test = []
  lenghts = set()
  for emb in ryhm[0]:
    emblist = emb.tolist()
    if len(emblist) != 1536:
      korda = 1536 - len(emblist)
      for i in range(korda):
        emblist.append(0.0)
    X_test.append(emblist)
    lenghts.add(len(emb))

  y_pred = clf5.predict(X_test)

  report = classification_report(y_pred, ryhm[1])
  print(report)
  print("Micro")
  print("- recall:", recall_score(y_pred, ryhm[1], average="micro"))
  print("- precsision:", precision_score(y_pred, ryhm[1], average="micro"))
  print("- f1-score:", f1_score(y_pred, ryhm[1], average="micro"))
  print("Macro")
  print("- recall:", recall_score(y_pred, ryhm[1], average="macro"))
  print("- precsision:", precision_score(y_pred, ryhm[1], average="macro"))
  print("- f1-score:", f1_score(y_pred, ryhm[1], average="macro"))
  print("Weighted")
  print("- recall:", recall_score(y_pred, ryhm[1], average="weighted"))
  print("- precsision:", precision_score(y_pred, ryhm[1], average="weighted"))
  print("- f1-score:", f1_score(y_pred, ryhm[1], average="weighted"))

  print("TempEval-2 järgi (õigete vastuste arv / kogu vastuste arv)")
  oigeid = 0
  for i in range(len(y_pred)):
    if y_pred[i] == ryhm[1][i]:
      oigeid += 1
  print("Skoor:", str(oigeid / len(y_pred)))

  ryhma_nr += 1
  print()

Rühm: event_dct_data
                   precision    recall  f1-score   support

            AFTER       0.48      0.48      0.48        46
           BEFORE       0.86      0.81      0.83       156
BEFORE-OR-OVERLAP       0.56      0.68      0.61        28
         INCLUDES       0.69      0.65      0.67       104
      IS_INCLUDED       0.00      0.00      0.00         0
 OVERLAP-OR-AFTER       0.30      0.60      0.40        10
     SIMULTANEOUS       0.00      0.00      0.00         1
            VAGUE       0.45      0.43      0.44        56

         accuracy                           0.66       401
        macro avg       0.42      0.46      0.43       401
     weighted avg       0.68      0.66      0.67       401

Micro
- recall: 0.6608478802992519
- precsision: 0.6608478802992519
- f1-score: 0.6608478802992519
Macro
- recall: 0.45586777353081703
- precsision: 0.4183507296633465
- f1-score: 0.4299044350842652
Weighted
- recall: 0.6608478802992519
- precsision: 0.68029663761095


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                   precision    recall  f1-score   support

            AFTER       0.00      0.00      0.00         0
           BEFORE       0.00      0.00      0.00         1
BEFORE-OR-OVERLAP       0.50      1.00      0.67         3
         INCLUDES       1.00      1.00      1.00         1
      IS_INCLUDED       0.79      0.64      0.71        36
 OVERLAP-OR-AFTER       0.33      0.50      0.40         2
     SIMULTANEOUS       0.47      0.58      0.52        12
            VAGUE       0.00      0.00      0.00         1

         accuracy                           0.62        56
        macro avg       0.39      0.47      0.41        56
     weighted avg       0.67      0.62      0.63        56

Micro
- recall: 0.625
- precsision: 0.625
- f1-score: 0.625
Macro
- recall: 0.4652777777777778
- precsision: 0.38663793103448274
- f1-score: 0.4116096866096866
Weighted
- recall: 0.625
- precsision: 0.6663998357963876
- f1-score: 0.6339133089133089
TempEval-2 järgi (õigete vastuste arv / 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                   precision    recall  f1-score   support

            AFTER       0.51      0.61      0.56        33
           BEFORE       0.63      0.54      0.58        76
BEFORE-OR-OVERLAP       0.36      0.34      0.35        44
         INCLUDES       0.31      0.46      0.37        24
      IS_INCLUDED       0.37      0.31      0.34        35
 OVERLAP-OR-AFTER       0.21      0.25      0.23        12
     SIMULTANEOUS       0.34      0.39      0.37        28
            VAGUE       0.42      0.38      0.40        61

         accuracy                           0.43       313
        macro avg       0.39      0.41      0.40       313
     weighted avg       0.44      0.43      0.43       313

Micro
- recall: 0.43130990415335463
- precsision: 0.43130990415335463
- f1-score: 0.43130990415335463
Macro
- recall: 0.40987109399803534
- precsision: 0.39364654442779445
- f1-score: 0.39813360940598225
Weighted
- recall: 0.43130990415335463
- precsision: 0.44232591157511286
- f1-score: 

### Top3 mudelid erinevate kombinatsioonidega



```
------------------------ multiword_tüüp: main_event_embedding
------ kiht: bert_embeddings_fixed
###  0.44 0.36 0.45 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.42 0.35 0.43 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.39 0.32 0.39 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_add_fixed
###  0.44 0.36 0.44 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.42 0.35 0.43 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.38 0.31 0.38 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_all_add_fixed
###  0.44 0.36 0.44 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.43 0.35 0.44 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.37 0.31 0.37 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.43 0.35 0.43 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.4 0.33 0.41 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.37 0.31 0.37 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.43 0.35 0.44 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.41 0.34 0.42 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.38 0.31 0.38 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)

------------------------ multiword_tüüp: event_embeddingute_keskmised
------ kiht: bert_embeddings_fixed
###  0.44 0.37 0.45 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.42 0.34 0.42 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.39 0.33 0.4 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_add_fixed
###  0.44 0.36 0.45 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.43 0.35 0.43 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.38 0.31 0.38 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_all_add_fixed
###  0.43 0.35 0.43 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.43 0.35 0.43 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.38 0.31 0.38 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.43 0.35 0.43 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.4 0.33 0.41 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.36 0.3 0.36 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.45 0.37 0.46 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.43 0.35 0.43 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.38 0.32 0.38 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)

------------------------ multiword_tüüp: event_embeddingute_kaalutatud_keskmised
------ kiht: bert_embeddings_fixed
###  0.44 0.36 0.45 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.42 0.35 0.43 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.4 0.33 0.4 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_add_fixed
###  0.44 0.37 0.45 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.42 0.34 0.43 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.37 0.31 0.37 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_all_add_fixed
###  0.44 0.36 0.44 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.43 0.36 0.44 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.37 0.31 0.37 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.43 0.35 0.44 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.4 0.33 0.41 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.36 0.3 0.36 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.45 0.37 0.45 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.42 0.34 0.42 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.38 0.32 0.38 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)

------------------------ multiword_tüüp: event_embeddingute_kaalutatud_keskmised_sonaliigid
------ kiht: bert_embeddings_fixed
###  0.44 0.36 0.45 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.41 0.34 0.41 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.39 0.32 0.4 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_add_fixed
###  0.44 0.36 0.44 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.42 0.35 0.43 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.37 0.31 0.37 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_all_add_fixed
###  0.43 0.35 0.43 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.43 0.36 0.43 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.38 0.31 0.38 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_lastlayer_fixed
###  0.43 0.35 0.43 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.4 0.33 0.4 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.36 0.3 0.36 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
------ kiht: bert_embeddings_penultimatelayer_fixed
###  0.45 0.37 0.45 - MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)
###  0.42 0.34 0.42 - SVC(random_state=0, kernel="linear", C=0.025)
###  0.38 0.32 0.38 - LinearSVC(random_state=0, C=0.025, tol=1.0e-6, max_iter=5000, dual=False)
```



In [None]:
X, y = loo_X_ja_y_main_events("bert_embeddings_add_fixed", 0)
clf5 = make_pipeline(StandardScaler(), MLPClassifier(alpha=4, solver="adam", max_iter=10000, random_state=0))
#clf5 = make_pipeline(StandardScaler(), SVC(random_state=0, kernel="linear", C=0.025, class_weight="balanced"))
tulemused = ristvalideeri(clf5, X, y)
#tulemused = prindi_tulemused(clf5, "MLPClassifier(alpha=0.2, max_iter=5000, random_state=0)", X, y, "bert_embeddings_penultimatelayer_fixed")

## tlink-event-dct lihtsustatud relatsioonitüübid
IDENTITY, INCLUDES, IS_INCLUDED ja SIMULTANEOUS => OVERLAP

In [45]:
def relatsioonityypide_lihtsustamine(y):
  for i in range(len(y)):
    if y[i] in ["IDENTITY", "INCLUDES", "IS_INCLUDED", "SIMULTANEOUS"]:
      y[i] = "OVERLAP"

In [46]:
X, y = loo_X_ja_y_DCT("bert_embeddings_penultimatelayer_fixed", 0, False)
relatsioonityypide_lihtsustamine(y)
clf5 = make_pipeline(StandardScaler(), MLPClassifier(alpha=2.8, solver="lbfgs", max_iter=5000, random_state=0))
tulemused = ristvalideeri(clf5, X, y)

4010
4010
split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9


In [47]:
report = classification_report(tulemused[0], tulemused[1])
print(report)
print("Micro")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="micro"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="micro"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="micro"))
print("Macro")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="macro"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="macro"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="macro"))
print("Weighted")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="weighted"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="weighted"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="weighted"))

print("TempEval-2 järgi (õigete vastuste arv / kogu vastuste arv)")
oigeid = 0
for i in range(len(tulemused[0])):
  if tulemused[0][i] == tulemused[1][i]:
    oigeid += 1
print("Skoor:", str(oigeid / len(tulemused[0])))

                   precision    recall  f1-score   support

            AFTER       0.51      0.49      0.50       478
           BEFORE       0.86      0.82      0.84      1597
BEFORE-OR-OVERLAP       0.60      0.73      0.66       313
          OVERLAP       0.67      0.63      0.65       992
 OVERLAP-OR-AFTER       0.13      0.20      0.15       120
            VAGUE       0.45      0.46      0.45       510

         accuracy                           0.66      4010
        macro avg       0.53      0.55      0.54      4010
     weighted avg       0.67      0.66      0.67      4010

Micro
- recall: 0.6603491271820449
- precsision: 0.6603491271820449
- f1-score: 0.6603491271820449
Macro
- recall: 0.5543356227724944
- precsision: 0.5340770998528938
- f1-score: 0.5414562678712995
Weighted
- recall: 0.6603491271820449
- precsision: 0.6749083636422127
- f1-score: 0.6664594327796154
TempEval-2 järgi (õigete vastuste arv / kogu vastuste arv)
Skoor: 0.6603491271820449


## tlink-event-timex lihtsustatud relatsioonitüübid
IDENTITY, INCLUDES, IS_INCLUDED ja SIMULTANEOUS => OVERLAP

In [57]:
X, y = loo_X_ja_y_eventtimex("bert_embeddings_add_fixed", 0)
print(get_propotions(y))
relatsioonityypide_lihtsustamine(y)
print("Peale lihtsustamist")
print(get_propotions(y))
clf5 = make_pipeline(StandardScaler(), MLPClassifier(alpha=3, solver="sgd", max_iter=10000, random_state=0))
tulemused = ristvalideeri(clf5, X, y)

split 0


  _warn_prf(average, modifier, msg_start, len(result))


split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9


In [58]:
report = classification_report(tulemused[0], tulemused[1])
print(report)
print("Micro")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="micro"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="micro"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="micro"))
print("Macro")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="macro"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="macro"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="macro"))
print("Weighted")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="weighted"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="weighted"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="weighted"))

print("TempEval-2 järgi (õigete vastuste arv / kogu vastuste arv)")
oigeid = 0
for i in range(len(tulemused[0])):
  if tulemused[0][i] == tulemused[1][i]:
    oigeid += 1
print("Skoor:", str(oigeid / len(tulemused[0])))

                   precision    recall  f1-score   support

            AFTER       0.21      0.60      0.32         5
           BEFORE       0.00      0.00      0.00         0
BEFORE-OR-OVERLAP       0.25      0.36      0.30        44
          OVERLAP       0.93      0.84      0.88       498
 OVERLAP-OR-AFTER       0.15      0.33      0.21        12
            VAGUE       0.00      0.00      0.00         0

         accuracy                           0.79       559
        macro avg       0.26      0.36      0.28       559
     weighted avg       0.85      0.79      0.82       559

Micro
- recall: 0.7889087656529516
- precsision: 0.7889087656529516
- f1-score: 0.7889087656529515
Macro
- recall: 0.3560545211147621
- precsision: 0.258842967640295
- f1-score: 0.28469482680440106
Weighted
- recall: 0.7889087656529516
- precsision: 0.8545780180599757
- f1-score: 0.8173388936618443
TempEval-2 järgi (õigete vastuste arv / kogu vastuste arv)
Skoor: 0.7889087656529516


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## tlink-main-event lihtsustatud relatsioonitüübid
IDENTITY, INCLUDES, IS_INCLUDED ja SIMULTANEOUS => OVERLAP

In [61]:
X, y = loo_X_ja_y_main_events("bert_embeddings_add_fixed", 0)
relatsioonityypide_lihtsustamine(y)
clf5 = make_pipeline(StandardScaler(), MLPClassifier(alpha=2.8, solver="lbfgs", max_iter=10000, random_state=0))
tulemused = ristvalideeri(clf5, X, y)

split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9


In [62]:
report = classification_report(tulemused[0], tulemused[1])
print(report)
print("Micro")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="micro"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="micro"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="micro"))
print("Macro")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="macro"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="macro"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="macro"))
print("Weighted")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="weighted"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="weighted"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="weighted"))

print("TempEval-2 järgi (õigete vastuste arv / kogu vastuste arv)")
oigeid = 0
for i in range(len(tulemused[0])):
  if tulemused[0][i] == tulemused[1][i]:
    oigeid += 1
print("Skoor:", str(oigeid / len(tulemused[0])))

                   precision    recall  f1-score   support

            AFTER       0.57      0.53      0.55       399
           BEFORE       0.64      0.61      0.62       581
BEFORE-OR-OVERLAP       0.26      0.40      0.32       128
          OVERLAP       0.67      0.59      0.63      1002
 OVERLAP-OR-AFTER       0.22      0.35      0.27        96
            VAGUE       0.55      0.63      0.59       346

         accuracy                           0.57      2552
        macro avg       0.49      0.52      0.50      2552
     weighted avg       0.59      0.57      0.58      2552

Micro
- recall: 0.5713166144200627
- precsision: 0.5713166144200627
- f1-score: 0.5713166144200627
Macro
- recall: 0.5179617433576472
- precsision: 0.4851673540162245
- f1-score: 0.49563632546831804
Weighted
- recall: 0.5713166144200627
- precsision: 0.5928681135886226
- f1-score: 0.5793390353311142
TempEval-2 järgi (õigete vastuste arv / kogu vastuste arv)
Skoor: 0.5713166144200627


## tlink-subordinate-events lihtsustatud relatsioonitüübid
IDENTITY, INCLUDES, IS_INCLUDED ja SIMULTANEOUS => OVERLAP

In [53]:
X, y = loo_X_ja_y_sub_events("bert_embeddings_penultimatelayer_fixed", 1)
relatsioonityypide_lihtsustamine(y)
clf5 = make_pipeline(StandardScaler(), MLPClassifier(alpha=1, solver="lbfgs", max_iter=10000, random_state=0))
tulemused = ristvalideeri(clf5, X, y)

split 0
split 1
split 2
split 3
split 4
split 5
split 6
split 7
split 8
split 9


In [54]:
report = classification_report(tulemused[0], tulemused[1])
print(report)
print("Micro")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="micro"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="micro"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="micro"))
print("Macro")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="macro"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="macro"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="macro"))
print("Weighted")
print("- recall:", recall_score(tulemused[0], tulemused[1], average="weighted"))
print("- precsision:", precision_score(tulemused[0], tulemused[1], average="weighted"))
print("- f1-score:", f1_score(tulemused[0], tulemused[1], average="weighted"))

print("TempEval-2 järgi (õigete vastuste arv / kogu vastuste arv)")
oigeid = 0
for i in range(len(tulemused[0])):
  if tulemused[0][i] == tulemused[1][i]:
    oigeid += 1
print("Skoor:", str(oigeid / len(tulemused[0])))

                   precision    recall  f1-score   support

            AFTER       0.54      0.56      0.55       377
           BEFORE       0.58      0.54      0.56       689
BEFORE-OR-OVERLAP       0.37      0.40      0.38       382
          OVERLAP       0.56      0.53      0.54      1038
 OVERLAP-OR-AFTER       0.16      0.26      0.20        89
            VAGUE       0.41      0.40      0.40       552

         accuracy                           0.49      3127
        macro avg       0.44      0.45      0.44      3127
     weighted avg       0.50      0.49      0.49      3127

Micro
- recall: 0.4912056283978254
- precsision: 0.4912056283978254
- f1-score: 0.4912056283978254
Macro
- recall: 0.4496777065762109
- precsision: 0.43657342359527523
- f1-score: 0.44103176926006754
Weighted
- recall: 0.4912056283978254
- precsision: 0.49991400969542404
- f1-score: 0.49491001409752017
TempEval-2 järgi (õigete vastuste arv / kogu vastuste arv)
Skoor: 0.4912056283978254
