In [1]:
from models.knn import KNN
from models.linear import Linear
from models.naive_bayes import NaiveBayes
from models.word2vec import W2V

from src.criterion import get_recall, best_k
from src.data import get_train_test_split, regexp_processing, get_sentences

from src.constants import constants
# Label set handed to every model constructor in this notebook via `classes=`.
# NOTE(review): presumably the list of target port codes (e.g. 'FRSML', 'ESSDR'
# appear in the recorded outputs) — confirm against src.constants.
CLASSES = constants["classes"]

In [2]:
# Train/test split with no transform applied.
train, test = get_train_test_split()
# Same split helper, this time with `regexp_processing` passed as the transform.
# NOTE(review): if get_train_test_split shuffles without a fixed seed, this second
# call may not return the same rows as the untransformed split above — confirm
# the split is deterministic.
train_proc, test_proc = get_train_test_split(transform=regexp_processing)
# Sentence corpus; within this notebook it is only used to fit the W2V model.
sentences = get_sentences()

## KNN
---

In [13]:
# KNN classifier over the shared label set with k=3, fitted on the
# regexp-transformed training split.
knn_model = KNN(classes=CLASSES, k=3)
knn_model.fit(train_proc)

In [15]:
# Recall at 1, 2 and 3 for the KNN model.
# NOTE(review): knn_model was fitted on `train_proc` but is scored on the
# untransformed `test` split here — confirm this mismatch is intended.
knn_recall = get_recall(knn_model, test)
print(f"Recall @1,2,3 - {knn_recall}")

Recall @1,2,3 - (0.84, 0.92, 0.98)


## Naive Bayes
---

In [19]:
# NaiveBayes model over the shared label set, fitted on the untransformed
# training split.
# NOTE(review): `n=2` is presumably the n-gram order — confirm in models.naive_bayes.
naive_bayes_model = NaiveBayes(classes=CLASSES, n=2)
naive_bayes_model.fit(train)

100%|██████████| 198/198 [00:00<00:00, 747.72it/s]


In [21]:
# Recall at 1, 2 and 3 for the NaiveBayes model on the untransformed test split.
naive_bayes_recall = get_recall(naive_bayes_model, test)
print(f"Recall @1,2,3 - {naive_bayes_recall}")

Recall @1,2,3 - (0.74, 0.76, 0.8)


In [5]:
# Evaluate the NaiveBayes model; the returned value is shown as the cell output.
# Per the recorded output it is (accuracy, list of 3-tuples for the errors).
# NOTE(review): the tuples look like (input, expected, predicted) — confirm the order.
# NOTE(review): the model is fitted on the untransformed `train` split but evaluated
# on `test_proc` here — confirm this mismatch is intended.
# NOTE(review): this cell's execution count (5) is lower than the fit cell's (19),
# so the stored output may predate the current model; re-run top to bottom.
naive_bayes_model.eval(test_proc)

100%|██████████| 50/50 [00:00<00:00, 1324.18it/s]

Accuracy: 0.66 | Errors: 17





(0.66,
 [('BAL', 'NLRTM', 'FRSML'),
  ('BREST DRYDOCK#2', 'FRBES', 'FRSML'),
  ('ES  SDR', 'ESSDR', 'FRSML'),
  ('DUKIRK, FRANCE', 'FRIRK', 'FRBOD'),
  ('FROM GBSOU TO ESSDR', 'ESSDR', 'FRSML'),
  ('ES SDR', 'ESSDR', 'FRSML'),
  ('DUNKERQUE', 'FRIRK', 'FRSML'),
  ('FR NTE', 'NLRTM', 'FRSML'),
  ('FR DON', 'BEANR', 'FRBOD'),
  ('FR NTE', 'NLRTM', 'FRSML'),
  ('TRGEB', 'FRLEH', 'FRSML'),
  ('ESSDR', 'ESSDR', 'FRSML'),
  ('RU PRI', 'FRBOD', 'FRSML'),
  ('SANTANDER SPAIN', 'ESSDR', 'FRSML'),
  ('DUNKIRK,FRANCE', 'FRIRK', 'FRBOD'),
  ('IT PVE', 'BEANR', 'FRSML'),
  ('SANTANDER, SPAIN', 'ESSDR', 'FRSML')])

## Linear Model
---

In [175]:
# Linear model over the shared label set with max_len=32, trained for 8 epochs
# on the untransformed training split.
lm = Linear(max_len=32, classes=CLASSES)
# Kept as the last expression so the value returned by fit (per the recorded
# output: a list of per-epoch losses and a list of per-epoch accuracies) is
# displayed as the cell output.
lm.fit(train, epochs=8)

Epoch: 1/8 | Accuracy: 0.5303030303030303 | Loss: 129.32166262610767
Epoch: 2/8 | Accuracy: 0.8282828282828283 | Loss: 49.657216363392585
Epoch: 3/8 | Accuracy: 0.9444444444444444 | Loss: 27.3234969372022
Epoch: 4/8 | Accuracy: 0.9747474747474747 | Loss: 16.434916216722502
Epoch: 5/8 | Accuracy: 0.9797979797979798 | Loss: 10.877555663510917
Epoch: 6/8 | Accuracy: 0.98989898989899 | Loss: 7.621654572660742
Epoch: 7/8 | Accuracy: 1.0 | Loss: 5.541417559193727
Epoch: 8/8 | Accuracy: 1.0 | Loss: 4.147147291845609


([129.32166262610767,
  49.657216363392585,
  27.3234969372022,
  16.434916216722502,
  10.877555663510917,
  7.621654572660742,
  5.541417559193727,
  4.147147291845609],
 [0.5303030303030303,
  0.8282828282828283,
  0.9444444444444444,
  0.9747474747474747,
  0.9797979797979798,
  0.98989898989899,
  1.0,
  1.0])

In [176]:
# Recall at 1, 2 and 3 for the linear model on the untransformed test split.
lm_recall = get_recall(lm, test)
print(f"Recall @1,2,3 - {lm_recall}")

Recall @1,2,3 - (0.74, 0.8, 0.84)


## Word2Vec
---

In [22]:
# W2V model over the shared label set, fitted on the sentence corpus rather
# than on the train split used by the other models.
# NOTE(review): no random seed is set in the visible cells; if W2V training is
# stochastic, the recall figures below may vary between runs — confirm.
w2v = W2V(classes=CLASSES)
w2v.fit(sentences)

In [24]:
# Recall at 1, 2 and 3 for the W2V model on the untransformed test split.
w2v_recall_raw = get_recall(w2v, test)
print(f"Recall @1,2,3 - {w2v_recall_raw}")

Recall @1,2,3 - (0.18, 0.28, 0.48)


In [28]:
# Recall at 1, 2 and 3 for the W2V model on the regexp-transformed test split.
w2v_recall_proc = get_recall(w2v, test_proc)
print(f"Recall @1,2,3 - {w2v_recall_proc}")

Recall @1,2,3 - (0.2, 0.28, 0.48)
