In [2]:
import metal
import numpy as np
import torch
import pickle
import scipy.sparse as sparse

from metal.end_model import EndModel
from metal.label_model import LabelModel
from metal.modules import LSTMModule, SparseLinearModule

# Step 1: Load Data

In [43]:
with open("data/cdr_data/Xs_bow", 'rb') as f:
    X_train_bow, X_dev_bow, X_test_bow = pickle.load(f)

In [44]:
with open("data/cdr_data/Ls", 'rb') as f:
    L_train, L_dev = pickle.load(f)

In [45]:
with open("data/cdr_data/Ys", 'rb') as f:
    Y_train, Y_dev, Y_test = pickle.load(f)

In [46]:
# load word sequences for LSTM models
with open("data/cdr_data/Xs_lstm", 'rb') as f:
    X_train_lstm, X_dev_lstm, X_test_lstm = pickle.load(f)

In [1]:
metrics = ['accuracy', 'recall', 'precision', 'f1']

# Step 2: Train Label Model

In [47]:
label_model = LabelModel()
label_model.train_model(L_train, n_epochs=200, print_every=50, seed=123)
label_model.score((L_dev, Y_dev), metric=metrics)

Computing O...
Estimating \mu...
[E:0]	Train Loss: 0.375
[E:50]	Train Loss: 0.036
[E:100]	Train Loss: 0.028
[E:150]	Train Loss: 0.028
[E:199]	Train Loss: 0.028
Finished Training
Accuracy: 0.650
Recall: 0.679
Precision: 0.769
F1: 0.721
        y=1    y=2   
 l=1    402    121   
 l=2    190    175   


[0.6497747747747747, 0.6790540540540541, 0.768642447418738, 0.7210762331838565]

In [52]:
Y_train_ps = label_model.predict_proba(L_train)

# Step 4: Train End Model

##  1. BoW Model

In [59]:
end_model = EndModel([9946,100, 2], seed=123)


Network architecture:
Sequential(
  (0): IdentityModule()
  (1): Sequential(
    (0): Linear(in_features=9946, out_features=100, bias=True)
    (1): ReLU()
  )
  (2): Linear(in_features=100, out_features=2, bias=True)
)



In [60]:
end_model.train_model((X_train_bow, Y_train_ps), n_epochs=5, seed=123, verbose=False)

In [62]:
scores = end_model.score((X_test_bow, Y_test), metric=metrics)

Accuracy: 0.611
Recall: 0.664
Precision: 0.734
F1: 0.697
        y=1    y=2   
 l=1   2068    748   
 l=2   1047    757   


## 2. BOW with Embeddings

In [63]:
import torch.nn as nn

slm = SparseLinearModule(embed_size=50, vocab_size=X_train_bow.shape[1])
end_model_bowe = EndModel([50, 200, 2], input_module=slm, seed=123)
kwargs = {
    'optimizer_config': {'optimizer': 'sgd'},
    'validation_metric': 'f1',
    'n_epochs': 5
}
end_model_bowe.train_model((torch.tensor(X_train_bow, dtype=torch.long), Y_train_ps), seed=123, **kwargs)


Network architecture:
Sequential(
  (0): Sequential(
    (0): SparseLinearModule(
      (W): Embedding(9946, 50, padding_idx=0, sparse=True)
    )
    (1): ReLU()
  )
  (1): Sequential(
    (0): Linear(in_features=50, out_features=200, bias=True)
    (1): ReLU()
  )
  (2): Linear(in_features=200, out_features=2, bias=True)
)



100%|██████████| 259/259 [00:13<00:00, 19.27it/s, avg_loss=1.4]  

[E:0]	Train Loss: 0.700



100%|██████████| 259/259 [00:14<00:00, 17.87it/s, avg_loss=1.39] 

[E:1]	Train Loss: 0.696



100%|██████████| 259/259 [00:14<00:00, 18.49it/s, avg_loss=1.39] 


[E:2]	Train Loss: 0.697


100%|██████████| 259/259 [00:14<00:00, 18.26it/s, avg_loss=1.39] 

[E:3]	Train Loss: 0.698



100%|██████████| 259/259 [00:13<00:00, 18.78it/s, avg_loss=1.39] 


[E:4]	Train Loss: 0.695
Finished Training


In [40]:
end_model_bowe.score((torch.tensor(X_test_bow, dtype=torch.long), Y_test), metric=metrics)

Accuracy: 0.674
Recall: 1.000
Precision: 0.674
F1: 0.805
        y=1    y=2   
 l=1   3115   1505   
 l=2     0      0    


[0.6742424242424242, 1.0, 0.6742424242424242, 0.8054298642533936]

## 3. LSTM  (Random Initalized Embeddings)

In [42]:
vocab_size = X_train_bow.shape[1]
lstm = LSTMModule(embed_size=50,
                  hidden_size=100,
                  vocab_size=vocab_size,
                  lstm_reduction='attention',
                  dropout=0,
                  num_layers=1,
                  freeze=False)
end_model_lstm = EndModel([200, 2], input_module=lstm, seed=123)
kwargs = {
    'validation_metric': 'f1',
    'n_epochs': 5
}
end_model_lstm.train_model((X_train_lstm, Y_train_ps), **kwargs)


Using randomly initialized embeddings.
Embeddings shape = (9946, 50)
The embeddings are NOT FROZEN
Using lstm_reduction = 'attention'

Network architecture:
Sequential(
  (0): Sequential(
    (0): LSTMModule(
      (embeddings): Embedding(9946, 50)
      (lstm): LSTM(50, 100, batch_first=True, bidirectional=True)
    )
    (1): ReLU()
  )
  (1): Linear(in_features=200, out_features=2, bias=True)
)



100%|██████████| 259/259 [01:34<00:00,  3.93it/s, avg_loss=1.18] 

[E:0]	Train Loss: 0.589



100%|██████████| 259/259 [02:56<00:00,  3.64it/s, avg_loss=1.07] 

[E:1]	Train Loss: 0.536



100%|██████████| 259/259 [01:23<00:00,  3.85it/s, avg_loss=1.03] 

[E:2]	Train Loss: 0.518



100%|██████████| 259/259 [01:37<00:00,  3.00it/s, avg_loss=1.02] 

[E:3]	Train Loss: 0.511



100%|██████████| 259/259 [01:32<00:00,  2.81it/s, avg_loss=1.01] 

[E:4]	Train Loss: 0.508
Finished Training





In [26]:
end_model_lstm.score((X_test_lstm, Y_test), metric=metrics)

Accuracy: 0.623
Recall: 0.636
Precision: 0.765
F1: 0.694
        y=1    y=2   
 l=1   1980    609   
 l=2   1135    896   


[0.6225108225108225, 0.6356340288924559, 0.764774044032445, 0.694249649368864]

## 4. LSTM (Pretrained Embeddings)

In [27]:
# load embeddings
with open("data/cdr_data/wembs", 'rb') as f:
    wembs = pickle.load(f)

In [28]:
lstm = LSTMModule(embed_size=50,
                  hidden_size=100,
                  embeddings=wembs,
                  lstm_reduction='attention',
                  dropout=0,
                  num_layers=1,
                  freeze=False)

end_model_lstmpe = EndModel([200, 2], input_module=lstm, seed=123)
kwargs = {
    'validation_metric': 'f1',
    'n_epochs': 5
}

end_model_lstmpe.train_model((X_train_lstm, Y_train_ps), dev_data=(X_dev_lstm, Y_dev), **kwargs)

Using pretrained embeddings.
Embeddings shape = (11406, 50)
The embeddings are NOT FROZEN
Using lstm_reduction = 'attention'

Network architecture:
Sequential(
  (0): Sequential(
    (0): LSTMModule(
      (embeddings): Embedding(11406, 50)
      (lstm): LSTM(50, 100, batch_first=True, bidirectional=True)
    )
    (1): ReLU()
  )
  (1): Linear(in_features=200, out_features=2, bias=True)
)



100%|██████████| 259/259 [01:20<00:00,  4.26it/s, avg_loss=1.17] 


Saving model at iteration 0 with best score 0.752
[E:0]	Train Loss: 0.589	Dev score: 0.752


100%|██████████| 259/259 [01:12<00:00,  4.11it/s, avg_loss=1.06] 


[E:1]	Train Loss: 0.532	Dev score: 0.707


100%|██████████| 259/259 [01:13<00:00,  4.20it/s, avg_loss=1.03] 


[E:2]	Train Loss: 0.517	Dev score: 0.715


100%|██████████| 259/259 [01:13<00:00,  3.91it/s, avg_loss=1.02] 


[E:3]	Train Loss: 0.510	Dev score: 0.713


100%|██████████| 259/259 [01:12<00:00,  4.04it/s, avg_loss=1.01] 


[E:4]	Train Loss: 0.507	Dev score: 0.680
Restoring best model from iteration 0 with score 0.752
Finished Training
Accuracy: 0.662
        y=1    y=2   
 l=1    455    163   
 l=2    137    133   


In [29]:
score = end_model_lstmpe.score((X_test_lstm, Y_test), metric=metrics)

Accuracy: 0.635
Recall: 0.697
Precision: 0.746
F1: 0.720
        y=1    y=2   
 l=1   2170    739   
 l=2    945    766   
