# Slicing CDR Relation Extraction 

In [3]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [4]:
import os
import sys
sys.path.append('/dfs/scratch0/vschen/metal')

import metal
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
np.set_printoptions(precision=4, suppress=True)

In [5]:
print('PyTorch: ', torch.__version__)
print('MeTaL:   ', metal.__version__)
print('Python:  ', sys.version)
print('Python:  ', sys.version_info)

PyTorch:  0.4.1
MeTaL:    0.3.3
Python:   3.6.7 (default, Dec  8 2018, 17:35:14) 
[GCC 5.4.0 20160609]
Python:   sys.version_info(major=3, minor=6, micro=7, releaselevel='final', serial=0)


## Initalize CDR Dataset
To uncompress the SQLite db: ```bzip2 -d cdr.db.bz2```

In [6]:
from metal.contrib.backends.wrapper import SnorkelDataset
import os

db_conn_str   = os.path.join(os.getcwd(),"cdr.db")
candidate_def = ['ChemicalDisease', ['chemical', 'disease']]

train, dev, test = SnorkelDataset.splits(db_conn_str, 
                                         candidate_def, 
                                         max_seq_len=125)

print(f'[TRAIN] {len(train)}')
print(f'[DEV]   {len(dev)}')
print(f'[TEST]  {len(test)}')

Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/CDR/cdr.db
Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/CDR/cdr.db
Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/CDR/cdr.db
[TRAIN] 8272
[DEV]   888
[TEST]  4620


## Get Pretrained Embeddings

Download [GloVe embeddings](http://nlp.stanford.edu/data/glove.6B.zip):
`wget http://nlp.stanford.edu/data/glove.6B.zip \
&& mkdir -p glove.6B \
&& unzip glove.6B.zip -d glove.6B \
&& rm glove.6B.zip`

In [7]:
from embeddings import EmbeddingLoader, load_embeddings
emb_path  = "../glove.6B/glove.6B.50d.txt"
embs  = EmbeddingLoader(emb_path, fmt='text')

## Generate `L_*` to target slices

In [8]:
from labeling_functions import LFs
print ([lf.__name__ for lf in LFs])

['LF_c_cause_d', 'LF_c_d', 'LF_c_induced_d', 'LF_c_treat_d', 'LF_c_treat_d_wide', 'LF_closer_chem', 'LF_closer_dis', 'LF_ctd_marker_c_d', 'LF_ctd_marker_induce', 'LF_ctd_therapy_treat', 'LF_ctd_unspecified_treat', 'LF_ctd_unspecified_induce', 'LF_d_following_c', 'LF_d_induced_by_c', 'LF_d_induced_by_c_tight', 'LF_d_treat_c', 'LF_develop_d_following_c', 'LF_far_c_d', 'LF_far_d_c', 'LF_improve_before_disease', 'LF_in_ctd_therapy', 'LF_in_ctd_marker', 'LF_in_patient_with', 'LF_induce', 'LF_induce_name', 'LF_induced_other', 'LF_level', 'LF_measure', 'LF_neg_d', 'LF_risk_d', 'LF_treat_d', 'LF_uncertain', 'LF_weak_assertions']


In [9]:
%%time 
from snorkel import SnorkelSession
session = SnorkelSession()

from snorkel.annotations import LabelAnnotator
labeler = LabelAnnotator(lfs=LFs)
L_train = labeler.apply(split=0)
L_dev = labeler.apply(split=1) # used for debugging
L_test = labeler.apply(split=2) # used for evaluation

from snorkel.learning.structure import DependencySelector
ds = DependencySelector()
deps = ds.select(L_train, threshold=0.1)
from snorkel.learning import GenerativeModel

# need to extract `accs` from gen_model
gen_model = GenerativeModel(lf_propensity=True)
gen_model.train(
    L_train, deps=deps, decay=0.95, step_size=0.1/L_train.shape[0], reg_param=0.0
)

accs = np.array(gen_model.learned_lf_stats()['Accuracy'])
accs[np.isnan(accs)] = 0
accs = np.minimum(accs, 0.999)

gen_marginals = gen_model.marginals(L_train)

Clearing existing...


  0%|          | 6/8272 [00:00<02:20, 58.98it/s]

Running UDF...


100%|██████████| 8272/8272 [00:53<00:00, 155.70it/s]


Clearing existing...


  1%|          | 7/888 [00:00<00:12, 69.80it/s]

Running UDF...


100%|██████████| 888/888 [00:04<00:00, 217.25it/s]
  0%|          | 0/4620 [00:00<?, ?it/s]

Clearing existing...
Running UDF...


100%|██████████| 4620/4620 [00:22<00:00, 205.85it/s]


Inferred cardinality: 2


  "Precision": tp / (tp + fp),
  "Accuracy": (tp + tn) / coverage,


CPU times: user 3min 5s, sys: 932 ms, total: 3min 5s
Wall time: 3min 7s


In [10]:
L = L_train.copy()
L[L==-1] = 2 # convert to multiclass
Y_dev = np.array([ex[1] for ex in dev])

In [11]:
from metal.label_model import LabelModel
label_model = LabelModel(k=2, seed=123)
label_model.train_model(L, Y_dev=Y_dev)
label_model.score((L_dev, Y_dev))

Computing O...
Estimating \mu...
[E:0]	Train Loss: 0.492
[E:10]	Train Loss: 0.184
[E:20]	Train Loss: 0.122
[E:30]	Train Loss: 0.076
[E:40]	Train Loss: 0.055
[E:50]	Train Loss: 0.038
[E:60]	Train Loss: 0.033
[E:70]	Train Loss: 0.030
[E:80]	Train Loss: 0.029
[E:90]	Train Loss: 0.028
[E:99]	Train Loss: 0.028
Finished Training
Accuracy: 0.699
        y=1    y=2   
 l=1    81     52    
 l=2    215    540   


0.6993243243243243

### Weak Labels in Dataset

In [12]:
metal_marginals = label_model.predict_proba(L)
metal_marginals

array([[0.1542, 0.8458],
       [1.    , 0.    ],
       [0.2198, 0.7802],
       ...,
       [0.4359, 0.5641],
       [0.3333, 0.6667],
       [0.9703, 0.0297]])

In [13]:
snorkel_marginals = np.vstack((gen_marginals, 1-gen_marginals)).T
snorkel_marginals

array([[0.3856, 0.6144],
       [0.8905, 0.1095],
       [0.6226, 0.3774],
       ...,
       [0.7244, 0.2756],
       [0.5   , 0.5   ],
       [0.7768, 0.2232]])

In [14]:
from metal.contrib.slicing.sqlite_wrapper \
    import SnorkelDataset as SnorkelSliceDataset

train_metal = SnorkelSliceDataset(
    db_conn_str,
    candidate_def,
    split=0,
    train_marginals=metal_marginals
)

train_snorkel = SnorkelSliceDataset(
    db_conn_str,
    candidate_def,
    split=0,
    train_marginals=snorkel_marginals
)

Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/CDR/cdr.db
Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/CDR/cdr.db


### Custom Slicing Dataset

In [15]:
train_slice = SnorkelSliceDataset(
    db_conn_str,
    candidate_def,
    split=0,
    L_train=L_train.todense()
)

train_slice_metal = SnorkelSliceDataset(
    db_conn_str,
    candidate_def,
    split=0,
    L_train=L_train.todense(),
    train_marginals=metal_marginals
)

train_slice_snorkel = SnorkelSliceDataset(
    db_conn_str,
    candidate_def,
    split=0,
    L_train=L_train.todense(),
    train_marginals=snorkel_marginals
)

Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/CDR/cdr.db
Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/CDR/cdr.db
Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/CDR/cdr.db


In [16]:
def init_model(use_end_model=False, r=None, rw=None):
    wembs = load_embeddings(train.word_dict, embs)
    lstm = LSTMModule(embed_size=50, 
                      hidden_size=100, 
                      embeddings=wembs,
                      lstm_reduction='attention', 
                      dropout=0.0, 
                      num_layers=1, 
                      freeze=False)
    if use_end_model:
        model = EndModel([200, 2], input_module=lstm, seed=123, use_cuda=use_cuda)
    else:
        model = SliceDPModel(lstm, accs, r, rw, seed=123, use_cuda=True)

    model.config['train_config']['optimizer_config']['optimizer_common']['lr'] = 0.01
    model.config['train_config']['validation_metric'] = 'f1'
    model.config['train_config']['batch_size'] = 32
    model.config['train_config']['n_epochs'] = 10
    return model

## (a) `Oracle`: EndModel Trained on Full GT

In [17]:
from metal.end_model import EndModel
from metal.modules import LSTMModule

oracle = init_model(use_end_model=True)
%time oracle.train_model(train, dev_data=dev)
oracle.score(test, metric=['precision', 'recall', 'f1'])

Loaded 77.0% (7656/9946) pretrained embeddings
Using pretrained embeddings.
Embeddings shape = (9946, 50)
The embeddings are NOT FROZEN
Using lstm_reduction = 'attention'

Network architecture:
Sequential(
  (0): Sequential(
    (0): LSTMModule(
      (embeddings): Embedding(9946, 50)
      (lstm): LSTM(50, 100, batch_first=True, bidirectional=True)
    )
    (1): ReLU()
  )
  (1): Linear(in_features=200, out_features=2, bias=True)
)

Using GPU...


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 0 with best score 0.585
[E:0]	Train Loss: 0.538	Dev f1: 0.585


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 1 with best score 0.610
[E:1]	Train Loss: 0.284	Dev f1: 0.610


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:2]	Train Loss: 0.163	Dev f1: 0.574


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:3]	Train Loss: 0.112	Dev f1: 0.571


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:4]	Train Loss: 0.075	Dev f1: 0.586


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:5]	Train Loss: 0.054	Dev f1: 0.590


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:6]	Train Loss: 0.042	Dev f1: 0.572


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:7]	Train Loss: 0.045	Dev f1: 0.549


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:8]	Train Loss: 0.038	Dev f1: 0.566


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:9]	Train Loss: 0.027	Dev f1: 0.557
Restoring best model from iteration 1 with score 0.610
Finished Training
F1: 0.610
        y=1    y=2   
 l=1    201    162   
 l=2    95     430   
CPU times: user 11min 23s, sys: 22.9 s, total: 11min 46s
Wall time: 11min 42s
Precision: 0.503
Recall: 0.738
F1: 0.598
        y=1    y=2   
 l=1   1111   1098   
 l=2    394   2017   


[0.5029425079221367, 0.7382059800664452, 0.598276790522348]

## (b) `BaseWeak`: EndModel trained on weak labels

In [18]:
from metal.end_model import EndModel
from metal.modules import LSTMModule

base_weak = init_model(use_end_model=True)
%time base_weak.train_model(train_snorkel, dev_data=dev)
base_weak_scores = base_weak.score(test, metric=['precision', 'recall', 'f1'])

Loaded 77.0% (7656/9946) pretrained embeddings
Using pretrained embeddings.
Embeddings shape = (9946, 50)
The embeddings are NOT FROZEN
Using lstm_reduction = 'attention'

Network architecture:
Sequential(
  (0): Sequential(
    (0): LSTMModule(
      (embeddings): Embedding(9946, 50)
      (lstm): LSTM(50, 100, batch_first=True, bidirectional=True)
    )
    (1): ReLU()
  )
  (1): Linear(in_features=200, out_features=2, bias=True)
)

Using GPU...


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 0 with best score 0.562
[E:0]	Train Loss: 0.657	Dev f1: 0.562


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:1]	Train Loss: 0.636	Dev f1: 0.536


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 2 with best score 0.566
[E:2]	Train Loss: 0.628	Dev f1: 0.566


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 3 with best score 0.572
[E:3]	Train Loss: 0.625	Dev f1: 0.572


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:4]	Train Loss: 0.623	Dev f1: 0.571


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 5 with best score 0.574
[E:5]	Train Loss: 0.622	Dev f1: 0.574


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:6]	Train Loss: 0.621	Dev f1: 0.564


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:7]	Train Loss: 0.620	Dev f1: 0.564


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:8]	Train Loss: 0.620	Dev f1: 0.569


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:9]	Train Loss: 0.619	Dev f1: 0.562
Restoring best model from iteration 5 with score 0.574
Finished Training
F1: 0.574
        y=1    y=2   
 l=1    268    370   
 l=2    28     222   
CPU times: user 11min 41s, sys: 20.6 s, total: 12min 2s
Wall time: 11min 57s
Precision: 0.396
Recall: 0.916
F1: 0.553
        y=1    y=2   
 l=1   1378   2102   
 l=2    127   1013   


## (c) `SliceUW`: Unweighted SliceModel with `rw=False`

In [19]:
from metal.contrib.slicing.online_dp import SliceDPModel, LinearModule

In [20]:
slice_uw = init_model(r=200, rw=False)
%time slice_uw.train_model(train_slice, dev_data=dev)
slice_uw_scores = slice_uw.score(test, metric=['precision', 'recall', 'f1'])

Loaded 77.0% (7656/9946) pretrained embeddings
Using pretrained embeddings.
Embeddings shape = (9946, 50)
The embeddings are NOT FROZEN
Using lstm_reduction = 'attention'
Slice Heads:
Reweighting: False
Input Network: Sequential(
  (0): Sequential(
    (0): LSTMModule(
      (embeddings): Embedding(9946, 50)
      (lstm): LSTM(50, 100, batch_first=True, bidirectional=True)
    )
    (1): ReLU()
  )
)
L_head: Linear(in_features=200, out_features=33, bias=False)
Y_head: Linear(in_features=200, out_features=2, bias=False)
Using GPU...


  self.w = torch.from_numpy(np.log(accs / (1-accs))).float()


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))

  return F.softmax(outputs)



Saving model at iteration 0 with best score 0.537
[E:0]	Train Loss: 3.501	Dev f1: 0.537


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 1 with best score 0.562
[E:1]	Train Loss: 3.430	Dev f1: 0.562


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 2 with best score 0.581
[E:2]	Train Loss: 3.399	Dev f1: 0.581


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 3 with best score 0.615
[E:3]	Train Loss: 3.383	Dev f1: 0.615


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:4]	Train Loss: 3.372	Dev f1: 0.599


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:5]	Train Loss: 3.364	Dev f1: 0.611


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:6]	Train Loss: 3.357	Dev f1: 0.601


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:7]	Train Loss: 3.353	Dev f1: 0.606


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:8]	Train Loss: 3.351	Dev f1: 0.582


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:9]	Train Loss: 3.349	Dev f1: 0.591
Restoring best model from iteration 3 with score 0.615
Finished Training
F1: 0.615
        y=1    y=2   
 l=1    260    290   
 l=2    36     302   
CPU times: user 22min 4s, sys: 33.4 s, total: 22min 38s
Wall time: 22min 32s
Precision: 0.423
Recall: 0.835
F1: 0.562
        y=1    y=2   
 l=1   1257   1715   
 l=2    248   1400   


## (d) `SliceOurs`: Attention SliceModel with `rw=True`

In [21]:
slice_ours = init_model(r=200, rw=True)
%time slice_ours.train_model(train_slice, dev_data=dev)
slice_ours_scores = slice_ours.score(test, metric=['precision', 'recall', 'f1'])

Loaded 77.0% (7656/9946) pretrained embeddings
Using pretrained embeddings.
Embeddings shape = (9946, 50)
The embeddings are NOT FROZEN
Using lstm_reduction = 'attention'
Slice Heads:
Reweighting: True
Input Network: Sequential(
  (0): Sequential(
    (0): LSTMModule(
      (embeddings): Embedding(9946, 50)
      (lstm): LSTM(50, 100, batch_first=True, bidirectional=True)
    )
    (1): ReLU()
  )
)
L_head: Linear(in_features=200, out_features=33, bias=False)
Y_head: Linear(in_features=400, out_features=2, bias=False)
Using GPU...


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))

  A = F.softmax(self.forward_L(x)).unsqueeze(1)



Saving model at iteration 0 with best score 0.524
[E:0]	Train Loss: 3.496	Dev f1: 0.524


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 1 with best score 0.537
[E:1]	Train Loss: 3.425	Dev f1: 0.537


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 2 with best score 0.573
[E:2]	Train Loss: 3.397	Dev f1: 0.573


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:3]	Train Loss: 3.379	Dev f1: 0.565


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 4 with best score 0.577
[E:4]	Train Loss: 3.369	Dev f1: 0.577


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 5 with best score 0.585
[E:5]	Train Loss: 3.362	Dev f1: 0.585


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:6]	Train Loss: 3.358	Dev f1: 0.571


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:7]	Train Loss: 3.354	Dev f1: 0.569


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:8]	Train Loss: 3.350	Dev f1: 0.541


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:9]	Train Loss: 3.347	Dev f1: 0.555
Restoring best model from iteration 5 with score 0.585
Finished Training
F1: 0.585
        y=1    y=2   
 l=1    273    364   
 l=2    23     228   
CPU times: user 31min 52s, sys: 51.4 s, total: 32min 43s
Wall time: 32min 38s
Precision: 0.394
Recall: 0.888
F1: 0.545
        y=1    y=2   
 l=1   1337   2060   
 l=2    168   1055   


## (e) `SliceOursWeak`: Slice Model with $\tilde{Y}$ priors

In [22]:
slice_ours_weak = init_model(r=200, rw=True)
%time slice_ours_weak.train_model(train_slice_snorkel, dev_data=dev)
slice_ours_weak_scores = slice_ours_weak.score(test, metric=['precision', 'recall', 'f1'])

Loaded 77.0% (7656/9946) pretrained embeddings
Using pretrained embeddings.
Embeddings shape = (9946, 50)
The embeddings are NOT FROZEN
Using lstm_reduction = 'attention'
Slice Heads:
Reweighting: True
Input Network: Sequential(
  (0): Sequential(
    (0): LSTMModule(
      (embeddings): Embedding(9946, 50)
      (lstm): LSTM(50, 100, batch_first=True, bidirectional=True)
    )
    (1): ReLU()
  )
)
L_head: Linear(in_features=200, out_features=33, bias=False)
Y_head: Linear(in_features=400, out_features=2, bias=False)
Using GPU...


  self.w = torch.from_numpy(np.log(accs / (1-accs))).float()


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))

  # "confidence" of each LF on each example
  



Saving model at iteration 0 with best score 0.526
[E:0]	Train Loss: 3.499	Dev f1: 0.526


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 1 with best score 0.544
[E:1]	Train Loss: 3.431	Dev f1: 0.544


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 2 with best score 0.587
[E:2]	Train Loss: 3.399	Dev f1: 0.587


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 3 with best score 0.591
[E:3]	Train Loss: 3.382	Dev f1: 0.591


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 4 with best score 0.602
[E:4]	Train Loss: 3.370	Dev f1: 0.602


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 5 with best score 0.603
[E:5]	Train Loss: 3.362	Dev f1: 0.603


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:6]	Train Loss: 3.356	Dev f1: 0.588


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:7]	Train Loss: 3.353	Dev f1: 0.596


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:8]	Train Loss: 3.350	Dev f1: 0.581


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:9]	Train Loss: 3.346	Dev f1: 0.579
Restoring best model from iteration 5 with score 0.603
Finished Training
F1: 0.603
        y=1    y=2   
 l=1    257    299   
 l=2    39     293   
CPU times: user 32min 15s, sys: 50.5 s, total: 33min 5s
Wall time: 33min
Precision: 0.419
Recall: 0.869
F1: 0.565
        y=1    y=2   
 l=1   1308   1817   
 l=2    197   1298   


## (f) `SliceUWWeak`: Unweighted Slice model with $\tilde{Y}$ priors

In [23]:
slice_uw_weak = init_model(r=200, rw=False)
%time slice_uw_weak.train_model(train_slice_snorkel, dev_data=dev)
slice_uw_weak_scores = slice_uw_weak.score(test, metric=['precision', 'recall', 'f1'])

Loaded 77.0% (7656/9946) pretrained embeddings
Using pretrained embeddings.
Embeddings shape = (9946, 50)
The embeddings are NOT FROZEN
Using lstm_reduction = 'attention'
Slice Heads:
Reweighting: False
Input Network: Sequential(
  (0): Sequential(
    (0): LSTMModule(
      (embeddings): Embedding(9946, 50)
      (lstm): LSTM(50, 100, batch_first=True, bidirectional=True)
    )
    (1): ReLU()
  )
)
L_head: Linear(in_features=200, out_features=33, bias=False)
Y_head: Linear(in_features=200, out_features=2, bias=False)
Using GPU...


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 0 with best score 0.536
[E:0]	Train Loss: 3.499	Dev f1: 0.536


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 1 with best score 0.553
[E:1]	Train Loss: 3.428	Dev f1: 0.553


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 2 with best score 0.561
[E:2]	Train Loss: 3.397	Dev f1: 0.561


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 3 with best score 0.572
[E:3]	Train Loss: 3.379	Dev f1: 0.572


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 4 with best score 0.576
[E:4]	Train Loss: 3.368	Dev f1: 0.576


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 5 with best score 0.594
[E:5]	Train Loss: 3.361	Dev f1: 0.594


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:6]	Train Loss: 3.357	Dev f1: 0.582


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:7]	Train Loss: 3.353	Dev f1: 0.578


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:8]	Train Loss: 3.350	Dev f1: 0.584


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:9]	Train Loss: 3.348	Dev f1: 0.578
Restoring best model from iteration 5 with score 0.594
Finished Training
F1: 0.594
        y=1    y=2   
 l=1    270    343   
 l=2    26     249   
CPU times: user 21min 44s, sys: 34.8 s, total: 22min 19s
Wall time: 22min 14s
Precision: 0.406
Recall: 0.865
F1: 0.552
        y=1    y=2   
 l=1   1302   1908   
 l=2    203   1207   


## Slice-specific scores

In [24]:
# TODO: don't call private fns
Yp_oracle, Y = oracle._get_predictions(test)
Yp_base_weak, Y = base_weak._get_predictions(test)
Yp_slice_uw, Y = slice_uw._get_predictions(test)
Yp_slice_ours, Y = slice_ours._get_predictions(test)
Yp_slice_ours_weak, Y = slice_ours_weak._get_predictions(test)
Yp_slice_uw_weak, Y = slice_uw_weak._get_predictions(test)

#### `slice_ours` (re-weighting, accuracy priors) vs. `base_weak` (end_model trained on weak labels)

In [25]:
L_test = L_test.todense()

In [26]:
from metal.contrib.slicing.experiment_utils import compare_LF_slices
compare_LF_slices(Yp_slice_ours, Yp_base_weak, 
                  Y, L_test, LFs, metric='accuracy', delta_threshold=0.1)

[31m[LF_c_treat_d] delta: -0.1106, OURS: 0.6340, BASE: 0.7447[0m
[31m[LF_ctd_therapy_treat] delta: -0.1594, OURS: 0.6016, BASE: 0.7610[0m
[31m[LF_ctd_unspecified_treat] delta: -0.1038, OURS: 0.6055, BASE: 0.7093[0m
[31m[LF_in_patient_with] delta: -0.1111, OURS: 0.1111, BASE: 0.2222[0m
[32m[LF_level] delta: 0.1316, OURS: 0.4474, BASE: 0.3158[0m
improved 1/33


#### `slice_ours_weak` (slice model with weak priors + reweighting) vs. `base_weak` (end_model trained on weak labels)

In [27]:
compare_LF_slices(Yp_slice_ours_weak, Yp_base_weak,
                  Y, L_test, LFs, metric='accuracy', delta_threshold=0.1)

[32m[LF_closer_dis] delta: 0.2208, OURS: 0.6104, BASE: 0.3896[0m
[31m[LF_ctd_therapy_treat] delta: -0.1315, OURS: 0.6295, BASE: 0.7610[0m
[32m[LF_far_d_c] delta: 0.1626, OURS: 0.6350, BASE: 0.4724[0m
[32m[LF_improve_before_disease] delta: 0.3333, OURS: 0.6667, BASE: 0.3333[0m
[31m[LF_in_patient_with] delta: -0.1111, OURS: 0.1111, BASE: 0.2222[0m
[32m[LF_level] delta: 0.1316, OURS: 0.4474, BASE: 0.3158[0m
[32m[LF_treat_d] delta: 0.2393, OURS: 0.6410, BASE: 0.4017[0m
improved 5/33


#### `slice_ours_weak` vs. `oracle` (trained on full GT)

In [28]:
compare_LF_slices(Yp_slice_ours, Yp_oracle,
                  Y, L_test, LFs, metric='accuracy', delta_threshold=0.05)

[31m[LF_c_d] delta: -0.0609, OURS: 0.6604, BASE: 0.7213[0m
[31m[LF_c_treat_d] delta: -0.1234, OURS: 0.6340, BASE: 0.7574[0m
[31m[LF_c_treat_d_wide] delta: -0.1303, OURS: 0.6225, BASE: 0.7528[0m
[31m[LF_closer_chem] delta: -0.2527, OURS: 0.4313, BASE: 0.6841[0m
[31m[LF_closer_dis] delta: -0.2468, OURS: 0.4545, BASE: 0.7013[0m
[31m[LF_ctd_therapy_treat] delta: -0.1235, OURS: 0.6016, BASE: 0.7251[0m
[31m[LF_ctd_unspecified_treat] delta: -0.1419, OURS: 0.6055, BASE: 0.7474[0m
[31m[LF_d_induced_by_c] delta: -0.0508, OURS: 0.6215, BASE: 0.6723[0m
[31m[LF_d_treat_c] delta: -0.2424, OURS: 0.5333, BASE: 0.7758[0m
[32m[LF_develop_d_following_c] delta: 0.2500, OURS: 1.0000, BASE: 0.7500[0m
[31m[LF_far_c_d] delta: -0.1866, OURS: 0.4832, BASE: 0.6698[0m
[31m[LF_far_d_c] delta: -0.1135, OURS: 0.5675, BASE: 0.6810[0m
[31m[LF_improve_before_disease] delta: -0.4444, OURS: 0.3333, BASE: 0.7778[0m
[31m[LF_in_ctd_therapy] delta: -0.1618, OURS: 0.5190, BASE: 0.6807[0m
[31m[LF_i

#### `slice_ours` vs. `Yp_slice_uw` (unweighted slice model)

In [29]:
compare_LF_slices(Yp_slice_ours, Yp_slice_uw,
                  Y, L_test, LFs, metric='accuracy', delta_threshold=0.05)

[31m[LF_c_treat_d] delta: -0.0681, OURS: 0.6340, BASE: 0.7021[0m
[31m[LF_closer_chem] delta: -0.1495, OURS: 0.4313, BASE: 0.5808[0m
[31m[LF_d_treat_c] delta: -0.0788, OURS: 0.5333, BASE: 0.6121[0m
[31m[LF_far_c_d] delta: -0.1007, OURS: 0.4832, BASE: 0.5840[0m
[31m[LF_far_d_c] delta: -0.0890, OURS: 0.5675, BASE: 0.6564[0m
[31m[LF_improve_before_disease] delta: -0.2222, OURS: 0.3333, BASE: 0.5556[0m
[31m[LF_in_ctd_therapy] delta: -0.0780, OURS: 0.5190, BASE: 0.5970[0m
[31m[LF_in_patient_with] delta: -0.6667, OURS: 0.1111, BASE: 0.7778[0m
[31m[LF_induced_other] delta: -0.1017, OURS: 0.5198, BASE: 0.6215[0m
[31m[LF_measure] delta: -0.0909, OURS: 0.6364, BASE: 0.7273[0m
[31m[LF_neg_d] delta: -0.0714, OURS: 0.3750, BASE: 0.4464[0m
[31m[LF_risk_d] delta: -0.1429, OURS: 0.4286, BASE: 0.5714[0m
[31m[LF_treat_d] delta: -0.1282, OURS: 0.4786, BASE: 0.6068[0m
[31m[LF_weak_assertions] delta: -0.0576, OURS: 0.5477, BASE: 0.6053[0m
improved 0/33


#### `slice_ours_weak` vs. `Yp_slice_uw_weak` (unweighted slice model)

In [30]:
compare_LF_slices(Yp_slice_ours_weak, Yp_slice_uw_weak,
                  Y, L_test, LFs, metric='accuracy', delta_threshold=0.05)

[32m[LF_c_treat_d_wide] delta: 0.0607, OURS: 0.6652, BASE: 0.6045[0m
[32m[LF_closer_dis] delta: 0.0649, OURS: 0.6104, BASE: 0.5455[0m
[32m[LF_improve_before_disease] delta: 0.2222, OURS: 0.6667, BASE: 0.4444[0m
[32m[LF_level] delta: 0.1053, OURS: 0.4474, BASE: 0.3421[0m
[31m[LF_measure] delta: -0.0909, OURS: 0.6364, BASE: 0.7273[0m
improved 4/33
