# Slicing CDR Relation Extraction 

In [2]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [3]:
import os
import sys
sys.path.append('/dfs/scratch0/vschen/metal')

import metal
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
np.set_printoptions(precision=4, suppress=True)

In [4]:
print('PyTorch: ', torch.__version__)
print('MeTaL:   ', metal.__version__)
print('Python:  ', sys.version)
print('Python:  ', sys.version_info)

PyTorch:  0.4.1
MeTaL:    0.3.3
Python:   3.6.7 (default, Dec  8 2018, 17:35:14) 
[GCC 5.4.0 20160609]
Python:   sys.version_info(major=3, minor=6, micro=7, releaselevel='final', serial=0)


## Initalize CDR Dataset
To uncompress the SQLite db: ```bzip2 -d cdr.db.bz2```

In [5]:
from metal.contrib.backends.wrapper import SnorkelDataset
import os

db_conn_str   = os.path.join(os.getcwd(),"cdr.db")
candidate_def = ['ChemicalDisease', ['chemical', 'disease']]

train, dev, test = SnorkelDataset.splits(db_conn_str, 
                                         candidate_def, 
                                         max_seq_len=125)

print(f'[TRAIN] {len(train)}')
print(f'[DEV]   {len(dev)}')
print(f'[TEST]  {len(test)}')

Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/CDR/cdr.db
Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/CDR/cdr.db
Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/CDR/cdr.db
[TRAIN] 8272
[DEV]   888
[TEST]  4620


## Get Pretrained Embeddings

Download [GloVe embeddings](http://nlp.stanford.edu/data/glove.6B.zip):
`wget http://nlp.stanford.edu/data/glove.6B.zip \
&& mkdir -p glove.6B \
&& unzip glove.6B.zip -d glove.6B \
&& rm glove.6B.zip`

In [6]:
from embeddings import EmbeddingLoader, load_embeddings
emb_path  = "../glove.6B/glove.6B.50d.txt"
embs  = EmbeddingLoader(emb_path, fmt='text')

## Generate `L_*` to target slices

In [7]:
from labeling_functions import LFs
print ([lf.__name__ for lf in LFs])

['LF_c_cause_d', 'LF_c_d', 'LF_c_induced_d', 'LF_c_treat_d', 'LF_c_treat_d_wide', 'LF_closer_chem', 'LF_closer_dis', 'LF_ctd_marker_c_d', 'LF_ctd_marker_induce', 'LF_ctd_therapy_treat', 'LF_ctd_unspecified_treat', 'LF_ctd_unspecified_induce', 'LF_d_following_c', 'LF_d_induced_by_c', 'LF_d_induced_by_c_tight', 'LF_d_treat_c', 'LF_develop_d_following_c', 'LF_far_c_d', 'LF_far_d_c', 'LF_improve_before_disease', 'LF_in_ctd_therapy', 'LF_in_ctd_marker', 'LF_in_patient_with', 'LF_induce', 'LF_induce_name', 'LF_induced_other', 'LF_level', 'LF_measure', 'LF_neg_d', 'LF_risk_d', 'LF_treat_d', 'LF_uncertain', 'LF_weak_assertions']


In [8]:
%%time 
from snorkel import SnorkelSession
session = SnorkelSession()

from snorkel.annotations import LabelAnnotator
labeler = LabelAnnotator(lfs=LFs)
L_train = labeler.apply(split=0)
L_dev = labeler.apply(split=1) # used for debugging
L_test = labeler.apply(split=2) # used for evaluation

from snorkel.learning.structure import DependencySelector
ds = DependencySelector()
deps = ds.select(L_train, threshold=0.1)
from snorkel.learning import GenerativeModel

# need to extract `accs` from gen_model
gen_model = GenerativeModel(lf_propensity=True)
gen_model.train(
    L_train, deps=deps, decay=0.95, step_size=0.1/L_train.shape[0], reg_param=0.0
)

accs = np.array(gen_model.learned_lf_stats()['Accuracy'])
accs[np.isnan(accs)] = 0
accs = np.minimum(accs, 0.999)

gen_marginals = gen_model.marginals(L_train)

Clearing existing...


  0%|          | 6/8272 [00:00<02:26, 56.52it/s]

Running UDF...


100%|██████████| 8272/8272 [00:45<00:00, 183.53it/s]


Clearing existing...


  1%|          | 9/888 [00:00<00:09, 88.13it/s]

Running UDF...


100%|██████████| 888/888 [00:05<00:00, 161.04it/s]
  0%|          | 0/4620 [00:00<?, ?it/s]

Clearing existing...
Running UDF...


100%|██████████| 4620/4620 [00:27<00:00, 169.08it/s]


Inferred cardinality: 2


  "Precision": tp / (tp + fp),
  "Accuracy": (tp + tn) / coverage,


CPU times: user 3min 6s, sys: 816 ms, total: 3min 7s
Wall time: 3min 8s


In [9]:
L = L_train.copy()
L[L==-1] = 2 # convert to multiclass
Y_dev = np.array([ex[1] for ex in dev])

In [10]:
from metal.label_model import LabelModel
label_model = LabelModel(k=2, seed=123)
label_model.train_model(L, Y_dev=Y_dev)
label_model.score((L_dev, Y_dev))

Computing O...
Estimating \mu...
[E:0]	Train Loss: 0.492
[E:10]	Train Loss: 0.184
[E:20]	Train Loss: 0.122
[E:30]	Train Loss: 0.076
[E:40]	Train Loss: 0.055
[E:50]	Train Loss: 0.038
[E:60]	Train Loss: 0.033
[E:70]	Train Loss: 0.030
[E:80]	Train Loss: 0.029
[E:90]	Train Loss: 0.028
[E:99]	Train Loss: 0.028
Finished Training
Accuracy: 0.699
        y=1    y=2   
 l=1    81     52    
 l=2    215    540   


0.6993243243243243

### Weak Labels in Dataset

In [11]:
metal_marginals = label_model.predict_proba(L)
metal_marginals

array([[0.1542, 0.8458],
       [1.    , 0.    ],
       [0.2198, 0.7802],
       ...,
       [0.4359, 0.5641],
       [0.3333, 0.6667],
       [0.9703, 0.0297]])

In [12]:
snorkel_marginals = np.vstack((gen_marginals, 1-gen_marginals)).T
snorkel_marginals

array([[0.3856, 0.6144],
       [0.8905, 0.1095],
       [0.6226, 0.3774],
       ...,
       [0.7244, 0.2756],
       [0.5   , 0.5   ],
       [0.7768, 0.2232]])

In [13]:
from metal.contrib.slicing.sqlite_wrapper \
    import SnorkelDataset as SnorkelSliceDataset

train_metal = SnorkelSliceDataset(
    db_conn_str,
    candidate_def,
    split=0,
    train_marginals=metal_marginals
)

train_snorkel = SnorkelSliceDataset(
    db_conn_str,
    candidate_def,
    split=0,
    train_marginals=snorkel_marginals
)

Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/CDR/cdr.db
Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/CDR/cdr.db


### Custom Slicing Dataset

In [14]:
train_slice = SnorkelSliceDataset(
    db_conn_str,
    candidate_def,
    split=0,
    L_train=L_train.todense()
)

train_slice_metal = SnorkelSliceDataset(
    db_conn_str,
    candidate_def,
    split=0,
    L_train=L_train.todense(),
    train_marginals=metal_marginals
)

train_slice_snorkel = SnorkelSliceDataset(
    db_conn_str,
    candidate_def,
    split=0,
    L_train=L_train.todense(),
    train_marginals=snorkel_marginals
)

Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/CDR/cdr.db
Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/CDR/cdr.db
Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/CDR/cdr.db


In [15]:
from metal.contrib.slicing.online_dp import SliceDPModel
from metal.end_model import EndModel
from metal.modules import LSTMModule
def init_model(use_end_model=False, r=None, rw=None):
    wembs = load_embeddings(train.word_dict, embs)
    lstm = LSTMModule(embed_size=50, 
                      hidden_size=100, 
                      embeddings=wembs,
                      lstm_reduction='attention', 
                      dropout=0.0, 
                      num_layers=1, 
                      freeze=False)
    if use_end_model:
        model = EndModel([200, 2], input_module=lstm, seed=123, use_cuda=True)
    else:
        input_layer_config = {
            "input_relu": False,
            "input_batchnorm": False,
            "input_dropout": 0.0,
        }
        model = SliceDPModel(lstm, accs, r, rw, seed=123, use_cuda=True, input_layer_config=input_layer_config)
        
    model.config['train_config']['optimizer_config']['optimizer_common']['lr'] = 0.01
    model.config['train_config']['validation_metric'] = 'f1'
    model.config['train_config']['batch_size'] = 32
    model.config['train_config']['n_epochs'] = 10
    return model

from metal.modules import LSTMModule
from metal.tuners import RandomSearchTuner


def search_slice_weights(train_loader, dev_loader, r, rw, max_search=1):
    wembs = load_embeddings(train.word_dict, embs)
    lstm = LSTMModule(embed_size=50, 
                      hidden_size=100, 
                      embeddings=wembs,
                      lstm_reduction='attention', 
                      dropout=0.0, 
                      num_layers=1, 
                      freeze=False)
    
    searcher = RandomSearchTuner(SliceDPModel, validation_metric='f1')

    search_space = {
        "slice_weight": {"range": [0, 10], "scale": "linear"}
    }

    input_layer_config = {
        "input_relu": False,
        "input_batchnorm": False,
        "input_dropout": 0.0,
    }
    
    trained_model = searcher.search(
        search_space,
        dev_loader,
        train_args=[train_loader],
        init_args=[lstm, accs, r, rw],
        init_kwargs={"use_cuda": True, "input_layer_config": input_layer_config},
        train_kwargs={
            "lr": 0.01,
            "batch_size": 32,
            "n_epochs": 10
        },
        max_search=max_search
    )
    return trained_model

## (a) `Oracle`: EndModel Trained on Full GT

In [18]:
oracle = init_model(use_end_model=True)
%time oracle.train_model(train, dev_data=dev)
oracle.score(test, metric=['precision', 'recall', 'f1'])

Loaded 77.0% (7656/9946) pretrained embeddings
Using pretrained embeddings.
Embeddings shape = (9946, 50)
The embeddings are NOT FROZEN
Using lstm_reduction = 'attention'

Network architecture:
Sequential(
  (0): Sequential(
    (0): LSTMModule(
      (embeddings): Embedding(9946, 50)
      (lstm): LSTM(50, 100, batch_first=True, bidirectional=True)
    )
    (1): ReLU()
  )
  (1): Linear(in_features=200, out_features=2, bias=True)
)

Using GPU...


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 0 with best score 0.577
[E:0]	Train Loss: 0.550	Dev f1: 0.577


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 1 with best score 0.605
[E:1]	Train Loss: 0.304	Dev f1: 0.605


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:2]	Train Loss: 0.172	Dev f1: 0.587


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:3]	Train Loss: 0.107	Dev f1: 0.590


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:4]	Train Loss: 0.071	Dev f1: 0.572


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:5]	Train Loss: 0.049	Dev f1: 0.584


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:6]	Train Loss: 0.035	Dev f1: 0.582


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:7]	Train Loss: 0.034	Dev f1: 0.574


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:8]	Train Loss: 0.030	Dev f1: 0.592


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:9]	Train Loss: 0.030	Dev f1: 0.525
Restoring best model from iteration 1 with score 0.605
Finished Training
F1: 0.605
        y=1    y=2   
 l=1    206    179   
 l=2    90     413   
CPU times: user 13min 58s, sys: 16.8 s, total: 14min 15s
Wall time: 14min 9s
Precision: 0.493
Recall: 0.760
F1: 0.598
        y=1    y=2   
 l=1   1144   1175   
 l=2    361   1940   


[0.4933160845191893, 0.7601328903654485, 0.5983263598326359]

## (b) `BaseWeak`: EndModel trained on weak labels

In [19]:
from metal.end_model import EndModel
from metal.modules import LSTMModule

base_weak = init_model(use_end_model=True)
%time base_weak.train_model(train_snorkel, dev_data=dev)
base_weak_scores = base_weak.score(test, metric=['precision', 'recall', 'f1'])

Loaded 77.0% (7656/9946) pretrained embeddings
Using pretrained embeddings.
Embeddings shape = (9946, 50)
The embeddings are NOT FROZEN
Using lstm_reduction = 'attention'

Network architecture:
Sequential(
  (0): Sequential(
    (0): LSTMModule(
      (embeddings): Embedding(9946, 50)
      (lstm): LSTM(50, 100, batch_first=True, bidirectional=True)
    )
    (1): ReLU()
  )
  (1): Linear(in_features=200, out_features=2, bias=True)
)

Using GPU...


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 0 with best score 0.562
[E:0]	Train Loss: 0.657	Dev f1: 0.562


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:1]	Train Loss: 0.636	Dev f1: 0.536


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 2 with best score 0.566
[E:2]	Train Loss: 0.628	Dev f1: 0.566


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 3 with best score 0.572
[E:3]	Train Loss: 0.625	Dev f1: 0.572


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:4]	Train Loss: 0.623	Dev f1: 0.571


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 5 with best score 0.574
[E:5]	Train Loss: 0.622	Dev f1: 0.574


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:6]	Train Loss: 0.621	Dev f1: 0.564


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:7]	Train Loss: 0.620	Dev f1: 0.564


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:8]	Train Loss: 0.620	Dev f1: 0.569


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:9]	Train Loss: 0.619	Dev f1: 0.562
Restoring best model from iteration 5 with score 0.574
Finished Training
F1: 0.574
        y=1    y=2   
 l=1    268    370   
 l=2    28     222   
CPU times: user 13min 25s, sys: 16.4 s, total: 13min 41s
Wall time: 13min 36s
Precision: 0.396
Recall: 0.916
F1: 0.553
        y=1    y=2   
 l=1   1378   2102   
 l=2    127   1013   


## (e) `SliceOursWeak`: Slice Model with $\tilde{Y}$ priors

In [17]:
# slice_ours_weak = init_model(r=200, rw=True)
# %time slice_ours_weak.train_model(train_slice_snorkel, dev_data=dev)
%time slice_ours_weak = search_slice_weights(train_slice_snorkel, dev, r=200, rw=True, max_search=5)

slice_ours_weak_scores = slice_ours_weak.score(test, metric=['precision', 'recall', 'f1'])

Loaded 77.0% (7656/9946) pretrained embeddings
Using pretrained embeddings.
Embeddings shape = (9946, 50)
The embeddings are NOT FROZEN
Using lstm_reduction = 'attention'


  self.w = torch.from_numpy(np.log(accs / (1-accs))).float()


Slice Heads:
Reweighting: True
Slice Weight: 0.8718667752263232
Input Network: Sequential(
  (0): LSTMModule(
    (embeddings): Embedding(9946, 50)
    (lstm): LSTM(50, 100, batch_first=True, bidirectional=True)
  )
)
L_head: Linear(in_features=200, out_features=33, bias=False)
Y_head: Linear(in_features=400, out_features=2, bias=False)
[0] Testing {'slice_weight': 0.8718667752263232}
Could not find kwarg "slice_weight" in destination dict.
Using GPU...


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))

  A = F.softmax(self.forward_L(x)).unsqueeze(1)
  return F.softmax(outputs)



Saving model at iteration 0 with best score 0.561
[E:0]	Train Loss: 0.314	Dev f1: 0.561


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 1 with best score 0.567
[E:1]	Train Loss: 0.308	Dev f1: 0.567


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:2]	Train Loss: 0.305	Dev f1: 0.563


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:3]	Train Loss: 0.304	Dev f1: 0.558


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 4 with best score 0.588
[E:4]	Train Loss: 0.303	Dev f1: 0.588


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:5]	Train Loss: 0.302	Dev f1: 0.586


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:6]	Train Loss: 0.302	Dev f1: 0.569


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:7]	Train Loss: 0.301	Dev f1: 0.588


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:8]	Train Loss: 0.301	Dev f1: 0.568


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:9]	Train Loss: 0.301	Dev f1: 0.571
Restoring best model from iteration 4 with score 0.588
Finished Training
F1: 0.588
        y=1    y=2   
 l=1    266    342   
 l=2    30     250   
Slice Heads:
Reweighting: True
Slice Weight: 5.059175693918715
Input Network: Sequential(
  (0): LSTMModule(
    (embeddings): Embedding(9946, 50)
    (lstm): LSTM(50, 100, batch_first=True, bidirectional=True)
  )
)
L_head: Linear(in_features=200, out_features=33, bias=False)
Y_head: Linear(in_features=400, out_features=2, bias=False)
[1] Testing {'slice_weight': 5.059175693918715}
Could not find kwarg "slice_weight" in destination dict.
Using GPU...


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 0 with best score 0.550
[E:0]	Train Loss: 1.718	Dev f1: 0.550


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 1 with best score 0.593
[E:1]	Train Loss: 1.706	Dev f1: 0.593


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:2]	Train Loss: 1.703	Dev f1: 0.575


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:3]	Train Loss: 1.701	Dev f1: 0.565


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:4]	Train Loss: 1.700	Dev f1: 0.574


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:5]	Train Loss: 1.698	Dev f1: 0.571


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:6]	Train Loss: 1.697	Dev f1: 0.557


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 7 with best score 0.596
[E:7]	Train Loss: 1.696	Dev f1: 0.596


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:8]	Train Loss: 1.696	Dev f1: 0.560


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 9 with best score 0.599
[E:9]	Train Loss: 1.697	Dev f1: 0.599
Restoring best model from iteration 9 with score 0.599
Finished Training
F1: 0.599
        y=1    y=2   
 l=1    276    350   
 l=2    20     242   
Slice Heads:
Reweighting: True
Slice Weight: 0.8309492273695529
Input Network: Sequential(
  (0): LSTMModule(
    (embeddings): Embedding(9946, 50)
    (lstm): LSTM(50, 100, batch_first=True, bidirectional=True)
  )
)
L_head: Linear(in_features=200, out_features=33, bias=False)
Y_head: Linear(in_features=400, out_features=2, bias=False)
[2] Testing {'slice_weight': 0.8309492273695529}
Could not find kwarg "slice_weight" in destination dict.
Using GPU...


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 0 with best score 0.576
[E:0]	Train Loss: 0.289	Dev f1: 0.576


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:1]	Train Loss: 0.287	Dev f1: 0.560


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 2 with best score 0.579
[E:2]	Train Loss: 0.287	Dev f1: 0.579


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 3 with best score 0.587
[E:3]	Train Loss: 0.287	Dev f1: 0.587


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:4]	Train Loss: 0.286	Dev f1: 0.574


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:5]	Train Loss: 0.286	Dev f1: 0.573


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:6]	Train Loss: 0.286	Dev f1: 0.586


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:7]	Train Loss: 0.286	Dev f1: 0.586


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:8]	Train Loss: 0.286	Dev f1: 0.570


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:9]	Train Loss: 0.286	Dev f1: 0.559
Restoring best model from iteration 3 with score 0.587
Finished Training
F1: 0.587
        y=1    y=2   
 l=1    271    357   
 l=2    25     235   
Slice Heads:
Reweighting: True
Slice Weight: 0.8291816529523388
Input Network: Sequential(
  (0): LSTMModule(
    (embeddings): Embedding(9946, 50)
    (lstm): LSTM(50, 100, batch_first=True, bidirectional=True)
  )
)
L_head: Linear(in_features=200, out_features=33, bias=False)
Y_head: Linear(in_features=400, out_features=2, bias=False)
[3] Testing {'slice_weight': 0.8291816529523388}
Could not find kwarg "slice_weight" in destination dict.
Using GPU...


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 0 with best score 0.574
[E:0]	Train Loss: 0.288	Dev f1: 0.574


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:1]	Train Loss: 0.286	Dev f1: 0.568


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 2 with best score 0.583
[E:2]	Train Loss: 0.286	Dev f1: 0.583


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:3]	Train Loss: 0.286	Dev f1: 0.575


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:4]	Train Loss: 0.286	Dev f1: 0.569


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:5]	Train Loss: 0.286	Dev f1: 0.566


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:6]	Train Loss: 0.286	Dev f1: 0.572


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:7]	Train Loss: 0.286	Dev f1: 0.573


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:8]	Train Loss: 0.286	Dev f1: 0.569


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:9]	Train Loss: 0.286	Dev f1: 0.562
Restoring best model from iteration 2 with score 0.583
Finished Training
F1: 0.583
        y=1    y=2   
 l=1    268    355   
 l=2    28     237   
Slice Heads:
Reweighting: True
Slice Weight: 3.436079603195352
Input Network: Sequential(
  (0): LSTMModule(
    (embeddings): Embedding(9946, 50)
    (lstm): LSTM(50, 100, batch_first=True, bidirectional=True)
  )
)
L_head: Linear(in_features=200, out_features=33, bias=False)
Y_head: Linear(in_features=400, out_features=2, bias=False)
[4] Testing {'slice_weight': 3.436079603195352}
Could not find kwarg "slice_weight" in destination dict.
Using GPU...


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 0 with best score 0.574
[E:0]	Train Loss: 1.162	Dev f1: 0.574


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 1 with best score 0.587
[E:1]	Train Loss: 1.155	Dev f1: 0.587


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:2]	Train Loss: 1.155	Dev f1: 0.586


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:3]	Train Loss: 1.154	Dev f1: 0.568


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:4]	Train Loss: 1.154	Dev f1: 0.585


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 5 with best score 0.596
[E:5]	Train Loss: 1.154	Dev f1: 0.596


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:6]	Train Loss: 1.154	Dev f1: 0.578


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:7]	Train Loss: 1.154	Dev f1: 0.579


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:8]	Train Loss: 1.153	Dev f1: 0.567


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:9]	Train Loss: 1.154	Dev f1: 0.594
Restoring best model from iteration 5 with score 0.596
Finished Training
F1: 0.596
        y=1    y=2   
 l=1    266    330   
 l=2    30     262   
[SUMMARY]
Best model: [1]
Best config: {'slice_weight': 5.059175693918715}
Best score: 0.5986984815618221
CPU times: user 2h 49min 40s, sys: 3min 43s, total: 2h 53min 24s
Wall time: 2h 52min 55s


  outputs, (h_t, c_t) = self.lstm(X_packed)


Precision: 0.405
Recall: 0.898
F1: 0.558
        y=1    y=2   
 l=1   1351   1983   
 l=2    154   1132   


## (f) `SliceUWWeak`: Unweighted Slice model with $\tilde{Y}$ priors

In [20]:
slice_uw_weak = init_model(r=200, rw=False)
%time slice_uw_weak.train_model(train_slice_snorkel, dev_data=dev)
slice_uw_weak_scores = slice_uw_weak.score(test, metric=['precision', 'recall', 'f1'])

Loaded 77.0% (7656/9946) pretrained embeddings
Using pretrained embeddings.
Embeddings shape = (9946, 50)
The embeddings are NOT FROZEN
Using lstm_reduction = 'attention'
Slice Heads:
Reweighting: False
Slice Weight: 10
Input Network: Sequential(
  (0): LSTMModule(
    (embeddings): Embedding(9946, 50)
    (lstm): LSTM(50, 100, batch_first=True, bidirectional=True)
  )
)
L_head: Linear(in_features=200, out_features=33, bias=False)
Y_head: Linear(in_features=200, out_features=2, bias=False)
Using GPU...


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 0 with best score 0.537
[E:0]	Train Loss: 3.494	Dev f1: 0.537


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 1 with best score 0.560
[E:1]	Train Loss: 3.424	Dev f1: 0.560


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 2 with best score 0.562
[E:2]	Train Loss: 3.394	Dev f1: 0.562


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 3 with best score 0.576
[E:3]	Train Loss: 3.379	Dev f1: 0.576


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:4]	Train Loss: 3.369	Dev f1: 0.567


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


Saving model at iteration 5 with best score 0.578
[E:5]	Train Loss: 3.362	Dev f1: 0.578


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:6]	Train Loss: 3.356	Dev f1: 0.556


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:7]	Train Loss: 3.352	Dev f1: 0.568


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:8]	Train Loss: 3.350	Dev f1: 0.564


HBox(children=(IntProgress(value=0, max=259), HTML(value='')))


[E:9]	Train Loss: 3.348	Dev f1: 0.567
Restoring best model from iteration 5 with score 0.578
Finished Training
F1: 0.578
        y=1    y=2   
 l=1    278    388   
 l=2    18     204   
CPU times: user 24min 7s, sys: 28.5 s, total: 24min 35s
Wall time: 24min 30s
Precision: 0.389
Recall: 0.917
F1: 0.546
        y=1    y=2   
 l=1   1380   2169   
 l=2    125    946   


## Slice-specific scores

In [21]:
# TODO: don't call private fns
Yp_oracle, Y = oracle._get_predictions(test)
Yp_base_weak, Y = base_weak._get_predictions(test)
Yp_slice_ours_weak, Y = slice_ours_weak._get_predictions(test)
Yp_slice_uw_weak, Y = slice_uw_weak._get_predictions(test)

#### `slice_ours` (re-weighting, accuracy priors) vs. `base_weak` (end_model trained on weak labels)

In [22]:
L_test = L_test.todense()

In [24]:
from metal.contrib.slicing.experiment_utils import compare_LF_slices

#### `slice_ours_weak` (slice model with weak priors + reweighting) vs. `base_weak` (end_model trained on weak labels)

In [35]:
compare_LF_slices(Yp_slice_ours_weak, Yp_base_weak,
                  Y, L_test, LFs, metric='accuracy', delta_threshold=0.05)

[32m[LF_closer_dis] delta: 0.1299, OURS: 0.5195, BASE: 0.3896[0m
[31m[LF_ctd_therapy_treat] delta: -0.1036, OURS: 0.6574, BASE: 0.7610[0m
[31m[LF_d_treat_c] delta: -0.0667, OURS: 0.4727, BASE: 0.5394[0m
[32m[LF_far_d_c] delta: 0.1319, OURS: 0.6043, BASE: 0.4724[0m
[32m[LF_improve_before_disease] delta: 0.1111, OURS: 0.4444, BASE: 0.3333[0m
[31m[LF_induced_other] delta: -0.0791, OURS: 0.5311, BASE: 0.6102[0m
[31m[LF_measure] delta: -0.0909, OURS: 0.6364, BASE: 0.7273[0m
[31m[LF_neg_d] delta: -0.1071, OURS: 0.2857, BASE: 0.3929[0m
[31m[LF_risk_d] delta: -0.0714, OURS: 0.3571, BASE: 0.4286[0m
[32m[LF_treat_d] delta: 0.2222, OURS: 0.6239, BASE: 0.4017[0m
[31m[LF_uncertain] delta: -0.0581, OURS: 0.6395, BASE: 0.6977[0m
improved 4/33


#### `slice_ours_weak` vs. `Yp_slice_uw_weak` (unweighted slice model)

In [30]:
compare_LF_slices(Yp_slice_ours_weak, Yp_slice_uw_weak,
                  Y, L_test, LFs, metric='accuracy', delta_threshold=0.05)

[32m[LF_c_treat_d] delta: 0.1617, OURS: 0.7277, BASE: 0.5660[0m
[32m[LF_c_treat_d_wide] delta: 0.1236, OURS: 0.6854, BASE: 0.5618[0m
[32m[LF_closer_chem] delta: 0.0863, OURS: 0.4994, BASE: 0.4131[0m
[32m[LF_closer_dis] delta: 0.1558, OURS: 0.5195, BASE: 0.3636[0m
[32m[LF_ctd_therapy_treat] delta: 0.1394, OURS: 0.6574, BASE: 0.5179[0m
[32m[LF_ctd_unspecified_treat] delta: 0.1073, OURS: 0.6851, BASE: 0.5779[0m
[31m[LF_d_treat_c] delta: -0.1273, OURS: 0.4727, BASE: 0.6000[0m
[32m[LF_far_c_d] delta: 0.0877, OURS: 0.5728, BASE: 0.4851[0m
[31m[LF_improve_before_disease] delta: -0.2222, OURS: 0.4444, BASE: 0.6667[0m
[31m[LF_in_patient_with] delta: -0.3333, OURS: 0.2222, BASE: 0.5556[0m
[32m[LF_induce] delta: 0.0693, OURS: 0.6832, BASE: 0.6139[0m
[31m[LF_level] delta: -0.0526, OURS: 0.3421, BASE: 0.3947[0m
[31m[LF_neg_d] delta: -0.0714, OURS: 0.2857, BASE: 0.3571[0m
[32m[LF_treat_d] delta: 0.0598, OURS: 0.6239, BASE: 0.5641[0m
improved 9/33


#### `slice_ours_weak` vs. `Yp_slice_uw_weak` (unweighted slice model)

In [32]:
compare_LF_slices(Yp_slice_ours_weak, Yp_oracle,
                  Y, L_test, LFs, metric='accuracy', delta_threshold=0.1)

[31m[LF_c_cause_d] delta: -0.1060, OURS: 0.6093, BASE: 0.7152[0m
[31m[LF_closer_chem] delta: -0.1482, OURS: 0.4994, BASE: 0.6476[0m
[31m[LF_closer_dis] delta: -0.1818, OURS: 0.5195, BASE: 0.7013[0m
[31m[LF_d_treat_c] delta: -0.2485, OURS: 0.4727, BASE: 0.7212[0m
[32m[LF_develop_d_following_c] delta: 0.5000, OURS: 1.0000, BASE: 0.5000[0m
[31m[LF_in_ctd_therapy] delta: -0.1238, OURS: 0.5383, BASE: 0.6621[0m
[31m[LF_in_patient_with] delta: -0.5556, OURS: 0.2222, BASE: 0.7778[0m
[31m[LF_level] delta: -0.3947, OURS: 0.3421, BASE: 0.7368[0m
[31m[LF_measure] delta: -0.3636, OURS: 0.6364, BASE: 1.0000[0m
[31m[LF_neg_d] delta: -0.3036, OURS: 0.2857, BASE: 0.5893[0m
[31m[LF_treat_d] delta: -0.1111, OURS: 0.6239, BASE: 0.7350[0m
[31m[LF_weak_assertions] delta: -0.1042, OURS: 0.5455, BASE: 0.6497[0m
improved 1/33
