In [1]:
# Enable the autoreload extension in mode 2 so edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
import os
import sys
sys.path.append('/dfs/scratch0/vschen/metal')
import metal
import torch
from torch.utils.data import Dataset, DataLoader

In [3]:
from metal.contrib.slicing.online_dp import SliceDPModel, LinearModule
from metal.contrib.slicing.sqlite_wrapper import SnorkelDataset

In [4]:
# Record the library and interpreter versions this notebook was run with.
# The interpreter is reported twice: once as the full version string and once
# as the structured `sys.version_info` tuple.
version_report = [
    ('PyTorch: ', torch.__version__),
    ('MeTaL:   ', metal.__version__),
    ('Python:  ', sys.version),
    ('Python:  ', sys.version_info),
]
for label, value in version_report:
    print(label, value)

PyTorch:  0.4.1
MeTaL:    0.3.3
Python:   3.6.7 (default, Dec  8 2018, 17:35:14) 
[GCC 5.4.0 20160609]
Python:   sys.version_info(major=3, minor=6, micro=7, releaselevel='final', serial=0)


In [5]:

# The candidate database is expected in the current working directory.
db_conn_str = os.path.join(os.getcwd(), "spouses.db")
# Candidate definition handed to the dataset wrapper: a relation name followed
# by its two argument names.
candidate_def = ['Spouse', ['person1', 'person2']]

# `splits` returns three datasets, unpacked here as train / dev / test.
train, dev, test = SnorkelDataset.splits(
    db_conn_str,
    candidate_def,
    max_seq_len=125,
)

# Report how many examples each split contains.
print(f'[TRAIN] {len(train)}')
print(f'[DEV]   {len(dev)}')
print(f'[TEST]  {len(test)}')

Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/Spouses/spouses.db
Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/Spouses/spouses.db
Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/Spouses/spouses.db
[TRAIN] 22254
[DEV]   2811
[TEST]  2701


In [6]:
import numpy as np

# Load the arrays saved in `snorkel_data_spouse.npz`. An .npz archive keeps its
# underlying file open until it is closed, so read everything inside a `with`
# block; each indexed array is fully materialised on access and stays valid
# after the archive is closed.
with np.load('snorkel_data_spouse.npz') as snorkel_data:
    L_train = snorkel_data['L_train']
    L_dev = snorkel_data['L_dev']
    L_test = snorkel_data['L_test']
    train_marginals = snorkel_data['train_marginals']
    dev_marginals = snorkel_data['dev_marginals']
    accs = snorkel_data['accs']

# Number of entries in `accs` (presumably one per labeling function -- confirm).
m = len(accs)

# Sanity check: the L_* row counts should line up with the marginal lengths.
L_train.shape, L_dev.shape, L_test.shape, len(train_marginals), len(dev_marginals)

((22254, 10), (2811, 10), (2701, 10), 22254, 2811)

In [7]:
from metal.contrib.slicing.CDR.embeddings import EmbeddingLoader, load_embeddings
# Path to the pretrained embedding file, relative to the working directory.
emb_path  = "../glove.6B/glove.6B.50d.txt"
# Loader over the text-format embedding file; `init_model` passes it to
# `load_embeddings` together with the training vocabulary.
embs  = EmbeddingLoader(emb_path, fmt='text')

The history saving thread hit an unexpected error (DatabaseError('database disk image is malformed',)).History will not be written to the database.


In [13]:
def init_model(use_end_model=False, r=None, rw=None, lr=0.01, n_epochs=10,
               batch_size=32, seed=123, use_cuda=None):
    """Build a fresh, untrained model on top of a newly created LSTM encoder.

    Reads the notebook globals ``train`` (for its ``word_dict``), ``embs``
    (the embedding loader) and ``accs`` (passed to ``SliceDPModel``).

    Args:
        use_end_model: If True, return an ``EndModel`` with layer sizes
            ``[100, 2]``; otherwise return a ``SliceDPModel``.
        r: Forwarded positionally to ``SliceDPModel`` (ignored for EndModel).
        rw: Forwarded positionally to ``SliceDPModel`` (ignored for EndModel).
        lr: Learning rate written to the optimizer config.
        n_epochs: Number of training epochs written to the train config.
        batch_size: Batch size written to the train config.
        seed: Seed passed to the model constructor.
        use_cuda: Whether to place the model on the GPU. ``None`` (default)
            selects the GPU only when ``torch.cuda.is_available()``.

    Returns:
        The configured model; call ``train_model`` on it to train.
    """
    # Resolve the device here rather than relying on a notebook-level
    # `use_cuda` global (defined in a later cell) or a hard-coded True, so
    # both model types behave the same and the function also works on
    # CPU-only machines.
    if use_cuda is None:
        use_cuda = torch.cuda.is_available()

    # Embedding matrix for the training vocabulary, built from the loader.
    wembs = load_embeddings(train.word_dict, embs)
    lstm = LSTMModule(embed_size=50,
                      hidden_size=50,
                      embeddings=wembs,
                      lstm_reduction='attention',
                      dropout=0.25,
                      num_layers=1,
                      freeze=False)
    if use_end_model:
        model = EndModel([100, 2], input_module=lstm, seed=seed, use_cuda=use_cuda)
    else:
        model = SliceDPModel(lstm, accs, r, rw, seed=seed, use_cuda=use_cuda)

    # Shared training settings so every experiment is run the same way.
    train_config = model.config['train_config']
    train_config['optimizer_config']['optimizer_common']['lr'] = lr
    train_config['validation_metric'] = 'f1'
    train_config['batch_size'] = batch_size
    train_config['n_epochs'] = n_epochs
    return model

In [9]:
# Two-column ("multiclass") soft labels: column 0 holds `train_marginals`,
# column 1 holds its complement, so every row sums to 1.
complement_marginals = 1 - train_marginals
snorkel_marginals = np.vstack((train_marginals, complement_marginals)).T
snorkel_marginals

array([[0.19943235, 0.80056765],
       [0.19943235, 0.80056765],
       [0.19943235, 0.80056765],
       ...,
       [0.5       , 0.5       ],
       [0.5       , 0.5       ],
       [0.5       , 0.5       ]])

In [15]:
from metal.contrib.slicing.sqlite_wrapper \
    import SnorkelDataset as SnorkelSliceDataset


def _train_split_dataset(**label_sources):
    """Open split 0 of the candidate database with the given label sources."""
    return SnorkelSliceDataset(
        db_conn_str,
        candidate_def,
        split=0,
        **label_sources
    )


# Soft labels (marginals) only.
train_snorkel = _train_split_dataset(train_marginals=snorkel_marginals)

# Labeling-function matrix only.
train_slice = _train_split_dataset(L_train=L_train)

# Labeling-function matrix together with the soft labels.
train_slice_snorkel = _train_split_dataset(
    L_train=L_train,
    train_marginals=snorkel_marginals
)

Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/Spouses/spouses.db
Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/Spouses/spouses.db
Connected to sqlite:////dfs/scratch0/vschen/metal/metal/contrib/slicing/Spouses/spouses.db


## (a) `BaseWeak`: EndModel trained on weak labels

In [11]:
from metal.end_model import EndModel
from metal.modules import LSTMModule
# True only when PyTorch can see a CUDA device.
use_cuda = torch.cuda.is_available()

# Baseline: an EndModel trained on the marginals-only dataset, validated on
# the dev split during training and scored on the test split afterwards.
base_weak = init_model(use_end_model=True)
%time base_weak.train_model(train_snorkel, dev_data=dev)
base_weak_scores = base_weak.score(test, metric=['precision', 'recall', 'f1'])

Loaded 91.0% (29001/31870) pretrained embeddings
Using pretrained embeddings.
Embeddings shape = (31870, 50)
The embeddings are NOT FROZEN
Using lstm_reduction = 'attention'

Network architecture:
Sequential(
  (0): Sequential(
    (0): LSTMModule(
      (embeddings): Embedding(31870, 50)
      (lstm): LSTM(50, 50, batch_first=True, bidirectional=True)
    )
    (1): ReLU()
  )
  (1): Linear(in_features=100, out_features=2, bias=True)
)

Using GPU...


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


Saving model at iteration 0 with best score 0.489
[E:0]	Train Loss: 0.603	Dev f1: 0.489


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:1]	Train Loss: 0.598	Dev f1: 0.433


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:2]	Train Loss: 0.594	Dev f1: 0.440


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:3]	Train Loss: 0.592	Dev f1: 0.397


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:4]	Train Loss: 0.589	Dev f1: 0.416


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:5]	Train Loss: 0.587	Dev f1: 0.367


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:6]	Train Loss: 0.585	Dev f1: 0.401


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:7]	Train Loss: 0.584	Dev f1: 0.388


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:8]	Train Loss: 0.583	Dev f1: 0.361


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:9]	Train Loss: 0.581	Dev f1: 0.419
Restoring best model from iteration 0 with score 0.489
Finished Training
F1: 0.489
        y=1    y=2    y=3   
 l=1    85      7     67    
 l=2    104    109   2439   
 l=3     0      0      0    
CPU times: user 23min 2s, sys: 43.2 s, total: 23min 45s
Wall time: 23min 36s
Precision: 0.530
Recall: 0.564
F1: 0.547
        y=1    y=2    y=3   
 l=1    123     6     103   
 l=2    95     80    2294   
 l=3     0      0      0    


## (b) `SliceUW`: Unweighted SliceModel with `rw=False`

In [17]:
slice_uw = init_model(use_end_model=False, r=100, rw=False)
%time slice_uw.train_model(train_slice_snorkel, dev_data=dev)
slice_uw_scores = slice_uw.score(test, metric=['precision', 'recall', 'f1'])

Loaded 91.0% (29001/31870) pretrained embeddings
Using pretrained embeddings.
Embeddings shape = (31870, 50)
The embeddings are NOT FROZEN
Using lstm_reduction = 'attention'
Slice Heads:
Input Network: Sequential(
  (0): Sequential(
    (0): LSTMModule(
      (embeddings): Embedding(31870, 50)
      (lstm): LSTM(50, 50, batch_first=True, bidirectional=True)
    )
    (1): ReLU()
  )
)
L_head: Linear(in_features=100, out_features=10, bias=False)
Y_head: Linear(in_features=100, out_features=2, bias=False)
Using GPU...


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

  self.criteria(F.softmax(self.forward_Y(X)), Y_tilde)



Saving model at iteration 0 with best score 0.394
[E:0]	Train Loss: 1.027	Dev f1: 0.394


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:1]	Train Loss: 1.016	Dev f1: 0.386


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:2]	Train Loss: 1.011	Dev f1: 0.385


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


Saving model at iteration 3 with best score 0.428
[E:3]	Train Loss: 1.007	Dev f1: 0.428


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:4]	Train Loss: 1.004	Dev f1: 0.398


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:5]	Train Loss: 1.002	Dev f1: 0.395


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:6]	Train Loss: 1.001	Dev f1: 0.411


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


Saving model at iteration 7 with best score 0.429
[E:7]	Train Loss: 0.999	Dev f1: 0.429


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:8]	Train Loss: 0.998	Dev f1: 0.409


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:9]	Train Loss: 0.998	Dev f1: 0.395
Restoring best model from iteration 7 with score 0.429
Finished Training
F1: 0.429
        y=1    y=2    y=3   
 l=1    107    19     184   
 l=2    82     97    2322   
 l=3     0      0      0    
CPU times: user 48min 50s, sys: 1min 21s, total: 50min 11s
Wall time: 49min 58s
Precision: 0.355
Recall: 0.651
F1: 0.460
        y=1    y=2    y=3   
 l=1    142    10     248   
 l=2    76     76    2149   
 l=3     0      0      0    


## (c) `SliceOurs`: Attention SliceModel with `rw=True`

In [18]:
# SliceDPModel with rw=True, trained on the dataset that carries only the
# labeling-function matrix (no marginals); validated on dev, scored on test.
slice_ours = init_model(use_end_model=False, r=100, rw=True)
%time slice_ours.train_model(train_slice, dev_data=dev)
slice_ours_scores = slice_ours.score(test, metric=['precision', 'recall', 'f1'])

Loaded 91.0% (29001/31870) pretrained embeddings
Using pretrained embeddings.
Embeddings shape = (31870, 50)
The embeddings are NOT FROZEN
Using lstm_reduction = 'attention'
Slice Heads:
Input Network: Sequential(
  (0): Sequential(
    (0): LSTMModule(
      (embeddings): Embedding(31870, 50)
      (lstm): LSTM(50, 50, batch_first=True, bidirectional=True)
    )
    (1): ReLU()
  )
)
L_head: Linear(in_features=100, out_features=10, bias=False)
Y_head: Linear(in_features=200, out_features=2, bias=False)
Using GPU...


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

  A = F.softmax(self.forward_L(x)).unsqueeze(1)



Saving model at iteration 0 with best score 0.357
[E:0]	Train Loss: 1.029	Dev f1: 0.357


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


Saving model at iteration 1 with best score 0.362
[E:1]	Train Loss: 1.016	Dev f1: 0.362


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


Saving model at iteration 2 with best score 0.396
[E:2]	Train Loss: 1.011	Dev f1: 0.396


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


Saving model at iteration 3 with best score 0.400
[E:3]	Train Loss: 1.008	Dev f1: 0.400


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:4]	Train Loss: 1.005	Dev f1: 0.384


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:5]	Train Loss: 1.003	Dev f1: 0.370


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:6]	Train Loss: 1.001	Dev f1: 0.360


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:7]	Train Loss: 1.000	Dev f1: 0.378


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


[E:8]	Train Loss: 0.998	Dev f1: 0.386


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))


Saving model at iteration 9 with best score 0.409
[E:9]	Train Loss: 0.998	Dev f1: 0.409
Restoring best model from iteration 9 with score 0.409
Finished Training
F1: 0.409
        y=1    y=2    y=3   
 l=1    104    15     201   
 l=2    85     101   2305   
 l=3     0      0      0    
CPU times: user 1h 7min 29s, sys: 2min 10s, total: 1h 9min 40s
Wall time: 1h 9min 27s
Precision: 0.359
Recall: 0.656
F1: 0.464
        y=1    y=2    y=3   
 l=1    143    13     242   
 l=2    75     73    2155   
 l=3     0      0      0    


## (d) `SliceOursWeak`: Attention SliceModel (`rw=True`) with $\tilde{Y}$ priors

In [20]:
# SliceDPModel with rw=True, trained on the dataset that carries both the
# labeling-function matrix and the marginals; validated on dev, scored on test.
slice_ours_weak = init_model(use_end_model=False, r=100, rw=True)
%time slice_ours_weak.train_model(train_slice_snorkel, dev_data=dev)
slice_ours_weak_scores = slice_ours_weak.score(test, metric=['precision', 'recall', 'f1'])

Loaded 91.0% (29001/31870) pretrained embeddings
Using pretrained embeddings.
Embeddings shape = (31870, 50)
The embeddings are NOT FROZEN
Using lstm_reduction = 'attention'
Slice Heads:
Input Network: Sequential(
  (0): Sequential(
    (0): LSTMModule(
      (embeddings): Embedding(31870, 50)
      (lstm): LSTM(50, 50, batch_first=True, bidirectional=True)
    )
    (1): ReLU()
  )
)
L_head: Linear(in_features=100, out_features=10, bias=False)
Y_head: Linear(in_features=200, out_features=2, bias=False)
Using GPU...


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

  A = F.softmax(self.forward_L(x)).unsqueeze(1)
  self.criteria(F.softmax(self.forward_Y(X)), Y_tilde)


Saving model at iteration 0 with best score 0.369
[E:0]	Train Loss: 1.029	Dev f1: 0.369


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

Saving model at iteration 1 with best score 0.374
[E:1]	Train Loss: 1.016	Dev f1: 0.374


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

Saving model at iteration 2 with best score 0.376
[E:2]	Train Loss: 1.012	Dev f1: 0.376


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

Saving model at iteration 3 with best score 0.419
[E:3]	Train Loss: 1.009	Dev f1: 0.419


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

[E:4]	Train Loss: 1.007	Dev f1: 0.379


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

[E:5]	Train Loss: 1.005	Dev f1: 0.373


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

[E:6]	Train Loss: 1.003	Dev f1: 0.390


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

[E:7]	Train Loss: 1.001	Dev f1: 0.366


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

[E:8]	Train Loss: 1.000	Dev f1: 0.410


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

[E:9]	Train Loss: 0.999	Dev f1: 0.355
Restoring best model from iteration 3 with score 0.419
Finished Training
F1: 0.419
        y=1    y=2    y=3   
 l=1    96     19     154   
 l=2    93     97    2352   
 l=3     0      0      0    
CPU times: user 1h 9min, sys: 2min 8s, total: 1h 11min 9s
Wall time: 1h 10min 55s
Precision: 0.391
Recall: 0.633
F1: 0.483
        y=1    y=2    y=3   
 l=1    138    10     205   
 l=2    80     76    2192   
 l=3     0      0      0    


## (e) `SliceUWWeak`: Unweighted SliceModel (`rw=False`) with $\tilde{Y}$ priors

In [21]:
# SliceDPModel with rw=False, trained on the dataset that carries both the
# labeling-function matrix and the marginals; validated on dev, scored on test.
slice_uw_weak = init_model(use_end_model=False, r=100, rw=False)
%time slice_uw_weak.train_model(train_slice_snorkel, dev_data=dev)
slice_uw_weak_scores = slice_uw_weak.score(test, metric=['precision', 'recall', 'f1'])

Loaded 91.0% (29001/31870) pretrained embeddings
Using pretrained embeddings.
Embeddings shape = (31870, 50)
The embeddings are NOT FROZEN
Using lstm_reduction = 'attention'
Slice Heads:
Input Network: Sequential(
  (0): Sequential(
    (0): LSTMModule(
      (embeddings): Embedding(31870, 50)
      (lstm): LSTM(50, 50, batch_first=True, bidirectional=True)
    )
    (1): ReLU()
  )
)
L_head: Linear(in_features=100, out_features=10, bias=False)
Y_head: Linear(in_features=100, out_features=2, bias=False)
Using GPU...


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

Saving model at iteration 0 with best score 0.368
[E:0]	Train Loss: 1.027	Dev f1: 0.368


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

Saving model at iteration 1 with best score 0.390
[E:1]	Train Loss: 1.016	Dev f1: 0.390


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

Saving model at iteration 2 with best score 0.391
[E:2]	Train Loss: 1.011	Dev f1: 0.391


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

Saving model at iteration 3 with best score 0.409
[E:3]	Train Loss: 1.006	Dev f1: 0.409


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

[E:4]	Train Loss: 1.003	Dev f1: 0.354


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

[E:5]	Train Loss: 1.001	Dev f1: 0.358


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

[E:6]	Train Loss: 0.999	Dev f1: 0.362


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

[E:7]	Train Loss: 0.998	Dev f1: 0.398


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

[E:8]	Train Loss: 0.997	Dev f1: 0.370


HBox(children=(IntProgress(value=0, max=696), HTML(value='')))

[E:9]	Train Loss: 0.996	Dev f1: 0.377
Restoring best model from iteration 3 with score 0.409
Finished Training
F1: 0.409
        y=1    y=2    y=3   
 l=1    99     17     179   
 l=2    90     99    2327   
 l=3     0      0      0    
CPU times: user 46min 16s, sys: 1min 24s, total: 47min 40s
Wall time: 47min 28s
Precision: 0.374
Recall: 0.651
F1: 0.475
        y=1    y=2    y=3   
 l=1    142    10     228   
 l=2    76     76    2169   
 l=3     0      0      0    


## Slice-specific scores

In [29]:
from labeling_functions import LFs
# Show the labeling-function names; the same `LFs` list is passed to
# `compare_LF_slices` alongside `L_test` in the cells below.
print([lf.__name__ for lf in LFs])

['LF_distant_supervision', 'LF_distant_supervision_last_names', 'LF_husband_wife', 'LF_husband_wife_left_window', 'LF_same_last_name', 'LF_no_spouse_in_sentence', 'LF_and_married', 'LF_familial_relationship', 'LF_family_left_window', 'LF_other_relationship']


In [22]:
# TODO: don't call private fns (`_get_predictions` is not part of the public API).
# Collect test-set predictions from every trained model. `Y` is rebound on each
# line, so only the value returned by the last call is kept.
# NOTE(review): this presumes every call returns the same `Y` for `test` -- confirm.
Yp_base_weak, Y = base_weak._get_predictions(test)
Yp_slice_uw, Y = slice_uw._get_predictions(test)
Yp_slice_ours, Y = slice_ours._get_predictions(test)
Yp_slice_ours_weak, Y = slice_ours_weak._get_predictions(test)
Yp_slice_uw_weak, Y = slice_uw_weak._get_predictions(test)

In [65]:
from metal.contrib.slicing.experiment_utils import compare_LF_slices
# `slice_ours` vs. `base_weak`, per labeling-function slice, on accuracy.
# Judging by the printed output, the first prediction array is reported as
# OURS and the second as BASE -- confirm against `compare_LF_slices`.
compare_LF_slices(Yp_slice_ours, Yp_base_weak, Y, L_test, LFs, metric='accuracy', delta_threshold=0.02)

[32m[LF_distant_supervision] delta: 0.1250, OURS: 0.5000, BASE: 0.3750[0m
[32m[LF_distant_supervision_last_names] delta: 0.0476, OURS: 0.2857, BASE: 0.2381[0m
[32m[LF_husband_wife_left_window] delta: 0.0217, OURS: 0.4384, BASE: 0.4167[0m
[32m[LF_same_last_name] delta: 0.0400, OURS: 0.1800, BASE: 0.1400[0m
improved 4/10


In [66]:
# `slice_ours` vs. `slice_uw`, per labeling-function slice, on accuracy.
compare_LF_slices(Yp_slice_ours, Yp_slice_uw, Y, L_test, LFs, metric='accuracy', delta_threshold=0.02)

improved 0/10


### `slice_ours_weak` vs. `base_weak`

In [67]:
# Per-slice accuracy comparison with the same 0.02 delta threshold as the
# other comparison cells.
compare_LF_slices(Yp_slice_ours_weak, Yp_base_weak, Y, L_test, LFs, metric='accuracy', delta_threshold=0.02)

[32m[LF_distant_supervision] delta: 0.1250, OURS: 0.5000, BASE: 0.3750[0m
[32m[LF_distant_supervision_last_names] delta: 0.0476, OURS: 0.2857, BASE: 0.2381[0m
[32m[LF_same_last_name] delta: 0.0400, OURS: 0.1800, BASE: 0.1400[0m
improved 3/10


### `slice_ours_weak` vs. `slice_uw_weak`

In [71]:
# Per-slice accuracy comparison between the two models trained on the dataset
# that includes the marginals (rw=True vs. rw=False).
compare_LF_slices(Yp_slice_ours_weak, Yp_slice_uw_weak, Y, L_test, LFs, metric='accuracy', delta_threshold=0.02)

improved 0/10
