In this tutorial, we will walk through a multitask application on the OntoNotes dataset.

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import pickle
import sys,os
sys.path.append('../../metal')
os.environ['METALHOME'] = '/home/jdunnmon/repos/metal'

# Loading 5K Ontonotes dataset
with open("data/ontonotes_tutorial_data.pkl", 'rb') as f:
    data = pickle.load(f)

In [3]:
from metal.multitask import TaskHierarchy
task_graph = TaskHierarchy(cardinalities=data['cardinalities'], edges=data['edges'])

In [4]:
from metal.multitask import MTLabelModel
label_model = MTLabelModel(task_graph=task_graph)

In [5]:
import numpy as np
from scipy.sparse import csr_matrix
import torch

# Reformatting Ls
Ls = []
        
for L in data['Ls']:
    lst = []
    for a in range(len(data['task_to_lfs'])):
        L_tmp = np.zeros(L.shape)
        inds = np.array(data['task_to_lfs'][a])
        L_tmp[:,inds] = L.todense()[:,inds]
        lst.append(csr_matrix(L_tmp))
    Ls.append(lst)

# Reformatting Ys for MeTaL
Ys=[]
label_map = {0: [0,0,0],
             1: [1,1,0],
             2: [1,2,0],
             3: [2,0,1],
             4: [2,0,2]}
for Y in data['Ys']:
    Y_out = [[],[],[]]
    for y in Y:
        labels = label_map[int(y)]
        for ii,yo in enumerate(Y_out):
            yo.append(labels[ii])
    Ys.append(Y_out)
    
    
# Creating BOW data

Xs = data['Xs']
embeddings = data['embeddings']
Xs_BOWE = []
for X in Xs:
    X_BOWE=[]
    for x in X:
        embed = []
        bowe = torch.zeros((1,embeddings.shape[1]))
        count=0
        for ind in x:
            embed.append(embeddings[ind,:])
            if ind != 0:
                bowe+=embeddings[ind,:]
                count+=1
        bowe/=count
        X_BOWE.append(bowe)
    Xs_BOWE.append(torch.stack(X_BOWE).squeeze())

In [6]:
# SEPARATE OUT Ls BY TASK_TO_LF
label_model.train(Ls[0], n_epochs=500, print_every=20, seed=123)

Computing O...
Estimating \mu...
[E:0]	Train Loss: 0.559
[E:20]	Train Loss: 0.142
[E:40]	Train Loss: 0.076
[E:60]	Train Loss: 0.072
[E:80]	Train Loss: 0.070
[E:100]	Train Loss: 0.070
[E:120]	Train Loss: 0.070
[E:140]	Train Loss: 0.070
[E:160]	Train Loss: 0.070
[E:180]	Train Loss: 0.070
[E:200]	Train Loss: 0.070
[E:220]	Train Loss: 0.070
[E:240]	Train Loss: 0.070
[E:260]	Train Loss: 0.070
[E:280]	Train Loss: 0.070
[E:300]	Train Loss: 0.070
[E:320]	Train Loss: 0.070
[E:340]	Train Loss: 0.070
[E:360]	Train Loss: 0.070
[E:380]	Train Loss: 0.070
[E:400]	Train Loss: 0.070
[E:420]	Train Loss: 0.070
[E:440]	Train Loss: 0.070
[E:460]	Train Loss: 0.070
[E:480]	Train Loss: 0.070
[E:499]	Train Loss: 0.070
Finished Training


In [7]:
label_model.score((Ls[1], Ys[1]))

Accuracy: 0.664


0.6642665735779122

In [8]:
# Y_train_ps stands for "Y[labels]_train[split]_p[redicted]s[oft]"
Y_train_ps = label_model.predict_proba(Ls[0])

In [37]:
from metal.multitask import MTEndModel
import torch

use_cuda = torch.cuda.is_available()
end_model = MTEndModel([300,10000,100], task_graph=task_graph, seed=123, use_cuda=use_cuda, disable_prog_bar= True)

Could not find kwarg "input_modules" in default config.
Could not find kwarg "middle_modules" in default config.
Could not find kwarg "head_modules" in default config.

Network architecture:

--Input Layer--
IdentityModule()

--Middle Layers--
(layer1):
Sequential(
  (0): Linear(in_features=300, out_features=10000, bias=True)
  (1): ReLU()
)

(layer2):
Sequential(
  (0): Linear(in_features=10000, out_features=100, bias=True)
  (1): ReLU()
)
(head0)
Linear(in_features=100, out_features=2, bias=True)
(head1)
Linear(in_features=100, out_features=2, bias=True)
(head2)
Linear(in_features=100, out_features=2, bias=True)




In [38]:
os.environ['CUDA_VISIBLE_DEVICES']='0'
end_model.train((Xs_BOWE[0], Y_train_ps), dev_data=(Xs_BOWE[1], torch.Tensor(Ys[1])), n_epochs=500, seed=123)

Using GPU...
Saving model at iteration 0 with best score 0.487
[E:0]	Train Loss: 2.004	Dev score: 0.487
Saving model at iteration 1 with best score 0.517
[E:1]	Train Loss: 1.825	Dev score: 0.517
Saving model at iteration 2 with best score 0.625
[E:2]	Train Loss: 1.785	Dev score: 0.625


Process Process-16:
Traceback (most recent call last):
  File "/home/jdunnmon/repos/anaconda3/envs/metal/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/jdunnmon/repos/anaconda3/envs/metal/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/jdunnmon/repos/anaconda3/envs/metal/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 96, in _worker_loop
    r = index_queue.get(timeout=MANAGER_STATUS_CHECK_INTERVAL)
  File "/home/jdunnmon/repos/anaconda3/envs/metal/lib/python3.6/multiprocessing/queues.py", line 104, in get
    if not self._poll(timeout):
  File "/home/jdunnmon/repos/anaconda3/envs/metal/lib/python3.6/multiprocessing/connection.py", line 257, in poll
    return self._poll(timeout)
  File "/home/jdunnmon/repos/anaconda3/envs/metal/lib/python3.6/multiprocessing/connection.py", line 414, in _poll
    r = wait([self], timeout)
  File "/home/jdunnmon/repos

KeyboardInterrupt: 

In [22]:
print("Label Model:")
score = label_model.score((Ls[2], Ys[2]))

print()

print("End Model:")
score = end_model.score((Xs_BOWE[2], Ys[2]))

Label Model:
Accuracy: 0.682

End Model:
Accuracy: 0.678


In [9]:
from metal.multitask import MTEndModel
from metal.end_model import EndModel
from metal.modules import LSTMModule
import torch


# LSTM parameters
hidden_size = 100
embed_size = embeddings.shape[1]
vocab_size = embeddings.shape[0] # Update Metal to handle this more gracefully!
max_seq_length = Xs[0]
input_module = LSTMModule(embed_size, hidden_size, vocab_size = vocab_size)

use_cuda = torch.cuda.is_available()
end_model = MTEndModel([hidden_size,300,100], task_graph=task_graph, \
                       seed=123, use_cuda=use_cuda, disable_prog_bar= False, \
                       input_modules=[input_module], embeddings=embeddings, freeze=True)

Using randomly initialized embeddings.
Embeddings shape = (15139, 300)
The embeddings are NOT FROZEN
Using lstm_reduction = 'max'
Could not find kwarg "embeddings" in default config.
Could not find kwarg "freeze" in default config.
Could not find kwarg "input_modules" in default config.
Could not find kwarg "middle_modules" in default config.
Could not find kwarg "head_modules" in default config.

Network architecture:

--Input Layer--
Sequential(
  (0): LSTMModule(
    (embeddings): Embedding(15139, 300)
    (lstm): LSTM(300, 100, batch_first=True, bidirectional=True)
  )
  (1): ReLU()
)

--Middle Layers--
(layer1):
Sequential(
  (0): Linear(in_features=100, out_features=300, bias=True)
  (1): ReLU()
)

(layer2):
Sequential(
  (0): Linear(in_features=300, out_features=100, bias=True)
  (1): ReLU()
)
(head0)
Linear(in_features=100, out_features=2, bias=True)
(head1)
Linear(in_features=100, out_features=2, bias=True)
(head2)
Linear(in_features=100, out_features=2, bias=True)




In [None]:
import torch
from torch.utils.data import DataLoader
from metal.multitask.utils import MultiYDataset
import os
os.environ['CUDA_VISIBLE_DEVICES']='2'

#train_data = ([x for x in Xs[0]],Y_train_ps)
#dev_data = ([x for x in Xs[1]], torch.Tensor(Ys[1]))

train_data = MultiYDataset(Xs[0],Y_train_ps)
dev_data = MultiYDataset(Xs[1], torch.Tensor(Ys[1]))

#train_data = (Xs[0],Y_train_ps)
#dev_data = (Xs[1], torch.Tensor(Ys[1]))

end_model.train(train_data, dev_data=dev_data, \
                n_epochs=50, batch_size=256, seed=123, l2=0.001, lr=0.001, num_workers=4,\
                print_every=1)

# Training end model
#train_data = (Xs[0].long(), torch.Tensor(Y_train_ps))
#dev_data = (Xs[1].long(), torch.Tensor(Ys[1]))
#batch_size = 256

#train_data = DataLoader(MetalDataset(*train_data), shuffle=True, batch_size=batch_size)
#dev_data = DataLoader(MetalDataset(*dev_data), shuffle=True, batch_size = batch_size)

#end_model.train(train_data, dev_data=dev_data, l2=0.001, lr=0.001, batch_size=256, 
#                num_workers=8, n_epochs=2, print_every=1, validation_metric='accuracy')

# Emptying cuda cache (add this to metal?)
torch.cuda.empty_cache()

Using GPU...






  0%|          | 0/20 [00:00<?, ?it/s][A[A[A[A

> /home/jdunnmon/repos/metal/metal/modules/lstm_module.py(138)forward()
-> batch_size, max_seq = X.shape
(Pdb) X
tensor([  144,     1,   156,  5506,   232,    23,    11,    97,     8,   291,
          114,  4242,    89,   234,   538,     2,  1220,     6,  2600,    14,
           34,   183,   963, 14475,     9,  3056,     8, 13147,     4, 14620,
            3,    10,  1118,    73,     5,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0, 

In [30]:
from torch.utils.data import DataLoader
dl = DataLoader(train_data, **end_model.config["train_config"]["data_loader_config"])

In [38]:
for a,b in dl:
    print(a[0].shape)

torch.Size([207])
torch.Size([207])
torch.Size([207])
torch.Size([207])
torch.Size([207])
torch.Size([207])
torch.Size([207])
torch.Size([207])
torch.Size([207])
torch.Size([207])
torch.Size([207])
torch.Size([207])
torch.Size([207])
torch.Size([207])
torch.Size([207])
torch.Size([207])
torch.Size([207])
torch.Size([207])
torch.Size([207])
torch.Size([207])


In [41]:
a[0].shape

torch.Size([207])

## SANDBOX

In [33]:
len(Y_train_ps[2])

5000

In [13]:
import pickle
import sys,os
#sys.path.append('../../metal')
sys.path.append('../../metal_nips')

with open("data/ontonotes_tutorial_data.pkl", 'rb') as f:
    data = pickle.load(f)
    
a = data['task_tree']

In [11]:
Xs[0].shape

torch.Size([5000, 207])

In [14]:
a.edges

[(0, 1), (0, 2)]

In [15]:
data['task_tree'] = None

In [21]:
data['edges'] = a.edges
data['cardinalities'] = a.K

In [20]:
a.K

[2, 2, 2]

In [23]:
with open("data/ontonotes_tutorial_data.pkl", 'wb') as f:
    pickle.dump(data,f)

In [7]:
data

{'Ls': [<5000x33 sparse matrix of type '<class 'numpy.int8'>'
  	with 20231 stored elements in Compressed Sparse Column format>,
  <347x33 sparse matrix of type '<class 'numpy.int8'>'
  	with 1603 stored elements in Compressed Sparse Column format>,
  <345x33 sparse matrix of type '<class 'numpy.int8'>'
  	with 1611 stored elements in Compressed Sparse Column format>],
 'Xs': [tensor([[    4, 11183,     3,  ...,     0,     0,     0],
          [    2,     4,  2285,  ...,     0,     0,     0],
          [ 1584,     1,    10,  ...,     0,     0,     0],
          ...,
          [  751,    71,  3248,  ...,     0,     0,     0],
          [    2, 12398,  6691,  ...,     0,     0,     0],
          [   70,    73,   293,  ...,     0,     0,     0]]),
  tensor([[  618,   621,    43,  ...,     0,     0,     0],
          [    2,  6661,   178,  ...,     0,     0,     0],
          [  414,     1,     4,  ...,     0,     0,     0],
          ...,
          [  120,   110,   346,  ...,     0,     0

In [17]:
import GPUtil
a = GPUtil.showUtilization()

| ID | GPU | MEM |
------------------
|  0 |  0% |  9% |
|  1 |  0% |  0% |
|  2 |  0% |  0% |
|  3 |  0% |  3% |


In [19]:
GPUs = GPUtil.getGPUs()

In [22]:
GPU0=GPUs[0]

In [25]:
GPU0.load

0.0

In [1]:
### INITIAL CODE FOR GPU LOAD TEST

#import torch.multiprocessing as multiprocessing
#multiprocessing.set_start_method('forkserver')
import sys
sys.path.append('../../metal')
import os
os.environ['CUDA_VISIBLE_DEVICES']='0'
import metal
import numpy as np
import pickle
import torch
import time

import GPUtil

from metal.end_model import EndModel
from metal.label_model import LabelModel
from metal.utils import split_data


with open("data/basics_tutorial.pkl", 'rb') as f:
    X, Y, L, D = pickle.load(f)

Xs, Ys, Ls, Ds = split_data(X, Y, L, D, splits=[0.8, 0.1, 0.1], stratify_by=Y, seed=123)


label_model = LabelModel(k=2, seed=123)
label_model.train(Ls[0], Y_dev=Ys[1], n_epochs=500, print_every=25)
Y_train_ps = label_model.predict_proba(Ls[0])

use_cuda = torch.cuda.is_available()
#if not use_cuda:
    # Aborting test if no GPU!
#    return

end_model = EndModel([1000,100000,2], seed=123, use_cuda=use_cuda)
def train_end_model():
    end_model.train((Xs[0], Y_train_ps), dev_data=(Xs[1], Ys[1]), l2=0.1, batch_size=256, 
                n_epochs=3, print_every=1, validation_metric='f1')

def check_gpu_use(ind=0):
    # For now, this test assumes we use GPU0 and that it is otherwise empty!
    GPU_use = []
    samples = 2
    i = 0
    while i < samples:
        print(f'GPU sample {i}')
        GPU_use.append(GPUtil.getGPUs()[ind].load)
        time.sleep(3)
        i+=1
    return np.array(GPU_use)
    
def gpu_use_queue(queue):
    GPU_use = check_gpu_use(ind)
    queue.put(GPU_use)
    
# Getting initial GPU use
initial_gpu_use = GPUtil.getGPUs()[0].memoryUsed
train_end_model()
final_gpu_use = GPUtil.getGPUs()[0].memoryUsed
print(final_gpu_use-initial_gpu_use)

                           
# Checking use while training
#queue = multiprocessing.Queue()
#p1 = multiprocessing.Process(name='p1', target=check_gpu_use, args=(queue,))
#p = multiprocessing.Process(name='p', target=train_end_model)
#p1.start()
#p.start()

#active_gpu_use = queue.get() 

#print(active_gpu_use-initial_gpu_use)

Computing O...
Estimating \mu...
[E:0]	Train Loss: 6.028
[E:25]	Train Loss: 0.438
[E:50]	Train Loss: 0.029
[E:75]	Train Loss: 0.004
[E:100]	Train Loss: 0.003
[E:125]	Train Loss: 0.003
[E:150]	Train Loss: 0.002
[E:175]	Train Loss: 0.002
[E:200]	Train Loss: 0.002
[E:225]	Train Loss: 0.002
[E:250]	Train Loss: 0.002
[E:275]	Train Loss: 0.002
[E:300]	Train Loss: 0.002
[E:325]	Train Loss: 0.002
[E:350]	Train Loss: 0.002
[E:375]	Train Loss: 0.002
[E:400]	Train Loss: 0.002
[E:425]	Train Loss: 0.002
[E:450]	Train Loss: 0.002
[E:475]	Train Loss: 0.002
[E:499]	Train Loss: 0.002
Finished Training

Network architecture:
Sequential(
  (0): IdentityModule()
  (1): Sequential(
    (0): Linear(in_features=1000, out_features=100000, bias=True)
    (1): ReLU()
  )
  (2): Linear(in_features=100000, out_features=2, bias=True)
)

Using GPU...


100%|██████████| 32/32 [00:01<00:00, 26.29it/s]


Saving model at iteration 0 with best score 0.773
[E:0]	Train Loss: 2.722	Dev score: 0.773


100%|██████████| 32/32 [00:01<00:00, 26.47it/s]


[E:1]	Train Loss: 0.620	Dev score: 0.308


100%|██████████| 32/32 [00:01<00:00, 26.53it/s]


[E:2]	Train Loss: 0.742	Dev score: 0.071
Restoring best model from iteration 0 with score 0.773
Finished Training
Accuracy: 0.857
        y=1    y=2   
 l=1    244    141   
 l=2     2     613   
3497.0


In [3]:
initial_gpu_use


12.0