In [4]:
!git clone https://github.com/AlexWan0/DomiKnowS.git
!cd DomiKnowS && git checkout notebook-program-name
!mv DomiKnowS/* .
!pip install -r requirements.txt

Cloning into 'DomiKnowS'...
remote: Enumerating objects: 15454, done.[K
remote: Counting objects: 100% (171/171), done.[K
remote: Compressing objects: 100% (66/66), done.[K
remote: Total 15454 (delta 104), reused 167 (delta 104), pack-reused 15283[K
Receiving objects: 100% (15454/15454), 706.57 MiB | 32.98 MiB/s, done.
Resolving deltas: 100% (11133/11133), done.
Checking out files: 100% (314/314), done.
Branch 'notebook-program-name' set up to track remote branch 'notebook-program-name' from 'origin'.
Switched to a new branch 'notebook-program-name'
Collecting acls==1.0.2
  Downloading https://files.pythonhosted.org/packages/0b/03/1ab1f5cdffac551b7351444cccdac01d0b0f3ddf85b84a6a7d4aa858aa65/acls-1.0.2.tar.gz
Collecting Owlready2==0.30
[?25l  Downloading https://files.pythonhosted.org/packages/b5/b0/9ebdf9f3c104adac9ff529f7ffd11e70e0d056cebd4446ad8312ab449516/Owlready2-0.30.tar.gz (23.7MB)
[K     |████████████████████████████████| 23.7MB 130kB/s 
[?25hCollecting gurobipy
[?25l  

In [5]:
# Install torchtext pinned to 0.9 without resolving its dependencies.
# NOTE(review): presumably --no-dependencies keeps pip from replacing the
# already-installed torch build — confirm.
!pip install torchtext==0.9 --no-dependencies



In [47]:
# Enable IPython's autoreload extension in mode 2 so that imported modules are
# reloaded before each cell runs and source edits are picked up automatically.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [27]:
from regr.graph import Graph, Concept, Relation
from regr.graph.relation import disjoint

# Clear Graph/Concept/Relation before declaring the graph.
# NOTE(review): presumably this resets class-level registries so the cell can
# be re-run without name clashes — confirm against regr.graph.
Graph.clear()
Concept.clear()
Relation.clear()

with Graph('global') as graph:
  # Base concept representing one review.
  review = Concept(name='review')

  # The two sentiment labels are created by calling the base concept
  # (presumably this makes them sub-concepts of `review` — confirm).
  positive = review(name='positive')
  negative = review(name='negative')

  # Declare the two labels as disjoint.
  disjoint(positive, negative)

In [28]:
from regr.sensor.pytorch.sensors import ReaderSensor

# Populate the 'text' property from the 'text' key of each data item.
review['text'] = ReaderSensor(keyword='text')

# Read the 'positive' / 'negative' keys of each data item as labels
# (label=True); these are the same keys produced by get_data below.
review[positive] = ReaderSensor(keyword='positive', label=True)
review[negative] = ReaderSensor(keyword='negative', label=True)

In [29]:
# Model hyperparameters shared by the cells below.
embed_size = 300   # GloVe vector dimension and LSTM input size
hidden_size = 100  # LSTM hidden size per direction
num_classes = 2    # output width of each classification head
drop_rate = 0.5    # dropout probability applied to the LSTM summary vector

In [30]:
from regr.sensor.pytorch.sensors import FunctionalSensor
from torchtext.vocab import GloVe
from torchtext.data.utils import get_tokenizer
from torch.nn.utils.rnn import pad_sequence
import torch

class EmbeddingSensor(FunctionalSensor):
  """Sensor that turns one review text into a tensor of GloVe word vectors.

  The text is tokenized with the spaCy tokenizer, every token is looked up in
  the GloVe 840B vocabulary, and the result is returned as a one-element
  padded batch.
  """

  def __init__(self, *pres, **kwarg):
    """Forward all arguments to FunctionalSensor, then load GloVe and the tokenizer."""
    super().__init__(*pres, **kwarg)

    self.vocab = GloVe(name='840B', dim=embed_size)
    self.tokenizer = get_tokenizer('spacy', language='en')

  def forward(self, *inputs):
    """Embed the first input (a single text string).

    Returns a tensor of shape (num_tokens, 1, embed_size): pad_sequence puts
    the sequence dimension first and the batch here holds exactly one review.
    The tensor is moved to ``self.device`` (presumably set by the base class).
    """
    words = self.tokenizer(inputs[0])

    # One row per token, filled in from the vocabulary lookup.
    review_matrix = torch.empty((len(words), embed_size))
    for position, word in enumerate(words):
      review_matrix[position] = self.vocab[word]

    # A single-sequence "batch": pad_sequence only adds the batch dimension.
    batch = pad_sequence([review_matrix])

    return batch.to(device=self.device)

In [31]:
# Derive the 'text_embed' property from 'text' via the GloVe embedding sensor.
# Constructing the sensor loads the GloVe vectors (see the log line below).
review['text_embed'] = EmbeddingSensor('text')

INFO:torchtext.vocab:Loading vectors from .vector_cache/glove.840B.300d.txt.pt






In [32]:
from torch import nn

class LSTMModule(nn.Module):
  """Bidirectional LSTM encoder that summarises a sequence as one vector.

  The summary concatenates the forward direction's output at the last time
  step with the backward direction's output at the first time step, and
  applies dropout, yielding a (batch, 2 * hidden_size) tensor.
  """

  def __init__(self):
    super().__init__()

    self.rnn = nn.LSTM(embed_size, hidden_size, bidirectional=True)
    self.dropout = nn.Dropout(p=drop_rate)

  def forward(self, input):
    """Encode ``input`` of shape (seq_len, batch, embed_size)."""
    # Only the per-step outputs are needed; the final (h, c) state is unused.
    states, _ = self.rnn(input)

    # The last dimension holds [forward | backward] features side by side.
    fwd_states, bwd_states = states.chunk(2, dim=2)

    # Each direction has seen the full sequence at opposite ends of it.
    summary = torch.cat((fwd_states[-1], bwd_states[0]), dim=1)

    return self.dropout(summary)

In [33]:
from regr.sensor.pytorch.learners import ModuleLearner
from torch import nn

# Encode the embedded text with the bidirectional LSTM defined above.
review['rnn_embed'] = ModuleLearner('text_embed', module=LSTMModule())

# One independent linear head per label concept; the input width is
# hidden_size * 2 because the encoder concatenates both LSTM directions.
review[positive] = ModuleLearner('rnn_embed', module=nn.Linear(hidden_size * 2, num_classes))
review[negative] = ModuleLearner('rnn_embed', module=nn.Linear(hidden_size * 2, num_classes))

In [34]:
from regr.program import SolverPOIProgram
from regr.program.metric import MacroAverageTracker, PRF1Tracker, DatanodeCMMetric
from regr.program.loss import NBCrossEntropyLoss

# Build the learning program over the graph. Two inference types are
# requested ('ILP' and 'local/argmax'), and precision/recall/F1 is tracked
# separately for each of them.
program = SolverPOIProgram(
  graph,
  inferTypes=['ILP', 'local/argmax'],
  loss=MacroAverageTracker(NBCrossEntropyLoss()),
  metric={
    'ILP': PRF1Tracker(DatanodeCMMetric()),
    'argmax': PRF1Tracker(DatanodeCMMetric('local/argmax')),
  },
)

In [35]:
from torchtext.datasets import IMDB
import os

# Download IMDB only when it is not already on disk, using one known review
# file as the marker. The check uses the same relative '.data/aclImdb'
# location that the loading code reads from, instead of an absolute
# '/content/...' path that is only valid when the working directory is
# /content. IMDB() is called purely for its download side effect; its return
# value is discarded.
if not os.path.exists(os.path.join('.data', 'aclImdb', 'test', 'neg', '127_3.txt')):
  IMDB()

In [36]:
import logging

# Show INFO-level records so the per-epoch loss and metric logs emitted during
# training appear in the cell output.
logging.basicConfig(level=logging.INFO)

In [37]:
import glob
import random

def get_data(directory, label):
  """Load every ``*.txt`` review under ``directory/label`` as a data item.

  Args:
    directory: dataset split directory, e.g. '.data/aclImdb/train'.
    label: sub-directory name; 'pos' and 'neg' determine the label values.

  Returns:
    A list of dicts with keys 'text' (file contents), 'positive' and
    'negative' (one-element lists holding 1 or 0), ordered by file path.
  """
  pattern = os.path.join(directory, label, '*.txt')

  data_all = []
  # glob returns paths in arbitrary, filesystem-dependent order; sort them so
  # the loaded list is the same on every machine.
  for path in sorted(glob.glob(pattern)):
    # Read as UTF-8 explicitly rather than relying on the platform's default
    # encoding, which differs between systems.
    with open(path, 'r', encoding='utf-8') as f:
      text = f.read()
    data_all.append({
      'text': text,
      'positive': [1 if label == 'pos' else 0],
      'negative': [1 if label == 'neg' else 0],
    })
  return data_all

# Use a dedicated, seeded generator so the shuffled order (and therefore the
# slices taken from these lists for training, validation and testing) is
# repeatable across runs, without touching the global `random` state.
shuffle_rng = random.Random(42)

# Positive and negative reviews are loaded separately and then mixed.
train_data = get_data('.data/aclImdb/train', 'pos')
train_data.extend(get_data('.data/aclImdb/train', 'neg'))
shuffle_rng.shuffle(train_data)

test_data = get_data('.data/aclImdb/test', 'pos')
test_data.extend(get_data('.data/aclImdb/test', 'neg'))
shuffle_rng.shuffle(test_data)

In [38]:
# Sanity check: number of training items and the structure of one item.
print(len(train_data))
print(train_data[0])

25000
{'text': "Lame, cliched superhero action movie drivel. I had high hopes for this movie, and the genre of HK buddy cop actioneers is one that i don't despise, but very rarely do i see a storyline as trite and ludicrous as this one was. This would have been forgivable, as it always is in these kinds of movies, when the action compensates, unfortunately, it did not. The action does carry the trademark surreality and over the top nature of HK action, but it's not very involving, obscenely gory, and in fact often completely incoherent (perhaps this is due to re-editing for american release, it does show signs in many places of patchwork). I was very disappointed.", 'positive': [0], 'negative': [1]}


In [39]:
import time

def create_batches(data, batch_size=128):
  """Group data items into batches of at most ``batch_size`` items.

  Each batch is a dict with the keys 'text', 'positive' and 'negative', whose
  values are lists holding the corresponding field of every item in the
  batch, in order. The final batch may be shorter than ``batch_size``.
  """
  batches = []
  for start in range(0, len(data), batch_size):
    batch = {'text':[], 'positive':[], 'negative':[]}
    # Slicing clamps at the end of the list, so the last batch is just shorter.
    for example in data[start:start + batch_size]:
      for field, value in example.items():
        batch[field].append(value)
    batches.append(batch)
  return batches

In [40]:
# Index 80% of the way through train_data; the validation slice used for
# training below starts here.
split_idx = int(len(train_data)*0.8)

In [41]:
# Train for 10 epochs with Adam on the first 5000 shuffled training items,
# validating on 2000 items starting at split_idx and testing on the first
# 2000 test items. The slices are subsets, not the full dataset.
# NOTE(review): device='cuda' is hard-coded, so this presumably requires a GPU
# runtime — confirm whether a CPU fallback is wanted.
program.train(train_data[:5000],
              valid_set=train_data[split_idx:split_idx+2000],
              test_set=test_data[:2000], train_epoch_num=10, Optim=torch.optim.Adam, device='cuda')

INFO:regr.program.program:Epoch: 1
INFO:regr.program.program:Training:


HBox(children=(FloatProgress(value=0.0, description='Epoch 1 Training', max=5000.0, style=ProgressStyle(descri…

INFO:regr.program.program: - loss:
INFO:regr.program.program:{'positive': tensor(0.5722), 'negative': tensor(0.5714)}
INFO:regr.program.program: - metric:
INFO:regr.program.program: - - ILP
INFO:regr.program.program:{'positive': {'P': tensor(0.6830), 'R': tensor(0.6556), 'F1': tensor(0.6690)}, 'negative': {'P': tensor(0.6929), 'R': tensor(0.6809), 'F1': tensor(0.6868)}}
INFO:regr.program.program: - - argmax





INFO:regr.program.program:{'positive': {'P': tensor(0.6811), 'R': tensor(0.6751), 'F1': tensor(0.6781)}, 'negative': {'P': tensor(0.6883), 'R': tensor(0.6907), 'F1': tensor(0.6895)}}
INFO:regr.program.program:Validation:


HBox(children=(FloatProgress(value=0.0, description='Epoch 1 Validation', max=2000.0, style=ProgressStyle(desc…

INFO:regr.program.program: - loss:
INFO:regr.program.program:{'positive': tensor(0.3675), 'negative': tensor(0.3682)}
INFO:regr.program.program: - metric:
INFO:regr.program.program: - - ILP
INFO:regr.program.program:{'positive': {'P': tensor(0.8271), 'R': tensor(0.8673), 'F1': tensor(0.8467)}, 'negative': {'P': tensor(0.8646), 'R': tensor(0.8193), 'F1': tensor(0.8414)}}
INFO:regr.program.program: - - argmax
INFO:regr.program.program:{'positive': {'P': tensor(0.8272), 'R': tensor(0.8683), 'F1': tensor(0.8473)}, 'negative': {'P': tensor(0.8649), 'R': tensor(0.8213), 'F1': tensor(0.8425)}}
INFO:regr.program.program:Epoch: 2
INFO:regr.program.program:Training:





HBox(children=(FloatProgress(value=0.0, description='Epoch 2 Training', max=5000.0, style=ProgressStyle(descri…

INFO:regr.program.program: - loss:
INFO:regr.program.program:{'positive': tensor(0.3420), 'negative': tensor(0.3417)}
INFO:regr.program.program: - metric:
INFO:regr.program.program: - - ILP
INFO:regr.program.program:{'positive': {'P': tensor(0.8525), 'R': tensor(0.8535), 'F1': tensor(0.8530)}, 'negative': {'P': tensor(0.8590), 'R': tensor(0.8533), 'F1': tensor(0.8561)}}
INFO:regr.program.program: - - argmax





INFO:regr.program.program:{'positive': {'P': tensor(0.8507), 'R': tensor(0.8552), 'F1': tensor(0.8529)}, 'negative': {'P': tensor(0.8590), 'R': tensor(0.8556), 'F1': tensor(0.8573)}}
INFO:regr.program.program:Validation:


HBox(children=(FloatProgress(value=0.0, description='Epoch 2 Validation', max=2000.0, style=ProgressStyle(desc…

INFO:regr.program.program: - loss:
INFO:regr.program.program:{'positive': tensor(0.3101), 'negative': tensor(0.3101)}
INFO:regr.program.program: - metric:
INFO:regr.program.program: - - ILP
INFO:regr.program.program:{'positive': {'P': tensor(0.8482), 'R': tensor(0.8835), 'F1': tensor(0.8655)}, 'negative': {'P': tensor(0.8813), 'R': tensor(0.8430), 'F1': tensor(0.8618)}}
INFO:regr.program.program: - - argmax
INFO:regr.program.program:{'positive': {'P': tensor(0.8484), 'R': tensor(0.8845), 'F1': tensor(0.8661)}, 'negative': {'P': tensor(0.8816), 'R': tensor(0.8450), 'F1': tensor(0.8629)}}
INFO:regr.program.program:Epoch: 3
INFO:regr.program.program:Training:





HBox(children=(FloatProgress(value=0.0, description='Epoch 3 Training', max=5000.0, style=ProgressStyle(descri…

INFO:regr.program.program: - loss:
INFO:regr.program.program:{'positive': tensor(0.2571), 'negative': tensor(0.2571)}
INFO:regr.program.program: - metric:
INFO:regr.program.program: - - ILP
INFO:regr.program.program:{'positive': {'P': tensor(0.8954), 'R': tensor(0.8925), 'F1': tensor(0.8939)}, 'negative': {'P': tensor(0.8975), 'R': tensor(0.8978), 'F1': tensor(0.8977)}}
INFO:regr.program.program: - - argmax





INFO:regr.program.program:{'positive': {'P': tensor(0.8944), 'R': tensor(0.8937), 'F1': tensor(0.8941)}, 'negative': {'P': tensor(0.8964), 'R': tensor(0.8978), 'F1': tensor(0.8971)}}
INFO:regr.program.program:Validation:


HBox(children=(FloatProgress(value=0.0, description='Epoch 3 Validation', max=2000.0, style=ProgressStyle(desc…

INFO:regr.program.program: - loss:
INFO:regr.program.program:{'positive': tensor(0.3316), 'negative': tensor(0.3310)}
INFO:regr.program.program: - metric:
INFO:regr.program.program: - - ILP
INFO:regr.program.program:{'positive': {'P': tensor(0.8528), 'R': tensor(0.8804), 'F1': tensor(0.8664)}, 'negative': {'P': tensor(0.8824), 'R': tensor(0.8519), 'F1': tensor(0.8669)}}
INFO:regr.program.program: - - argmax
INFO:regr.program.program:{'positive': {'P': tensor(0.8503), 'R': tensor(0.8804), 'F1': tensor(0.8651)}, 'negative': {'P': tensor(0.8827), 'R': tensor(0.8539), 'F1': tensor(0.8680)}}
INFO:regr.program.program:Epoch: 4
INFO:regr.program.program:Training:





HBox(children=(FloatProgress(value=0.0, description='Epoch 4 Training', max=5000.0, style=ProgressStyle(descri…

INFO:regr.program.program: - loss:
INFO:regr.program.program:{'positive': tensor(0.1817), 'negative': tensor(0.1816)}
INFO:regr.program.program: - metric:
INFO:regr.program.program: - - ILP
INFO:regr.program.program:{'positive': {'P': tensor(0.9290), 'R': tensor(0.9339), 'F1': tensor(0.9314)}, 'negative': {'P': tensor(0.9361), 'R': tensor(0.9298), 'F1': tensor(0.9329)}}
INFO:regr.program.program: - - argmax





INFO:regr.program.program:{'positive': {'P': tensor(0.9290), 'R': tensor(0.9343), 'F1': tensor(0.9316)}, 'negative': {'P': tensor(0.9358), 'R': tensor(0.9314), 'F1': tensor(0.9336)}}
INFO:regr.program.program:Validation:


HBox(children=(FloatProgress(value=0.0, description='Epoch 4 Validation', max=2000.0, style=ProgressStyle(desc…

INFO:regr.program.program: - loss:
INFO:regr.program.program:{'positive': tensor(0.3577), 'negative': tensor(0.3568)}
INFO:regr.program.program: - metric:
INFO:regr.program.program: - - ILP
INFO:regr.program.program:{'positive': {'P': tensor(0.8462), 'R': tensor(0.9027), 'F1': tensor(0.8735)}, 'negative': {'P': tensor(0.8986), 'R': tensor(0.8401), 'F1': tensor(0.8684)}}
INFO:regr.program.program: - - argmax
INFO:regr.program.program:{'positive': {'P': tensor(0.8454), 'R': tensor(0.9027), 'F1': tensor(0.8731)}, 'negative': {'P': tensor(0.8986), 'R': tensor(0.8401), 'F1': tensor(0.8684)}}
INFO:regr.program.program:Epoch: 5
INFO:regr.program.program:Training:





HBox(children=(FloatProgress(value=0.0, description='Epoch 5 Training', max=5000.0, style=ProgressStyle(descri…

INFO:regr.program.program: - loss:
INFO:regr.program.program:{'positive': tensor(0.1146), 'negative': tensor(0.1147)}
INFO:regr.program.program: - metric:
INFO:regr.program.program: - - ILP
INFO:regr.program.program:{'positive': {'P': tensor(0.9565), 'R': tensor(0.9550), 'F1': tensor(0.9557)}, 'negative': {'P': tensor(0.9563), 'R': tensor(0.9578), 'F1': tensor(0.9570)}}
INFO:regr.program.program: - - argmax





INFO:regr.program.program:{'positive': {'P': tensor(0.9562), 'R': tensor(0.9554), 'F1': tensor(0.9558)}, 'negative': {'P': tensor(0.9563), 'R': tensor(0.9578), 'F1': tensor(0.9570)}}
INFO:regr.program.program:Validation:


HBox(children=(FloatProgress(value=0.0, description='Epoch 5 Validation', max=2000.0, style=ProgressStyle(desc…

INFO:regr.program.program: - loss:
INFO:regr.program.program:{'positive': tensor(0.4397), 'negative': tensor(0.4396)}
INFO:regr.program.program: - metric:
INFO:regr.program.program: - - ILP
INFO:regr.program.program:{'positive': {'P': tensor(0.8594), 'R': tensor(0.8734), 'F1': tensor(0.8663)}, 'negative': {'P': tensor(0.8755), 'R': tensor(0.8608), 'F1': tensor(0.8681)}}
INFO:regr.program.program: - - argmax
INFO:regr.program.program:{'positive': {'P': tensor(0.8594), 'R': tensor(0.8734), 'F1': tensor(0.8663)}, 'negative': {'P': tensor(0.8755), 'R': tensor(0.8608), 'F1': tensor(0.8681)}}
INFO:regr.program.program:Epoch: 6
INFO:regr.program.program:Training:





HBox(children=(FloatProgress(value=0.0, description='Epoch 6 Training', max=5000.0, style=ProgressStyle(descri…

INFO:regr.program.program: - loss:
INFO:regr.program.program:{'positive': tensor(0.0770), 'negative': tensor(0.0770)}
INFO:regr.program.program: - metric:
INFO:regr.program.program: - - ILP
INFO:regr.program.program:{'positive': {'P': tensor(0.9721), 'R': tensor(0.9736), 'F1': tensor(0.9728)}, 'negative': {'P': tensor(0.9743), 'R': tensor(0.9724), 'F1': tensor(0.9733)}}
INFO:regr.program.program: - - argmax





INFO:regr.program.program:{'positive': {'P': tensor(0.9721), 'R': tensor(0.9736), 'F1': tensor(0.9728)}, 'negative': {'P': tensor(0.9743), 'R': tensor(0.9724), 'F1': tensor(0.9733)}}
INFO:regr.program.program:Validation:


HBox(children=(FloatProgress(value=0.0, description='Epoch 6 Validation', max=2000.0, style=ProgressStyle(desc…

INFO:regr.program.program: - loss:
INFO:regr.program.program:{'positive': tensor(0.5465), 'negative': tensor(0.5456)}
INFO:regr.program.program: - metric:
INFO:regr.program.program: - - ILP
INFO:regr.program.program:{'positive': {'P': tensor(0.8587), 'R': tensor(0.8744), 'F1': tensor(0.8665)}, 'negative': {'P': tensor(0.8754), 'R': tensor(0.8598), 'F1': tensor(0.8675)}}
INFO:regr.program.program: - - argmax
INFO:regr.program.program:{'positive': {'P': tensor(0.8587), 'R': tensor(0.8744), 'F1': tensor(0.8665)}, 'negative': {'P': tensor(0.8754), 'R': tensor(0.8598), 'F1': tensor(0.8675)}}
INFO:regr.program.program:Epoch: 7
INFO:regr.program.program:Training:





HBox(children=(FloatProgress(value=0.0, description='Epoch 7 Training', max=5000.0, style=ProgressStyle(descri…

INFO:regr.program.program: - loss:
INFO:regr.program.program:{'positive': tensor(0.0654), 'negative': tensor(0.0652)}
INFO:regr.program.program: - metric:
INFO:regr.program.program: - - ILP
INFO:regr.program.program:{'positive': {'P': tensor(0.9781), 'R': tensor(0.9769), 'F1': tensor(0.9775)}, 'negative': {'P': tensor(0.9775), 'R': tensor(0.9787), 'F1': tensor(0.9781)}}
INFO:regr.program.program: - - argmax





INFO:regr.program.program:{'positive': {'P': tensor(0.9781), 'R': tensor(0.9769), 'F1': tensor(0.9775)}, 'negative': {'P': tensor(0.9775), 'R': tensor(0.9787), 'F1': tensor(0.9781)}}
INFO:regr.program.program:Validation:


HBox(children=(FloatProgress(value=0.0, description='Epoch 7 Validation', max=2000.0, style=ProgressStyle(desc…

INFO:regr.program.program: - loss:
INFO:regr.program.program:{'positive': tensor(0.6043), 'negative': tensor(0.6047)}
INFO:regr.program.program: - metric:
INFO:regr.program.program: - - ILP
INFO:regr.program.program:{'positive': {'P': tensor(0.8559), 'R': tensor(0.8845), 'F1': tensor(0.8700)}, 'negative': {'P': tensor(0.8837), 'R': tensor(0.8549), 'F1': tensor(0.8690)}}
INFO:regr.program.program: - - argmax
INFO:regr.program.program:{'positive': {'P': tensor(0.8559), 'R': tensor(0.8845), 'F1': tensor(0.8700)}, 'negative': {'P': tensor(0.8828), 'R': tensor(0.8549), 'F1': tensor(0.8686)}}
INFO:regr.program.program:Epoch: 8
INFO:regr.program.program:Training:





HBox(children=(FloatProgress(value=0.0, description='Epoch 8 Training', max=5000.0, style=ProgressStyle(descri…

KeyboardInterrupt: ignored