In [None]:
# environment setup
import os
import site

# Restrict CUDA to device index 1. This is set here, ahead of the later cells
# that import torch / allennlp, so the restriction is in place before they load.
os.environ.update({'CUDA_VISIBLE_DEVICES': '1'})

# Register the directory two levels above the working directory as a site
# directory (presumably so the project packages `regr` and `emr` can be
# imported -- confirm against the repository layout).
site.addsitedir('../..')

## Experimental Area

## Developing Area

### example code

In [None]:
# --- data locations ---
# Directory holding the corpus and the two files read from it; the pieces are
# combined with os.path.join in the cell that loads the datasets.
relative_path: str = "data/EntityMentionRelation"
train_path: str = "conll04_train.corp"
valid_path: str = "conll04_test.corp"  # the "test" file is what gets used for validation

# --- model sizes ---
EMBEDDING_DIM: int = 16  # passed to word2vec and fc_sm in make_model
HIDDEN_DIM: int = 8      # not referenced by any cell visible in this notebook

# --- training hyper-parameters ---
# NOTE(review): none of these four are read by the visible cells; presumably
# they are meant for the trainer -- confirm where they are consumed.
LR: float = 0.1
BATCH: int = 128
EPOCH: int = 10  # 1000
PATIENCE: int = 10

In [None]:
from regr import Graph
from regr.scaffold import Scaffold
from emr.models import datainput, word2vec, fc_sm
from emr.data import Data
from allennlp.models.model import Model


# Written by the ML programmer: connects concepts in the graph to data and ML modules.
def make_model(graph: Graph, data: Data, scaffold: Scaffold) -> Model:
    """Bind data and learnable modules to graph concepts and build a model.

    The function clears existing bindings on ``graph``, attaches the
    ``'sentence'`` and ``'label'`` entries of ``data`` to the ``word`` and
    ``people`` concepts, stacks a ``word2vec`` module and an ``fc_sm`` module
    on top, and asks ``scaffold`` to build a model class from the graph.

    Args:
        graph: Graph exposing the ``word`` and ``people`` concepts. Its
            previous bindings are released as a side effect.
        data: Data container; indexed with ``'sentence'`` and ``'label'`` and
            queried for ``data.vocab``.
        scaffold: Scaffold used to record the assignments and build the model
            class.

    Returns:
        An instance of the class returned by ``scaffold.build(graph)``,
        constructed with ``data.vocab`` and with its ``field_name`` entries
        for ``'output'`` and ``'label'`` filled in.
    """
    # Concepts used by this example.
    word_concept = graph.word
    people_concept = graph.people

    # Start from a clean slate: drop anything bound to the graph earlier.
    graph.release()

    # Attach the raw data: sentences as word 'index', labels as people 'label'.
    sentence_input = datainput(data['sentence'])
    scaffold.assign(word_concept, 'index', *sentence_input)
    label_input = datainput(data['label'])
    scaffold.assign(people_concept, 'label', *label_input)

    # Model part 1: word2vec over the word indices, stored as word 'w2v'.
    embedding = word2vec(
        word_concept['index'],
        data.vocab.get_vocab_size('tokens'),
        EMBEDDING_DIM,
        'tokens',
    )
    scaffold.assign(word_concept, 'w2v', *embedding)

    # Model part 2: fc_sm over the embeddings, assigned to people 'label'.
    # The trailing 2 is presumably the number of output classes -- confirm
    # against fc_sm.
    classifier = fc_sm(word_concept['w2v'], EMBEDDING_DIM, 2)
    scaffold.assign(people_concept, 'label', *classifier)
    # people['label'] now deliberately carries two assignments (data and
    # prediction); the loss is expected to arise from their inconsistency.

    # Build the model class from the graph.
    # Open design questions kept from the original author:
    #   * should this rather be `model = graph.build()`?
    #   * links in the graph may be used to provide non-parameterized
    #     transformations, which is a core feature of the graph -- is there a
    #     better semantic interface for that?
    model_cls = scaffold.build(graph)
    model = model_cls(data.vocab)

    # Tell the model which fields hold the prediction and the reference label.
    model.field_name['output'] = people_concept.fullname + '[label]'
    model.field_name['label'] = 'label'

    return model

In [None]:
# environment setup

#import logging
# logging.basicConfig(level=logging.INFO)

def seed1(seed: int = 1) -> None:
    """Seed the global RNGs of ``random``, NumPy and PyTorch with one value.

    Args:
        seed: Value handed to ``np.random.seed``, ``random.seed`` and
            ``torch.manual_seed``. Defaults to 1, which is the fixed seed the
            notebook has always used, so ``seed1()`` behaves as before.

    Returns:
        None. The function only mutates the libraries' global RNG state.
    """
    # Imports stay local to the function, as in the original cell.
    import random
    import numpy as np
    import torch

    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)


# Seed once at notebook start-up so subsequent cells are reproducible.
seed1()

In [None]:
import os
from emr.data import Data, NERPeopReader
from emr.graph import graph
from emr.models import get_trainer
from regr.scaffold import AllennlpScaffold
import torch

# ---- data ----
# One reader handles both files; the training file is read first, then the
# validation file, and both are wrapped in a single Data object.
reader = NERPeopReader()
train_dataset, valid_dataset = (
    reader.read(os.path.join(relative_path, split_file))
    for split_file in (train_path, valid_path)
)
data = Data(train_dataset, valid_dataset)

# ---- model ----
# The scaffold records the graph assignments made inside make_model and is
# passed on to the trainer factory as well.
scaffold = AllennlpScaffold()
model = make_model(graph, data, scaffold)

# ---- training ----
trainer = get_trainer(graph, model, data, scaffold)
trainer.train()

# ---- persistence ----
# Write the trained weights and the vocabulary under /tmp.
with open("/tmp/model.th", 'wb') as fout:
    torch.save(model.state_dict(), fout)
data.vocab.save_to_files("/tmp/vocab")