### Import Libraries

In [1]:
from collections import OrderedDict
import torch
import tensorboardX
import dill as pickle

In [2]:
import sys

# Directory added to the module search path; presumably where the project-local
# modules imported in the following cell live (verify against the repo layout).
# The path is relative, so it resolves against the kernel's working directory.
SOPA_DIR = "sopa_master/"

# Guard the append so re-running this cell does not stack duplicate entries
# on sys.path.
if SOPA_DIR not in sys.path:
    sys.path.append(SOPA_DIR)

In [6]:
from data import read_embeddings, read_docs, read_labels
from soft_patterns import ProbSemiring, MaxPlusSemiring, LogSpaceMaxTimesSemiring, SoftPatternClassifier, train, Batch, evaluate_model
from util import to_cuda
from interpret_classification_results import interpret_documents
from visualize import visualize_patterns
from baseline.lstm import AveragingRnnClassifier

### Files

In [4]:
# Paths to the document and label files for each split, grouped per split.

# Training split
train_data_file, train_label_file = (
    "data/time_data_clean/train.data",
    "data/time_data_clean/train.labels",
)

# Development split
dev_data_file, dev_label_file = (
    "data/time_data_clean/dev.data",
    "data/time_data_clean/dev.labels",
)

# Test split
test_file, test_label = (
    "data/time_data_clean/test.data",
    "data/time_data_clean/test.labels",
)

### Import Embeddings

In [8]:
def _load_pickle(path):
    """Return the object deserialized from the pickle file at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    loading finishes (or fails), instead of being left to the garbage collector.
    """
    # NOTE(security): unpickling can execute arbitrary code. Only load files
    # that were produced by a trusted source.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Pre-computed artifacts, read from the kernel's working directory.
vocab = _load_pickle("vocab.p")
embeddings = _load_pickle("embeddings.p")
word_dim = _load_pickle("word_dim.p")

# BiLSTM

In [10]:
# Seed PyTorch's global RNG before the model is built so that its random
# initialisation is repeatable across "Restart & Run All".
# NOTE(review): this only covers torch's own RNG; if the training code draws
# from `random` or numpy (e.g. for shuffling), those need seeding as well.
SEED = 42
torch.manual_seed(SEED)

# Baseline classifier. In the module summary printed after training, these
# settings show up as LSTM(300, 100, bidirectional=True) followed by an MLP of
# Linear(200 -> 10) and Linear(10 -> 2).
model = AveragingRnnClassifier(
    hidden_dim=100,
    mlp_hidden_dim=10,
    num_mlp_layers=2,
    num_classes=2,
    embeddings=embeddings,
)

# params: 324032


### Training

In [11]:
def _load_split(data_path, label_path):
    """Read one split and return ``(inputs, text, labels)``.

    Documents are read with ``read_docs`` using the shared ``vocab`` and one
    padding token; labels are read with ``read_labels``.
    """
    inputs, text = read_docs(data_path, vocab, num_padding_tokens=1)
    labels = read_labels(label_path)
    return inputs, text, labels


# Same order as before: train documents, train labels, dev documents, dev labels.
train_input, train_text, train_labels = _load_split(train_data_file, train_label_file)
dev_input, dev_text, dev_labels = _load_split(dev_data_file, dev_label_file)

In [12]:
# Pair every document with its label, materialised as lists of
# (input, label) tuples.
train_data = [example for example in zip(train_input, train_labels)]
dev_data = [example for example in zip(dev_input, dev_labels)]

In [14]:
# All settings for this training run, gathered in one place so they can be
# inspected or tweaked before the call.
bilstm_train_kwargs = dict(
    # data and model
    train_data=train_data,
    dev_data=dev_data,
    model=model,
    num_classes=2,
    # checkpoint location and file-name prefix
    model_save_dir="data/models/modeltimebilstm/",
    model_file_prefix="traintimebilstm",
    # optimisation settings
    num_iterations=250,
    learning_rate=0.001,
    batch_size=150,
    patience=30,
    clip=None,
    # regularisation
    dropout=0.1,
    word_dropout=0,
    # hardware
    gpu=False,
)

# Kept as the cell's last expression so the value returned by train() is
# still displayed as the cell output.
train(**bilstm_train_kwargs)

  log_softmax(output).view(batch.size(), num_classes),


....................

num predicted 1s: 0
num gold 1s:      343
num predicted 1s: 0
num gold 1s:      207
iteration:       0 train time:     0.080m, eval time:     0.025m train loss:        0.648 train_acc:   65.700% dev loss:        0.641 dev_acc:   64.433%
New best acc!
New best dev!
saving model to modeltimebilstm/traintimebilstm_0.pth
....................

num predicted 1s: 0
num gold 1s:      320
num predicted 1s: 0
num gold 1s:      207
iteration:       1 train time:     0.172m, eval time:     0.024m train loss:        0.615 train_acc:   68.000% dev loss:        0.633 dev_acc:   64.433%
New best dev!
saving model to modeltimebilstm/traintimebilstm_1.pth
....................

num predicted 1s: 0
num gold 1s:      314
num predicted 1s: 0
num gold 1s:      207
iteration:       2 train time:     0.262m, eval time:     0.026m train loss:        0.603 train_acc:   68.600% dev loss:        0.626 dev_acc:   64.433%
New best dev!
saving model to modeltimebilstm/traintimebilstm_2.pth
.....

AveragingRnnClassifier(
  (rnn): Rnn(
    (rnn): LSTM(300, 100, bidirectional=True)
  )
  (mlp): MLP(
    (layers): ModuleList(
      (0): Linear(in_features=200, out_features=10, bias=True)
      (1): Linear(in_features=10, out_features=2, bias=True)
    )
  )
)

In [15]:
# Persist the model's parameters (state dict only, not the whole module object).
bilstm_state = model.state_dict()
torch.save(bilstm_state, "data/models/modeltimebilstm.pth")