### Import Libraries

In [1]:
from collections import OrderedDict
import torch
import tensorboardX
import dill as pickle

In [2]:
import sys
sys.path.append("sopa_master/")

In [6]:
from data import read_embeddings, read_docs, read_labels
from soft_patterns import ProbSemiring, MaxPlusSemiring, LogSpaceMaxTimesSemiring, SoftPatternClassifier, train, Batch, evaluate_accuracy
from util import to_cuda
from interpret_classification_results import interpret_documents
from visualize import visualize_patterns
from baseline.dan import DanClassifier

### Files

In [7]:
train_data_file = "data/time_data_clean/train.data"
train_label_file ="data/time_data_clean/train.labels"
dev_data_file = "data/time_data_clean/dev.data"
dev_label_file = "data/time_data_clean/dev.labels"
test_file = "data/time_data_clean/test.data"
test_label="data/time_data_clean/test.labels"

### Model's parameters

In [8]:
vocab = pickle.load(open("vocab.p","rb"))
embeddings = pickle.load(open("embeddings.p","rb"))
word_dim = pickle.load(open("word_dim.p","rb"))

# DAN

In [9]:
model=DanClassifier(
    mlp_hidden_dim=10,
    num_mlp_layers=2,
    num_classes=2,
    embeddings=embeddings
)

# params: 3032


### Training

In [10]:
train_input, train_text = read_docs(train_data_file, vocab, num_padding_tokens=1)
train_labels = read_labels(train_label_file)
dev_input, dev_text = read_docs(dev_data_file, vocab, num_padding_tokens=1)
dev_labels = read_labels(dev_label_file)

In [11]:
train_data = list(zip(train_input, train_labels))
dev_data = list(zip(dev_input, dev_labels))

In [12]:
train(
    train_data=train_data,
    dev_data=dev_data,
    model=model,
    model_save_dir="data/models/modeltimedan/",
    num_iterations=250,
    model_file_prefix="traintimedan",
    learning_rate=0.05,
    batch_size=150,
    num_classes=2,
    patience=30,
    gpu=False,
    word_dropout=0.1,
)

.....

  log_softmax(output).view(batch.size(), num_classes),


...............

num predicted 1s: 0
num gold 1s:      317
num predicted 1s: 0
num gold 1s:      207
iteration:       0 train time:     0.017m, eval time:     0.008m train loss:        0.687 train_acc:   68.300% dev loss:        0.669 dev_acc:   64.433%
New best acc!
New best dev!
saving model to modeltimedan/traintimedan_0.pth
....................

num predicted 1s: 0
num gold 1s:      330
num predicted 1s: 0
num gold 1s:      207
iteration:       1 train time:     0.042m, eval time:     0.008m train loss:        0.622 train_acc:   67.000% dev loss:        0.629 dev_acc:   64.433%
New best dev!
saving model to modeltimedan/traintimedan_1.pth
....................

num predicted 1s: 0
num gold 1s:      311
num predicted 1s: 0
num gold 1s:      207
iteration:       2 train time:     0.067m, eval time:     0.009m train loss:        0.592 train_acc:   68.900% dev loss:        0.582 dev_acc:   64.433%
New best dev!
saving model to modeltimedan/traintimedan_2.pth
....................

num pr

....................

num predicted 1s: 379
num gold 1s:      327
num predicted 1s: 246
num gold 1s:      207
iteration:      27 train time:     0.739m, eval time:     0.008m train loss:        0.368 train_acc:   85.600% dev loss:        0.460 dev_acc:   75.086%
....................

num predicted 1s: 309
num gold 1s:      322
num predicted 1s: 215
num gold 1s:      207
iteration:      28 train time:     0.764m, eval time:     0.008m train loss:        0.363 train_acc:   86.900% dev loss:        0.458 dev_acc:   77.320%
....................

num predicted 1s: 249
num gold 1s:      333
num predicted 1s: 167
num gold 1s:      207
iteration:      29 train time:     0.789m, eval time:     0.008m train loss:        0.364 train_acc:   86.200% dev loss:        0.468 dev_acc:   78.694%
....................

num predicted 1s: 263
num gold 1s:      323
num predicted 1s: 191
num gold 1s:      207
iteration:      30 train time:     0.813m, eval time:     0.008m train loss:        0.352 train_acc: 

DanClassifier(
  (mlp): MLP(
    (layers): ModuleList(
      (0): Linear(in_features=300, out_features=10, bias=True)
      (1): Linear(in_features=10, out_features=2, bias=True)
    )
  )
)

In [13]:
torch.save(model.state_dict(), "data/models/modeltimedan.pth")

### Forward

In [14]:
test_input, test_text = read_docs(test_file, vocab, num_padding_tokens=3)
labels=read_labels(test_label)

In [15]:
test_data = list(zip(test_input, labels))

In [16]:
evaluate_accuracy(model, test_data, batch_size=150, gpu=False)

num predicted 1s: 232
num gold 1s:      232


0.8101788170563962