### Import Libraries

In [1]:
from collections import OrderedDict
import torch
import tensorboardX
import dill as pickle

In [2]:
import sys

# Directory added to the import path for the project-local imports that
# follow (presumably where `data`, `soft_patterns`, `util`, ... live — confirm).
SOPA_DIR = "sopa_master/"

# Guarded so that re-running this cell does not stack duplicate entries on
# sys.path.
if SOPA_DIR not in sys.path:
    sys.path.append(SOPA_DIR)

In [3]:
from data import read_embeddings, read_docs, read_labels
from soft_patterns import ProbSemiring, MaxPlusSemiring, LogSpaceMaxTimesSemiring, SoftPatternClassifier, train, Batch, evaluate_model
from util import to_cuda
from interpret_classification_results import interpret_documents
from visualize import visualize_patterns
from baseline.cnn import PooledCnnClassifier

### Files

In [4]:
# Every split lives under the same directory; the prefix is kept in one place
# so the six paths below cannot drift apart.
time_data_dir = "data/time_data_clean"

# Training split: documents and their labels.
train_data_file = time_data_dir + "/train.data"
train_label_file = time_data_dir + "/train.labels"

# Development split, passed to `train` as dev_data.
dev_data_file = time_data_dir + "/dev.data"
dev_label_file = time_data_dir + "/dev.labels"

# Held-out test split, only read in the final evaluation section.
test_file = time_data_dir + "/test.data"
test_label = time_data_dir + "/test.labels"

### Loading Embeddings

In [5]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    loading finishes, instead of being left open until garbage collection.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


# SECURITY: unpickling can execute arbitrary code. `pickle` here is the `dill`
# alias imported at the top of the notebook; only load files you created
# yourself or otherwise trust.
vocab = _load_pickle("vocab.p")            # passed to read_docs below
embeddings = _load_pickle("embeddings.p")  # passed to the classifier constructor
word_dim = _load_pickle("word_dim.p")      # not used by the cells visible here

### CNN Baseline Model

In [6]:
# Seed torch before the layers are created so the random weight
# initialisation is identical on every Restart & Run All.
# NOTE(review): only torch's RNG is seeded here; if `train` shuffles batches
# with `random` or `numpy`, those need seeding too — confirm.
SEED = 0
torch.manual_seed(SEED)

# CNN baseline classifier. The comments describe how each argument shows up in
# the model repr printed after training.
model = PooledCnnClassifier(
    window_size=4,       # Conv1d kernel_size
    num_cnn_layers=1,    # a single Conv1d layer appears in the repr
    cnn_hidden_dim=50,   # Conv1d output channels / MLP input features
    mlp_hidden_dim=10,   # width of the hidden Linear layer
    num_mlp_layers=2,    # two Linear layers: 50 -> 10 -> 2
    num_classes=2,       # out_features of the last Linear layer
    embeddings=embeddings,
)

# params: 60582 (Conv1d 300x50x4 + 50 = 60050, Linear 50x10 + 10 = 510, Linear 10x2 + 2 = 22)


### Training

In [7]:
# Padding applied to every document of both splits.
# NOTE(review): 3 equals window_size - 1 of the CNN built earlier in this
# notebook — presumably intentional; confirm before changing either value.
pad_tokens = 3

# read_docs yields a pair; the first element is what gets zipped with the
# labels for training, the second is kept as the *_text variable.
train_input, train_text = read_docs(train_data_file, vocab, num_padding_tokens=pad_tokens)
dev_input, dev_text = read_docs(dev_data_file, vocab, num_padding_tokens=pad_tokens)

# Gold labels for the same two splits.
train_labels = read_labels(train_label_file)
dev_labels = read_labels(dev_label_file)

In [8]:
# zip() stops at the shorter input, so a mismatch between the number of
# documents and the number of labels would silently drop examples. Fail
# loudly instead.
for split_name, split_docs, split_labels in (
    ("train", train_input, train_labels),
    ("dev", dev_input, dev_labels),
):
    if len(split_docs) != len(split_labels):
        raise ValueError(
            "{} split: {} documents but {} labels".format(
                split_name, len(split_docs), len(split_labels)
            )
        )

# Each dataset is a list of (document, label) pairs, as passed to `train`.
train_data = list(zip(train_input, train_labels))
dev_data = list(zip(dev_input, dev_labels))

In [9]:
# Checkpoint location and file-name prefix. The recorded log shows files named
# "<prefix>_<iteration>.pth" being written whenever a new best is reported.
# NOTE(review): the recorded log prints "modeltimecnn/..." rather than
# "data/models/modeltimecnn/..." — the saved output may come from an earlier
# run with a different save directory; confirm.
checkpoint_dir = "data/models/modeltimecnn/"
checkpoint_prefix = "traintimecnn"

# Optimisation settings forwarded unchanged to `train`.
# `patience` is presumably the early-stopping tolerance — confirm in soft_patterns.
optim_settings = dict(
    num_iterations=250,
    learning_rate=0.005,
    batch_size=150,
    patience=30,
    dropout=0.2,
)

# Kept as the last expression of the cell so the returned value (the model
# repr in the recorded output) is still displayed.
train(
    train_data=train_data,
    dev_data=dev_data,
    model=model,
    model_save_dir=checkpoint_dir,
    model_file_prefix=checkpoint_prefix,
    num_classes=2,
    gpu=False,
    **optim_settings
)

..

  log_softmax(output).view(batch.size(), num_classes),


..................

num predicted 1s: 0
num gold 1s:      334
num predicted 1s: 1
num gold 1s:      207
iteration:       0 train time:     0.026m, eval time:     0.010m train loss:        0.656 train_acc:   66.600% dev loss:        0.627 dev_acc:   64.605%
New best acc!
New best dev!
saving model to modeltimecnn/traintimecnn_0.pth
....................

num predicted 1s: 113
num gold 1s:      325
num predicted 1s: 79
num gold 1s:      207
iteration:       1 train time:     0.060m, eval time:     0.010m train loss:        0.586 train_acc:   77.800% dev loss:        0.544 dev_acc:   76.289%
New best acc!
New best dev!
saving model to modeltimecnn/traintimecnn_1.pth
....................

num predicted 1s: 172
num gold 1s:      321
num predicted 1s: 119
num gold 1s:      207
iteration:       2 train time:     0.095m, eval time:     0.010m train loss:        0.452 train_acc:   84.300% dev loss:        0.435 dev_acc:   82.474%
New best acc!
New best dev!
saving model to modeltimecnn/traintime

PooledCnnClassifier(
  (cnn): Cnn(
    (cnn): Conv1d(300, 50, kernel_size=(4,), stride=(1,))
  )
  (mlp): MLP(
    (layers): ModuleList(
      (0): Linear(in_features=50, out_features=10, bias=True)
      (1): Linear(in_features=10, out_features=2, bias=True)
    )
  )
)

In [10]:
# Persist the parameters currently held by `model`.
# NOTE(review): these are whatever weights `model` has after `train` returns;
# whether that matches the best-dev checkpoint saved during training is not
# visible here — confirm.
final_weights = model.state_dict()
torch.save(final_weights, "data/models/modeltimecnn.pth")

### Forward

In [11]:
# Same padding value as used when reading the train and dev splits.
test_pad_tokens = 3

# read_docs yields a pair: the inputs evaluated below and the matching text.
test_docs = read_docs(test_file, vocab, num_padding_tokens=test_pad_tokens)
test_input, test_text = test_docs

# Gold labels of the test split.
labels = read_labels(test_label)

In [12]:
# zip() stops at the shorter input; refuse to evaluate on a silently truncated
# test set when documents and labels are out of step.
if len(test_input) != len(labels):
    raise ValueError(
        "test split: {} documents but {} labels".format(len(test_input), len(labels))
    )

# List of (document, label) pairs, the format passed to evaluate_model.
test_data = list(zip(test_input, labels))

In [13]:
# Score the trained model on the held-out test pairs (CPU only).
# The returned float is presumably accuracy — confirm in soft_patterns.
test_score = evaluate_model(model, test_data, batch_size=150, gpu=False)

# Bare last expression so the notebook still displays the value.
test_score

num predicted 1s: 221
num gold 1s:      232


0.8858321870701513