In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import sys
import torch
import pre_process as proc
import models, parameters, histories, trainer
import glovar

In [3]:
# Set global directories
glovar.set_dirs()

# Parse configuration settings from command line.
# The notebook has no real command line, so everything after the program
# name is replaced (in place) by the single argument "model_name" before
# the parser runs.
del sys.argv[1:]
sys.argv.append("model_name")
params, arg_config = parameters.parse_arguments()

# Get or create History for this run name
history = histories.get(
    glovar.PKL_DIR,
    params.name,
    params.override,
    arg_config,
)

# Report config to be used (the History's config is used from here on)
config = history.config
print(config)

Getting history with name model_name; override=False...
Exists: False
Creating...
Config as follows:
	_lambda 		0.0
	batch_size 		32
	grad_clip_norm 		0.0
	hidden_size 		300
	learning_rate 		0.001
	p_keep_fc 		0.9
	p_keep_input 		0.9
	p_keep_rnn 		0.9
	projection_size 	200
	tune_embeddings 	False



In [5]:
# Get training datasets, node labels and vector representations.
# The labels returned for the TRAIN split are passed on to the TEST and VAL
# calls.
train_set, labels = proc.procDataset("TRAIN")
test_set, _ = proc.procDataset("TEST", labels)
val_set, _ = proc.procDataset("VAL", labels)

# Fail loudly instead of calling quit(): quit() is meant for the interactive
# interpreter, and inside a notebook kernel it requests a kernel shutdown
# without saying why, discarding everything loaded so far.
missing = [
    split_name
    for split_name, split in (("TEST", test_set), ("VAL", val_set))
    if not split
]
if missing:
    raise RuntimeError(
        "Empty or missing dataset(s): {}".format(", ".join(missing)))

embedding_matrix = proc.getEmbeddings(labels)

Reading from stored dataset
Done. Processed dataset read in 0m, 44s.
Reading from stored dataset
Done. Processed dataset read in 0m, 12s.
No processed dataset found.
Processing VAL dataset (2400 files)...
Done. 2400 files processed in 7m, 8s.


RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx

In [None]:
# Split training datasets into batches.
# Training and tuning (test) batches use the configured batch size; the
# validation set uses its own, smaller batch size.
batch_size = config.batch_size
# Single source of truth for the validation batch size, so the value passed
# to batchDataset and the value reported below cannot drift apart.
val_batch_size = 5

train_batches = trainer.batchDataset(batch_size, train_set)
test_batches = trainer.batchDataset(batch_size, test_set)
val_batches = trainer.batchDataset(val_batch_size, val_set)

print("{} batches of size {} in training dataset.".format(len(train_batches), batch_size))
print("{} batches of size {} in tuning dataset".format(len(test_batches), batch_size))
print("{} batches of size {} in validation dataset".format(len(val_batches), val_batch_size))

In [None]:
# Train the model

# Build the classifier from the run name, the active config and the
# embedding matrix.
print('Loading model...')
model = models.ClassificationModel(
    params.name,
    config,
    embedding_matrix,
)

# The trainer receives the model, the history object, the training and
# test batches, and glovar.CKPT_DIR (presumably the checkpoint directory).
print('Loading trainer...')
tr = trainer.Trainer(
    model,
    history,
    train_batches,
    test_batches,
    glovar.CKPT_DIR,
)

print('Training...')
tr.train()

In [None]:
# Plot the training losses stored in the history, one point per entry of
# history.epoch_losses, with the x axis numbered from 1.
x_data = list(range(1, len(history.epoch_losses) + 1))

fig, ax = plt.subplots(figsize=(10, 5))
# Major x ticks every 5 epochs, starting at 0.
plt.xticks(np.arange(0, len(history.epoch_losses) + 1, step=5))
ax.grid(linewidth=1)
ax.set(title='Training loss', xlabel='epoch', ylabel='loss')
fig.tight_layout()
ax.plot(x_data, history.epoch_losses)
plt.show()

In [None]:
# Plot the accuracies stored in the history: history.epoch_accs in black
# ("train accuracy") and history.tuning_accs in red ("test accuracy"),
# with the x axis numbered from 1.
x_data = list(range(1, len(history.epoch_accs) + 1))

fig, ax = plt.subplots(figsize=(10, 5))
# Major x ticks every 5 epochs, starting at 0.
plt.xticks(np.arange(0, len(history.epoch_accs) + 1, step=5))
# Ticks at 0.0, 0.1, ..., 1.0. linspace includes the end point, whereas
# np.arange(0, 1, step=0.1) stops at 0.9 and leaves no tick at 1.0.
plt.yticks(np.linspace(0, 1, 11))
ax.grid(linewidth=1)
ax.set_title('Model Accuracy')
ax.set_xlabel('epoch')
ax.set_ylabel('accuracy')
fig.tight_layout()
ax.plot(x_data, history.epoch_accs, 'k')
ax.plot(x_data, history.tuning_accs, 'r')
ax.legend(['train accuracy', 'test accuracy'])
plt.show()