-
Notifications
You must be signed in to change notification settings - Fork 0
/
model_setup.py
157 lines (128 loc) · 7.35 KB
/
model_setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Feb 23 15:29:14 2019
@author: ignatius
"""
import tensorflow as tf
import tensorflow.contrib.eager as tfe
#import matplotlib.pyplot as plt
# Switch TensorFlow to eager execution at import time. A ValueError raised by
# the call is reported as eager mode already being active instead of aborting.
try:
    tf.enable_eager_execution()
except ValueError:
    print('Already running Eagerly')
else:
    print('Running in Eager mode.')
# Only 1 hidden layer is enough
def configure_tagger(input_shape, output_length, dropout=0.0, batchnorm=True):
    """Build the feed-forward tagger as a ``tf.keras.Sequential`` model.

    The network flattens each input instance, applies one 256-unit ReLU
    hidden layer followed by dropout, and ends in a linear layer emitting
    ``output_length`` logits. When ``batchnorm`` is true, a
    ``BatchNormalization(axis=1)`` layer is inserted after the flatten layer
    and again after the hidden layer.

    Args:
        input_shape: Shape of a single input instance, handed to ``Flatten``.
        output_length: Number of units in the output layer (tagset size).
        dropout: Rate passed to the ``Dropout`` layer.
        batchnorm: Whether to add the two batch-normalisation layers.

    Returns:
        The configured, untrained ``tf.keras.Sequential`` model.
    """
    print(f'\nTagger configuration:\n\t- input shape={input_shape}\n\t- dropout={dropout}\n\t- batchnorm={batchnorm}')

    layers = tf.keras.layers

    # Input layer: flattens the matrix of word vectors into one vector.
    stack = [layers.Flatten(input_shape=input_shape, name='flatten_input')]
    if batchnorm:
        stack.append(layers.BatchNormalization(axis=1))

    # Hidden layer: 256 neurons with a ReLU non-linearity.
    stack.append(layers.Dense(256, activation=tf.nn.relu, name='input_to_H1'))
    if batchnorm:
        stack.append(layers.BatchNormalization(axis=1))

    # Dropout, then the output layer producing one logit per tag.
    stack.append(layers.Dropout(dropout))
    stack.append(layers.Dense(output_length, name='H1_to_logits'))

    tagger = tf.keras.Sequential(stack)
    print('Model successfully configured!\nTraining in progress...')
    return tagger
# The optimizer is responsible for controlling the learning rate.
def train_tagger(train_ds, eval_ds, model,
                 optimizer=None,
                 num_epochs=100, result_point=5, eval_point=20):
    """Train ``model`` and evaluate it on ``eval_ds`` after every epoch.

    Side effects: summaries are written through a file writer created for the
    ``'log'`` directory, and checkpoints are saved under the
    ``'checkpoints/ckpt'`` prefix whenever an evaluation report is printed.

    Args:
        train_ds: Iterable yielding ``(instance_batch, label_batch)`` pairs;
            it is iterated once per epoch.
        eval_ds: Pair ``(instances, labels)`` used for evaluation.
        model: Callable accepting ``(inputs, training=...)`` and returning
            logits; it must expose ``trainable_variables``.
        optimizer: Optimizer used to apply the gradients. ``None`` (the
            default) creates a fresh ``tf.train.AdamOptimizer()`` for this
            call, so optimizer state is never shared between calls.
        num_epochs: Number of passes over ``train_ds``.
        result_point: Print the training loss/accuracy every this many epochs
            (and always on the last epoch).
        eval_point: Print the evaluation loss/accuracy and save a checkpoint
            every this many epochs (and always on the last epoch).

    Returns:
        Tuple ``(epoch_accuracies, epoch_losses, eval_accuracies,
        eval_losses)``; each list holds one entry per epoch.
    """
    if optimizer is None:
        # Built per call: a default created in the signature would be a single
        # optimizer instance shared by every call of this function.
        optimizer = tf.train.AdamOptimizer()

    # Variable that keeps track of how many training steps have been run.
    step_counter = tf.train.get_or_create_global_step()

    checkpoint_prefix = 'checkpoints/ckpt'
    root = tf.train.Checkpoint(optimizer=optimizer,
                               model=model,
                               optimizer_step=step_counter)

    summary_writer = tf.contrib.summary.create_file_writer('log')
    summary_writer.set_as_default()

    # Extract the eval instances and labels.
    eval_insts, eval_labels = eval_ds

    # Per-epoch training and evaluation histories.
    epoch_losses, epoch_accuracies = [], []
    eval_losses, eval_accuracies = [], []

    for epoch in range(1, num_epochs + 1):
        # Fresh metric trackers so each epoch is measured on its own.
        loss_avg = tfe.metrics.Mean()
        accuracy = tfe.metrics.Accuracy()

        for instance_batch, label_batch in train_ds:
            # Record the forward pass so it can be differentiated afterwards.
            with tf.GradientTape() as tape:
                # Logits (un-normalised scores) for the current batch.
                logits = model(instance_batch, training=True)
                # Mean cross-entropy loss over the batch.
                loss_value = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits, labels=label_batch))

            # Metrics are updated by calling them.
            loss_avg(loss_value)
            accuracy(tf.argmax(logits, axis=1, output_type=tf.int32), label_batch)

            # Differentiate only with respect to trainable parameters;
            # non-trainable variables have no gradient to apply.
            variables = model.trainable_variables
            grads = tape.gradient(loss_value, variables)
            optimizer.apply_gradients(zip(grads, variables),
                                      global_step=step_counter)

        # Average loss and accuracy for the epoch.
        epoch_loss = loss_avg.result()
        epoch_losses.append(epoch_loss)
        epoch_accuracy = accuracy.result()
        epoch_accuracies.append(epoch_accuracy)

        is_last_epoch = epoch == num_epochs
        if epoch % result_point == 0 or is_last_epoch:
            print(f"Epoch {epoch:02d}: Loss = {epoch_loss:.3f}, Accuracy = {epoch_accuracy:.3%}")

        # Evaluate every epoch so the histories stay aligned and the report
        # below always has values to print.
        logits = model(eval_insts, training=False)
        eval_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=eval_labels))
        eval_losses.append(eval_loss)

        # A dedicated metric: reusing the training tracker would blend the
        # training predictions into the reported evaluation accuracy.
        eval_accuracy_metric = tfe.metrics.Accuracy()
        eval_accuracy_metric(tf.argmax(logits, axis=1, output_type=tf.int32),
                             eval_labels)
        eval_accuracy = eval_accuracy_metric.result()
        eval_accuracies.append(eval_accuracy)

        with tf.contrib.summary.record_summaries_every_n_global_steps(1):
            tf.contrib.summary.scalar('training_accuracy', epoch_accuracy)
            tf.contrib.summary.scalar('training_loss', epoch_loss)
            tf.contrib.summary.scalar('evaluation_accuracy', eval_accuracy)
            tf.contrib.summary.scalar('evaluation_loss', eval_loss)

        if epoch % eval_point == 0 or is_last_epoch:
            # NOTE(review): the printed eval index is 1 + epoch // eval_point,
            # so the first report is labelled 02 - confirm this is intended.
            print(f"{42*'-'}\n-Eval {1+(epoch//eval_point):02d}: Loss = {eval_loss:.3f}, Accuracy = {eval_accuracy:.3%}\n{42*'='}\n")
            root.save(checkpoint_prefix)

    return epoch_accuracies, epoch_losses, eval_accuracies, eval_losses
def test_tagger(model, test_ds):
    """Score ``model`` on a test set.

    Args:
        model: Callable accepting ``(inputs, training=False)`` and returning
            logits.
        test_ds: Pair ``(instances, labels)``.

    Returns:
        Tuple ``(accuracy, loss)``: the accuracy of the arg-max predictions
        against the labels, and the mean sparse softmax cross-entropy loss.
    """
    instances, gold_labels = test_ds

    # Forward pass in inference mode.
    scores = model(instances, training=False)

    # Cross-entropy per example; averaged when returned.
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=scores, labels=gold_labels)

    # Compare the most likely predicted label to the actual label.
    predicted = tf.argmax(scores, axis=1, output_type=tf.int32)
    metric = tfe.metrics.Accuracy()
    metric(predicted, gold_labels)

    return metric.result(), tf.reduce_mean(per_example_loss)