Skip to content
This repository has been archived by the owner on Mar 3, 2024. It is now read-only.

Commit

Permalink
Add STS-B demo
Browse files Browse the repository at this point in the history
  • Loading branch information
CyberZHG committed Aug 3, 2019
1 parent 3579fb3 commit 83f5170
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 0 deletions.
Empty file added demo/GLUE/STS-B/__init__.py
Empty file.
110 changes: 110 additions & 0 deletions demo/GLUE/STS-B/stsb.py
@@ -0,0 +1,110 @@
import os

import numpy as np
from scipy.stats import pearsonr, spearmanr

from keras_xlnet.backend import keras
from keras_bert.layers import Extract
from keras_xlnet import PretrainedList, get_pretrained_paths
from keras_xlnet import Tokenizer, load_trained_model_from_checkpoint, ATTENTION_TYPE_BI

EPOCH = 10
BATCH_SIZE = 32
SEQ_LEN = 140
MODEL_NAME = 'STS-B.h5'

current_path = os.path.dirname(os.path.abspath(__file__))
train_path = os.path.join(current_path, 'train.tsv')
dev_path = os.path.join(current_path, 'dev.tsv')

paths = get_pretrained_paths(PretrainedList.en_cased_base)
tokenizer = Tokenizer(paths.vocab)


# Read data
class DataSequence(keras.utils.Sequence):

def __init__(self, x, y):
self.x = x
self.y = y

def __len__(self):
return (len(self.y) + BATCH_SIZE - 1) // BATCH_SIZE

def __getitem__(self, index):
s = slice(index * BATCH_SIZE, (index + 1) * BATCH_SIZE)
return [item[s] for item in self.x], self.y[s]


def generate_sequence(path):
tokens, classes, scores = [], [], []
max_len = 0
with open(path) as reader:
reader.readline()
for line in reader:
line = line.strip()
parts = line.split('\t')
encoded_a, encoded_b = tokenizer.encode(parts[7]), tokenizer.encode(parts[8])
encoded = encoded_a + [tokenizer.SYM_SEP] + encoded_b + [tokenizer.SYM_SEP]
max_len = max(max_len, len(encoded))
encoded = [tokenizer.SYM_PAD] * (SEQ_LEN - 1 - len(encoded)) + encoded + [tokenizer.SYM_CLS]
tokens.append(encoded)
classes.append(round(float(parts[9])))
scores.append(float(parts[9]))
tokens, classes = np.array(tokens), np.array(classes)
segments = np.zeros_like(tokens)
segments[:, -1] = 1
lengths = np.zeros_like(tokens[:, :1])
return DataSequence([tokens, segments, lengths], classes), scores


current_path = os.path.dirname(os.path.abspath(__file__))
train_seq, _ = generate_sequence(train_path)
dev_seq, scores = generate_sequence(dev_path)

# Load pretrained model
model = load_trained_model_from_checkpoint(
config_path=paths.config,
checkpoint_path=paths.model,
batch_size=BATCH_SIZE,
memory_len=0,
target_len=SEQ_LEN,
in_train_phase=False,
attention_type=ATTENTION_TYPE_BI,
)


# Build classification model
last = Extract(index=-1, name='Extract')(model.output)
dense = keras.layers.Dense(units=768, activation='tanh', name='Dense')(last)
dropout = keras.layers.Dropout(rate=0.1, name='Dropout')(dense)
output = keras.layers.Dense(units=6, activation='softmax', name='Softmax')(dropout)
model = keras.models.Model(inputs=model.inputs, outputs=output)
model.summary()


# Fit model
if os.path.exists(MODEL_NAME):
model.load_weights(MODEL_NAME)

model.compile(
optimizer=keras.optimizers.Adam(lr=3e-5),
loss='sparse_categorical_crossentropy',
metrics=['sparse_categorical_accuracy'],
)

model.fit_generator(
generator=train_seq,
validation_data=dev_seq,
epochs=EPOCH,
callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=2)],
)

model.save_weights(MODEL_NAME)

# Evaluation
# Use dev set because the results of test set is unknown
classes = np.array([[0], [1], [2], [3], [4], [5]])
results = np.dot(model.predict_generator(dev_seq, verbose=True), classes).squeeze(axis=-1)
print('Pearson: %.2f' % (100.0 * pearsonr(results, scores)[0]))
print('Spearman: %.2f' % (100.0 * spearmanr(results, scores)[0]))

0 comments on commit 83f5170

Please sign in to comment.