In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import LearningRateScheduler
from sklearn.model_selection import StratifiedKFold
import random
import os
import gc
import time

from constants import *
from callback import CustomCallback
from base_model import get_base_model
from head_model import get_combined_model
from loss import get_loss
from data_reading import read_data
from data_preparation import get_train_data, get_test_data
from utils import *

In [3]:
from tensorflow.python.client import device_lib

print(tf.__version__)
print(device_lib.list_local_devices())

2.2.0
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 14184195797816832121
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 15303945521753371595
physical_device_desc: "device: XLA_CPU device"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 5254660559198242221
physical_device_desc: "device: XLA_GPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 3868065792
locality {
  bus_id: 1
  links {
  }
}
incarnation: 1964583407835723230
physical_device_desc: "device: 0, name: GeForce GTX 1050 Ti with Max-Q Design, pci bus id: 0000:01:00.0, compute capability: 6.1"
]


In [4]:
params = {
    "base_path": "..",
    "mode": "debug:1600",
    "att_num": 68,
    "weights_att_num": None,
    "n_fold": 1,
    "start_epoch": 4,
    "wo_fitting": False,

    "base_model": "roberta-base",
    "head_model": "default",
    "lr": 0.00003, # "base_lr": 0.00002, "max_lr": 0.00004,
    "loss": "CCE", #"{'CCE':1, 'JEL':0.1}",
    "label_smoothing": 0.3,
}

In [5]:
# Seed $ Logging
seed_everything(SEED)
log_dir_path, log_path = init_logging(params)

with open(log_path, 'a') as f:
    f.write(f'\n[base_model] {params["base_model"]}')

# Read  data
print("Read data...")
train_df, test_df, submission_df = read_data(params)

# Splitter
skf = StratifiedKFold(n_splits=N_SPLIT, shuffle=True, random_state=777)
splits = list(skf.split(train_df.index.values, train_df.sentiment.values))
tr_idx, val_idx = splits[params["n_fold"] - 1]
test_idx = np.arange(N_TEST)

if "debug" in params["mode"]:
    n_debug = int(params["mode"].split(":")[1])
    tr_idx, val_idx = tr_idx[:n_debug], val_idx[:n_debug]
    test_idx = test_idx[:n_debug]

# Build & Compile model
print("Build & Compile model...")
tokenizer, base_model = get_base_model(params)
combined_model = get_combined_model(base_model, params)

opt = tf.keras.optimizers.Adam(learning_rate=params["lr"])
loss = get_loss(params)
combined_model.compile(loss=loss, optimizer=opt)

# Prepare  data
print("Prepare data...")
known_idx = np.array(list(set(tr_idx) | set(val_idx)))
input_ids, attention_mask, token_type_ids, start_tokens, end_tokens, train_sample_ind2new_ind2old_ind = get_train_data(train_df, tokenizer, idx=known_idx)
test_word_ids, test_mask, test_segm_ids, test_sample_ind2new_ind2old_ind = get_test_data(test_df, tokenizer, idx=test_idx)

# # Model hash
# print(f'base_model hash: {np.array(base_model(test_word_ids[:16], test_mask[:16], test_segm_ids[:16])[0]).sum():.3}')
# print(f'head_model hash: {combined_model.layers[-6].weights[0].numpy().sum():.3}')

# Splitting data
print("Splitting data...")
tr_df = train_df.loc[tr_idx].reset_index(drop=True).set_index(tr_idx)
val_df = train_df.loc[val_idx].reset_index(drop=True).set_index(val_idx)

tr_word_ids, tr_mask, tr_segm_ids, tr_starts, tr_ends = input_ids[tr_idx,], attention_mask[tr_idx,], token_type_ids[tr_idx,], start_tokens[tr_idx,], end_tokens[tr_idx,]
tr_targets = np.concatenate([tr_starts, tr_ends], axis=1)
val_word_ids, val_mask, val_segm_ids, val_starts, val_ends = input_ids[val_idx,], attention_mask[val_idx,], token_type_ids[val_idx,], start_tokens[val_idx,], end_tokens[val_idx,]

# Check Correcness
print("Check Correcness...")
tr_df["is_correct"] = tr_df.apply(lambda row: (" " + row.text + " ").find(" " + row.selected_text + " ") >= 0, axis=1)
print(f'correct samples: {tr_df["is_correct"].mean():3f}')

tr_df["recover_selected_text"] = get_st_prediction(tr_starts, tr_ends, tr_df, train_sample_ind2new_ind2old_ind)
tr_df["recover_jaccard"] = tr_df.apply(lambda row: jaccard(row["recover_selected_text"], row["selected_text"]), axis=1)
assert np.all(tr_df[tr_df["is_correct"]]["recover_jaccard"] == 1)
print(f'preprocessing OK!')


print(f'##### FOLD {params["n_fold"]} #####')
gc.collect()

# Model Paths & Pretraining (optional)
best_weights_path = f'{log_dir_path}/{params["n_fold"]}/best_model.h5'
pre_trained_weights_path = f'../attempt_logs/{params["weights_att_num"] or params["att_num"]}/{params["n_fold"]}/best_model.h5'

pretrained_score = 0
# if os.path.exists(pre_trained_weights_path):
#     combined_model.load_weights(pre_trained_weights_path)
#     start_proba, end_proba = get_proba_prediction(combined_model, val_word_ids, val_mask, val_segm_ids)
#     pretrained_score = get_score(start_proba, end_proba, val_df, train_sample_ind2new_ind2old_ind)
#     with open(log_path, 'a') as f:
#         f.write(f'\nWeights PreTrained from {pre_trained_weights_path}, pretrained_score: {pretrained_score:.5f}')

# Training (optional)
if not params["wo_fitting"]:
    lr_scheduler = LearningRateScheduler(lambda epoch: 3e-5 * 0.2**epoch)
    custom_callback = CustomCallback(
        combined_model,
        val_word_ids, val_mask, val_segm_ids, val_df, train_sample_ind2new_ind2old_ind,
        params["n_fold"],
        params["start_epoch"],
        log_path,
        pretrained_score,
        best_weights_path
    )

    n_epoch = N_EPOCH - params["start_epoch"] + 1
    combined_model.fit(
        [tr_word_ids, tr_mask, tr_segm_ids], [tr_starts, tr_ends], #tr_targets,
        batch_size=BATCH_SIZE,
        epochs=n_epoch,
        callbacks=[
            custom_callback,
            lr_scheduler
        ],
        verbose=1,
    )
    
combined_model.load_weights(best_weights_path)

# scores = {}
# for name, word_ids, mask, segm_ids, df, sample_ind2new_ind2old_ind in [
#     ("train"      ,  tr_word_ids,   tr_mask,   tr_segm_ids,   tr_df, train_sample_ind2new_ind2old_ind),
#     ("validation" , val_word_ids,  val_mask,  val_segm_ids,  val_df, train_sample_ind2new_ind2old_ind),
#     ("test"      , test_word_ids, test_mask, test_segm_ids, test_df,  test_sample_ind2new_ind2old_ind)
# ]:
#     print(f'{name} prediction ...')
#     start_proba, end_proba = get_proba_prediction(combined_model, word_ids, mask, segm_ids)
#     if name != "test":
#         scores[name] = get_score(start_proba, end_proba, df, sample_ind2new_ind2old_ind)

# with open(log_path, 'a') as f:
#     f.write(f'\n[fold: {params["n_fold"]}] Ensure Scores : train score: {scores["train"]:.5f}, validation score: {scores["validation"]:.5f}]')

Read data...
Build & Compile model...
roberta-base
Prepare data...


HBox(children=(FloatProgress(value=0.0, max=3200.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1600.0), HTML(value='')))


Splitting data...
Check Correcness...
correct samples: 0.890625
preprocessing OK!
##### FOLD 1 #####
Epoch 1/2
Epoch 2/2

KeyboardInterrupt: 

In [11]:
(tr_word_ids[0] == 1).sum()

98

In [12]:
(tr_word_ids[1] == 1).sum()

95

In [None]:
tr_starts[::, 50::].sum(), tr_ends[::, 50::].sum()

In [7]:
tr_starts[::, 113:]

array([], shape=(1600, 0), dtype=int32)

In [6]:
import numpy as np
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Input, Dense, Concatenate, Activation
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import models
tf.compat.v1.disable_eager_execution() # for tf placeholders

def smoothed_cce_loss(y_true, y_pred):
    print("y_true", y_true)
    print("y_pred", y_pred)
#     ls = params["label_smoothing"] or 0
    ls = 0.1

#     max_len = K.shape(y_pred)[1] // 2
    max_len = 100
    start_pred, end_pred = y_pred[:, :max_len], y_pred[:, MAX_LEN:MAX_LEN + max_len]
    start_true, end_true = y_true[:, :max_len], y_true[:, MAX_LEN:MAX_LEN + max_len]
    print("start_pred", start_pred)
    print("end_pred", end_pred)
    print("start_true", start_true)
    print("end_true", end_true)
    

    start_loss = tf.keras.losses.categorical_crossentropy(start_true, start_pred, label_smoothing=ls)
    end_loss =   tf.keras.losses.categorical_crossentropy(  end_true,   end_pred, label_smoothing=ls)
    loss = tf.reduce_mean(start_loss + end_loss)

    return loss

In [7]:
MAX_LEN = 113

y_true = tr_targets[:8]
y_pred = tr_targets[:8]

y_true.shape, y_pred.shape

((8, 226), (8, 226))

In [8]:
max_len = 100

assert y_pred[::, max_len:MAX_LEN].sum() + y_pred[::, MAX_LEN + max_len:].sum() == 0
y_pred[::, max_len:MAX_LEN].shape, y_pred[::, MAX_LEN + max_len:].shape, 

((8, 13), (8, 13))

In [71]:
y_pred_inp = tf.compat.v1.placeholder(tf.float32, shape=[None, 226])
y_true_inp = tf.compat.v1.placeholder(tf.float32, shape=[None, 226])

cce = smoothed_cce_loss(y_pred_inp, y_true_inp)

sess = tf.compat.v1.Session()
jel = sess.run(cce, feed_dict={y_pred_inp: y_pred, y_true_inp: y_true})
jel

y_true Tensor("Placeholder_26:0", shape=(None, 226), dtype=float32)
y_pred Tensor("Placeholder_27:0", shape=(None, 226), dtype=float32)
start_pred Tensor("strided_slice_84:0", shape=(None, 100), dtype=float32)
end_pred Tensor("strided_slice_85:0", shape=(None, 100), dtype=float32)
start_true Tensor("strided_slice_86:0", shape=(None, 100), dtype=float32)
end_true Tensor("strided_slice_87:0", shape=(None, 100), dtype=float32)


3.1913834

In [12]:
tr_targets.shape

(160, 226)

In [None]:
base_model hash: 2.49e+04
head_model hash: -0.233