In [1]:
import sys
sys.path.append('/home/jiajunb/neural-dimension-reduction')

In [2]:
import os

import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.nn import functional as F
from torch.utils.tensorboard import SummaryWriter

from src.models.distance_modeling import SurveyorDataSet, Surveyor, thesis_kl_div_add_mse_loss

import copy
from src.models.DenseNetwork import loss

torch.manual_seed(0)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


<torch._C.Generator at 0x7f14340ee850>

In [3]:
train_dataset = SurveyorDataSet.from_df('/home/jiajunb/neural-dimension-reduction/data/train.csv')
val_dataset = SurveyorDataSet.from_df('/home/jiajunb/neural-dimension-reduction/data/train.csv')

HBox(children=(FloatProgress(value=0.0, description='create triplets', max=176.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='create triplets', max=176.0, style=ProgressStyle(descript…




In [15]:
train_dataset.pairs[:3]

tensor([[    0, 59915],
        [    1, 48861],
        [    2, 38164]])

In [16]:
train_dataset.pairs[-3:]

tensor([[89997, 16895],
        [89998, 19109],
        [89999, 40660]])

In [17]:
train_dataset.labels.sum()

tensor(90000)

In [18]:
len(train_dataset.labels)

180000

In [4]:
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=1000, pin_memory=True)

In [19]:
weight_decay = 1e-5
learning_rate =1e-6
num_epoches = 400  # 300
writer = SummaryWriter('runs/surveyornet')

In [20]:
device = torch.device('cuda:1')
model = Surveyor()
model = model.to(device)


no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(
        nd in n for nd in no_decay) and p.requires_grad], 'weight_decay': weight_decay},
    {'params': [p for n, p in model.named_parameters() if any(
        nd in n for nd in no_decay) and p.requires_grad], 'weight_decay': 0.0}
]

optimizer = torch.optim.AdamW(params=optimizer_grouped_parameters, lr=learning_rate)


In [21]:
def train_one_epoch(train_loader, model, optimizer, verbose):
    model = model.to(device)
    model.train()
    loss_sum = 0.
    for i, batch in enumerate(train_loader):
        x1, x2, labels, q = batch
        x1, x2, labels, q = x1.to(device), x2.to(device), labels.to(device), q.to(device)
        logits, p, out1, out2, loss = model(x1, x2, q, labels)
        model.zero_grad()  # reset gradient
        loss.backward()
        optimizer.step()
        loss_sum += loss.item()
        if verbose and i % 20 == 0:
            print(f'training loss: {loss_sum / (i + 1):.4f}')
    return loss_sum / len(train_loader)

def val_one_epoch(val_loader, model):
    model.eval()
    loss_fn1 = nn.CrossEntropyLoss()
    loss_fn2 = thesis_kl_div_add_mse_loss
    preds_list = list()
    labels_list = list()
    val_xentropy_loss = 0.
    val_thesis_loss = 0.
    with torch.no_grad():
        for i, batch in enumerate(val_loader):
            x1, x2, labels, q = batch
            x1, x2, q = x1.to(device), x2.to(device), q.to(device)
            logits, p, out1, out2 = model(x1, x2, q, labels=None)
            preds = torch.argmax(F.softmax(logits, dim=1), dim=1)
            preds_list.append(preds.cpu())
            labels_list.append(labels.cpu())
            labels = labels.to(device)
            val_xentropy_loss += loss_fn1(logits, labels).item()
            val_thesis_loss += loss_fn2(p, q).item()
    y_preds = torch.cat(preds_list)
    y_golds = torch.cat(labels_list)
    accuracy = float((y_preds == y_golds).sum().item()) / len(y_preds)
    return val_xentropy_loss / len(y_preds), val_thesis_loss / len(y_preds), accuracy

In [22]:
def train_with_eval(train_loader, val_loader, model, optimizer, num_epoches, verbose):
    global writer
    best_model = None
    best_avg_xentropy_loss, best_avg_thesis_loss, best_val_accuracy = float('inf'), float('inf'), 0. 
    for epoch_idx in range(1, num_epoches + 1):
        avg_loss = train_one_epoch(train_loader, model, optimizer, False)
        avg_xentropy_loss, avg_thesis_loss, val_accuracy = val_one_epoch(val_loader, model)
        if val_accuracy >  best_val_accuracy:
            best_avg_xentropy_loss, best_avg_thesis_loss, best_val_accuracy = avg_xentropy_loss, avg_thesis_loss, val_accuracy
            best_model = copy.deepcopy(model.cpu())
        writer.add_scalar('train/avg_mixed_loss', avg_loss, epoch_idx)
        writer.add_scalar('val/avg_xentropy_loss', avg_xentropy_loss, epoch_idx)
        writer.add_scalar('val/avg_thesis_loss', avg_thesis_loss, epoch_idx)
        writer.add_scalar('val/avg_mixed_loss', avg_xentropy_loss + avg_thesis_loss, epoch_idx)
        writer.add_scalar('val/avg_thesis_loss', avg_thesis_loss, epoch_idx)
        writer.add_scalar('val/val_accuracy', val_accuracy, epoch_idx)
        if verbose and epoch_idx % 4 == 0:
            print(f'epoch [{epoch_idx}]/[{num_epoches}] training loss: {avg_loss:.4f} '
                  f'val_cross_entropy_loss: {avg_xentropy_loss:.4f} '
                  f'val_thesis_loss: {avg_thesis_loss:.4f} '
                  f'val_accuracy: {val_accuracy:.4f} ')
    return best_avg_xentropy_loss, best_avg_thesis_loss, best_val_accuracy, best_model, model

In [23]:
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=1000, pin_memory=True)

In [None]:
best_avg_xentropy_loss, best_avg_thesis_loss, best_val_accuracy, best_model, final_model = train_with_eval(train_loader, val_loader, model, optimizer, num_epoches, True)


epoch [4]/[400] training loss: 3.5788 val_cross_entropy_loss: 0.0007 val_thesis_loss: 0.0026 val_accuracy: 0.5000 
epoch [8]/[400] training loss: 2.8313 val_cross_entropy_loss: 0.0007 val_thesis_loss: 0.0020 val_accuracy: 0.5000 
epoch [12]/[400] training loss: 2.6545 val_cross_entropy_loss: 0.0007 val_thesis_loss: 0.0019 val_accuracy: 0.5000 
epoch [16]/[400] training loss: 2.5720 val_cross_entropy_loss: 0.0007 val_thesis_loss: 0.0018 val_accuracy: 0.5000 
epoch [20]/[400] training loss: 2.5172 val_cross_entropy_loss: 0.0007 val_thesis_loss: 0.0018 val_accuracy: 0.5000 
epoch [24]/[400] training loss: 2.4717 val_cross_entropy_loss: 0.0007 val_thesis_loss: 0.0017 val_accuracy: 0.5000 
epoch [28]/[400] training loss: 2.4319 val_cross_entropy_loss: 0.0007 val_thesis_loss: 0.0017 val_accuracy: 0.5000 
epoch [32]/[400] training loss: 2.3991 val_cross_entropy_loss: 0.0007 val_thesis_loss: 0.0017 val_accuracy: 0.5000 
epoch [36]/[400] training loss: 2.3785 val_cross_entropy_loss: 0.0007 val_

In [None]:
best_avg_xentropy_loss, best_avg_thesis_loss, best_val_accuracy  # 400

In [None]:
best_avg_xentropy_loss, best_avg_thesis_loss, best_val_accuracy  # 300 1e-6

In [None]:
best_avg_xentropy_loss, best_avg_thesis_loss, best_val_accuracy  # 400 5e-7

In [None]:
writer.close()

In [None]:
# %load_ext tensorboard
# %tensorboard --logdir=runs

In [None]:
# torch.save({
#     "best_model": best_model.state_dict(),
#     "best_avg_xentropy_loss": best_avg_xentropy_loss,
#     "best_avg_thesis_loss": best_avg_thesis_loss, 
#     "best_val_accuracy": best_val_accuracy
# }, '../saves/surveyor.on.full.0.7675')

In [None]:
best_model = Surveyor()
best_model.load_state_dict(torch.load('../saves/surveyor.on.full.0.745')['best_model'])
best_model.eval()

val_dataset = SurveyorDataSet.from_df('/home/jiajunb/neural-dimension-reduction/data/dev.csv')
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=1000, pin_memory=True)

gold = loss.nearest_neighbors(val_dataset.data.clone(), top_k=1, device='cuda')

In [None]:
def extract_embeddings(data_loader, model):
    model.eval()
    embedding = list()
    with torch.no_grad():
        for i, batch in enumerate(data_loader):
            x = batch.to(device)
            out = model.encode_batch(x)
            embedding.append(out.cpu())
    return torch.cat(embedding, dim=0)

val_x_embedded = extract_embeddings(val_loader, best_model)

In [None]:
nearenearest_neighborsneighbors

In [None]:
retriever = RetrieveSystem(best_model)
block_list = torch.arange(val_x_embedded.shape[0])
cls_pred_nn_top, p_distances_nn_top = retriever.retrieve_corpus(val_x_embedded, block_list, val_x_embedded)


In [None]:
cls_results = retriever.recall(cls_pred_nn_top, gold, at_n=None)
p_results = retriever.recall(p_distances_nn_top, gold, at_n=None)