In [1]:
from datetime import datetime
import os
import pickle
import math
import time

import torch
from torch import nn, optim
from torch.utils import data
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import numpy as np

In [4]:
# Load the pickled transcript embeddings from disk (presumably BERT sentence
# embeddings, judging by the file name -- confirm against the producer script).
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open files that come from a trusted source.
with open('embeddings/bert_embeddings.pickle', 'rb') as f:
    transcript_embeddings = pickle.load(f)

In [7]:
# Element 1 of every transcript record is kept as that transcript's Q&A turns
# (presumably alternating question / answer entries -- confirm with the data).
transcript_qna = [
    transcript_embeddings[t_idx][1] for t_idx in range(len(transcript_embeddings))
]

# Walk each transcript two turns at a time and pair turn k with turn k + 1,
# keeping only element 0 of each turn. A trailing unpaired turn is dropped.
qna_pairs = [
    (transcript_qna[t_idx][k][0], transcript_qna[t_idx][k + 1][0])
    for t_idx in range(len(transcript_qna))
    for k in range(0, len(transcript_qna[t_idx]) - 1, 2)
]
print(len(qna_pairs))

34586


In [8]:
class QADataset(Dataset):
    """In-memory dataset of (question, answer) pairs."""

    def __init__(self, qna_list):
        """
        Args:
            qna_list (sequence): Indexable collection of pairs; each item
                exposes the question at position 0 and the answer at
                position 1.
        """
        self.qna = qna_list

    def __len__(self):
        """Number of stored pairs."""
        return len(self.qna)

    def __getitem__(self, idx):
        """Return the (question, answer) tuple stored at ``idx``."""
        pair = self.qna[idx]
        return pair[0], pair[1]

In [9]:
class AverageMeter(object):
    """Tracks the latest value and the running (weighted) mean of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the newest observation, counted ``n`` times."""
        self.val = val
        self.count += n
        self.sum += val * n
        # Division stays last so the other fields are already updated.
        self.avg = self.sum / self.count

In [10]:
class SiameseNetwork(nn.Module):
    """Two-layer projection head: Linear -> ReLU -> Linear.

    The same instance is applied to both inputs of a pair, so the two
    branches share their weights.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Attribute names L1 / L2 are part of the state_dict keys of saved
        # checkpoints; each layer is Kaiming-initialised right after creation.
        self.L1 = nn.Linear(input_size, hidden_size)
        nn.init.kaiming_normal_(self.L1.weight)
        self.L2 = nn.Linear(hidden_size, output_size)
        nn.init.kaiming_normal_(self.L2.weight)

    def forward(self, x):
        """Project ``x`` through the hidden ReLU layer to the output space."""
        hidden = torch.nn.functional.relu(self.L1(x))
        return self.L2(hidden)

In [53]:
# Module-level training history filled in by train_for_epoch:
#   loss_per_epoch    -- receives one entry per training *batch* (despite its name)
#   cos_sim_per_epoch -- receives the average dev cosine similarity once per epoch
loss_per_epoch, cos_sim_per_epoch = [], []
def train(network, train_data, dev_data, output_path, batch_size=16, n_epochs=50, lr=0.001,
          save_every=10):
    """ Train the Siamese Network and periodically checkpoint its weights.

    @param network (SiameseNetwork): model whose parameters are optimised in place
    @param train_data (Dataset): (question, answer) pairs used for training
    @param dev_data (Dataset): (question, answer) pairs used for evaluation
    @param output_path (str): Prefix of the checkpoint files; the weights of
        epoch N are written to "<output_path>_<N>".
    @param batch_size (int): Number of examples in a single batch
    @param n_epochs (int): Number of training epochs
    @param lr (float): Learning rate
    @param save_every (int): Checkpoint every this many epochs (epoch 0 included).
        Defaults to 10, the interval that used to be hard-coded; 0 or None
        disables checkpointing.

    @return best_dev_cos_sim (float): Highest dev cosine similarity seen over
        all epochs (-inf when no epoch was run).
    """
    # Cosine similarity can be negative, so 0 is not a safe starting "best".
    best_dev_cos_sim = float("-inf")
    optimizer = optim.Adam(network.parameters(), lr=lr)
    loss_func = nn.CosineEmbeddingLoss()

    for epoch in range(n_epochs):
        print("Epoch {:} out of {:}".format(epoch, n_epochs))
        dev_cos_sim = train_for_epoch(network, train_data, dev_data, optimizer, loss_func, batch_size)

        # Only keep the score when it actually improves on the best so far.
        if dev_cos_sim > best_dev_cos_sim:
            best_dev_cos_sim = dev_cos_sim

        if save_every and epoch % save_every == 0:
            print("Saving model for epoch: ", epoch)
            torch.save(network.state_dict(), output_path + "_" + str(epoch))
            print("")

    return best_dev_cos_sim


def train_for_epoch(network, train_data, dev_data, optimizer, loss_func, batch_size):
    """ Train network for a single epoch, then evaluate it on the dev set.

    Every batch contributes one positive term (each question against its own
    answer, target +1) plus, for every answer, one negative term built from a
    randomly chosen *other* question of the same batch (target -1).

    @param network (SiameseNetwork): model applied to both questions and answers
    @param train_data (Dataset): yields (question, answer) pairs for training
    @param dev_data (Dataset): yields (question, answer) pairs for evaluation
    @param optimizer (optim.Optimizer): optimizer stepping the network parameters
    @param loss_func (callable): called as loss_func(x1, x2, target), e.g.
        nn.CosineEmbeddingLoss
    @param batch_size (int): number of pairs per batch

    @return dev_cos_sim (float): average cosine similarity between projected
        questions and their answers on the dev set (nan if the dev set is empty)
    """
    n_minibatches = math.ceil(len(train_data) / batch_size)
    loss_meter = AverageMeter()
    train_generator = data.DataLoader(train_data, batch_size=batch_size)
    dev_generator = data.DataLoader(dev_data, batch_size=batch_size)

    with tqdm(total=n_minibatches) as prog:
        for questions, answers in train_generator:
            optimizer.zero_grad()   # drop gradients left over from the previous batch
            output_q = network(questions.float())
            output_a = network(answers.float())
            n_pairs = output_a.shape[0]

            # Targets are 1-D, one label per pair: this is the documented (N,)
            # target shape for (N, D) inputs. A 0-D target combined with 2-D
            # inputs is rejected by newer PyTorch releases.
            positive_target = output_q.new_ones(n_pairs)
            negative_target = -output_q.new_ones(1)

            loss = loss_func(output_q, output_a, positive_target)

            # A batch holding a single pair has no "other" question to sample,
            # so the negative term is skipped instead of crashing on an empty
            # candidate list.
            if n_pairs > 1:
                for a_index in range(n_pairs):
                    candidates = list(range(a_index)) + list(range(a_index + 1, n_pairs))
                    q_index = np.random.choice(candidates)
                    loss = loss + loss_func(output_q[q_index].unsqueeze(0),
                                            output_a[a_index].unsqueeze(0),
                                            negative_target)

            # Store a plain float: appending the tensor itself would keep every
            # batch's autograd graph alive and steadily exhaust memory.
            loss_value = loss.item()
            loss_per_epoch.append(loss_value)

            loss.backward()
            optimizer.step()

            prog.update(1)
            loss_meter.update(loss_value)

    print ("Average Train Loss: {}".format(loss_meter.avg))
    print("Evaluating on cosine similarity dev set",)
    with torch.no_grad():
        cosine_sim = 0.
        counter = 0
        for questions, answers in dev_generator:
            counter += questions.shape[0]
            output_q = network(questions.float())
            output_a = network(answers.float())
            cosine_sim += torch.sum(torch.nn.functional.cosine_similarity(output_q, output_a, dim=1)).item()

    # Guard against an empty dev set instead of raising ZeroDivisionError.
    avg_cosine_sim = cosine_sim / counter if counter else float("nan")
    cos_sim_per_epoch.append(avg_cosine_sim)
    print(avg_cosine_sim)
    print("- dev cosine similarity: {:.2f}".format(avg_cosine_sim))
    return avg_cosine_sim

In [None]:
# Model dimensions: the input width is taken from the first question embedding.
input_size = len(qna_pairs[0][0])
hidden_dim = 256
output_dim = 128

# Seed every RNG the run draws from (weight initialisation uses torch, the
# negative sampling in train_for_epoch uses numpy) so reruns are reproducible.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

print(80 * "=")
print("INITIALIZING")
print(80 * "=")

# First 80% of the pairs (in their original order) train the model; the
# remaining 20% form the dev set.
test_val_split = int(len(qna_pairs)*0.8)
train_data = QADataset(qna_pairs[0:test_val_split])
dev_data = QADataset(qna_pairs[test_val_split:])

start = time.time()
network = SiameseNetwork(input_size, hidden_dim, output_dim)
#network.load_state_dict(torch.load("./results/model.weights_1"))
print("took {:.2f} seconds\n".format(time.time() - start))

print(80 * "=")
print("TRAINING")
print(80 * "=")
output_dir = "results/"
output_path = output_dir + "bert.model.weights"

# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
os.makedirs(output_dir, exist_ok=True)

train(network, train_data, dev_data, output_path)

  0%|          | 7/1730 [00:00<00:26, 64.57it/s]

INITIALIZING
took 0.01 seconds

TRAINING
Epoch 0 out of 50


100%|██████████| 1730/1730 [00:37<00:00, 46.42it/s]


Average Train Loss: 8.66540755178198
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:46, 37.47it/s]

0.1979689325735589
- dev cosine similarity: 0.20
Saving model for epoch:  0

Epoch 1 out of 50


100%|██████████| 1730/1730 [00:46<00:00, 37.45it/s]


Average Train Loss: 5.637772295034001
Evaluating on cosine similarity dev set


  0%|          | 5/1730 [00:00<00:43, 39.50it/s]

0.05859612340086559
- dev cosine similarity: 0.06
Epoch 2 out of 50


100%|██████████| 1730/1730 [00:46<00:00, 37.01it/s]


Average Train Loss: 5.035324134854223
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:45, 37.72it/s]

0.2918356102090259
- dev cosine similarity: 0.29
Epoch 3 out of 50


100%|██████████| 1730/1730 [00:47<00:00, 36.05it/s]


Average Train Loss: 4.948421853539571
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:47, 36.20it/s]

-0.022587285817841224
- dev cosine similarity: -0.02
Epoch 4 out of 50


100%|██████████| 1730/1730 [00:49<00:00, 35.01it/s]


Average Train Loss: 4.769491301485568
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:45, 37.73it/s]

-0.025293421321183694
- dev cosine similarity: -0.03
Epoch 5 out of 50


100%|██████████| 1730/1730 [00:50<00:00, 34.42it/s]


Average Train Loss: 4.4849249173451025
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:50, 34.47it/s]

-0.0463336994987273
- dev cosine similarity: -0.05
Epoch 6 out of 50


100%|██████████| 1730/1730 [00:50<00:00, 34.30it/s]


Average Train Loss: 3.974111122340825
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:46, 37.05it/s]

0.013324591998645217
- dev cosine similarity: 0.01
Epoch 7 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.88it/s]


Average Train Loss: 3.8883413067442834
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:51, 33.46it/s]

0.024403386163716716
- dev cosine similarity: 0.02
Epoch 8 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.84it/s]


Average Train Loss: 3.787956850480482
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:50, 34.16it/s]

0.05288455871976621
- dev cosine similarity: 0.05
Epoch 9 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.59it/s]


Average Train Loss: 3.7671673940096286
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:49, 34.67it/s]

0.0011799233629287203
- dev cosine similarity: 0.00
Epoch 10 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.64it/s]


Average Train Loss: 3.8994743607981355
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:49, 34.91it/s]

0.13789769536275745
- dev cosine similarity: 0.14
Saving model for epoch:  10

Epoch 11 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.72it/s]


Average Train Loss: 4.427423766858316
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:49, 34.76it/s]

0.04048688438551107
- dev cosine similarity: 0.04
Epoch 12 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.91it/s]


Average Train Loss: 5.51318619995448
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:49, 34.84it/s]

0.14624374922570654
- dev cosine similarity: 0.15
Epoch 13 out of 50


100%|██████████| 1730/1730 [00:50<00:00, 33.97it/s]


Average Train Loss: 4.415690172648843
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:49, 34.63it/s]

0.2611609054915122
- dev cosine similarity: 0.26
Epoch 14 out of 50


100%|██████████| 1730/1730 [00:50<00:00, 33.97it/s]


Average Train Loss: 4.24786587715838
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:45, 37.76it/s]

0.09551997763169626
- dev cosine similarity: 0.10
Epoch 15 out of 50


100%|██████████| 1730/1730 [00:50<00:00, 34.09it/s]


Average Train Loss: 4.153577484147397
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:45, 37.54it/s]

0.14939367319192026
- dev cosine similarity: 0.15
Epoch 16 out of 50


100%|██████████| 1730/1730 [00:50<00:00, 34.07it/s]


Average Train Loss: 4.362666373204633
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:48, 35.35it/s]

-0.06014134801749757
- dev cosine similarity: -0.06
Epoch 17 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.60it/s]


Average Train Loss: 3.7870636236116377
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:47, 36.20it/s]

-0.03076638458240115
- dev cosine similarity: -0.03
Epoch 18 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.60it/s]


Average Train Loss: 3.724833618044164
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:49, 35.07it/s]

-0.13682180940114064
- dev cosine similarity: -0.14
Epoch 19 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.63it/s]


Average Train Loss: 3.6302294888248334
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:47, 36.32it/s]

-0.1600530024000798
- dev cosine similarity: -0.16
Epoch 20 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.61it/s]


Average Train Loss: 3.667358089216872
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:50, 34.10it/s]

-0.17548132435577252
- dev cosine similarity: -0.18
Saving model for epoch:  20

Epoch 21 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.51it/s]


Average Train Loss: 3.6311314776109134
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:50, 33.85it/s]

-0.16545641472136124
- dev cosine similarity: -0.17
Epoch 22 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.42it/s]


Average Train Loss: 3.5926025319650683
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:50, 34.00it/s]

-0.15039660979087088
- dev cosine similarity: -0.15
Epoch 23 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.47it/s]


Average Train Loss: 3.5502701139174446
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:47, 36.67it/s]

-0.14151504408782464
- dev cosine similarity: -0.14
Epoch 24 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.52it/s]


Average Train Loss: 3.5340752690169164
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:50, 34.07it/s]

-0.15443243078368357
- dev cosine similarity: -0.15
Epoch 25 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.43it/s]


Average Train Loss: 3.531532524293558
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:50, 34.31it/s]

-0.061505749301479264
- dev cosine similarity: -0.06
Epoch 26 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.31it/s]


Average Train Loss: 3.4638949571317332
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:51, 33.44it/s]

-0.1282866836110053
- dev cosine similarity: -0.13
Epoch 27 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.39it/s]


Average Train Loss: 3.467547821481793
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:50, 34.04it/s]

-0.11175435235232209
- dev cosine similarity: -0.11
Epoch 28 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.29it/s]


Average Train Loss: 3.44669666152469
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:51, 33.74it/s]

-0.1506699592674199
- dev cosine similarity: -0.15
Epoch 29 out of 50


100%|██████████| 1730/1730 [00:52<00:00, 33.25it/s]


Average Train Loss: 3.3966742276456316
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:51, 33.78it/s]

-0.14152818516512833
- dev cosine similarity: -0.14
Epoch 30 out of 50


100%|██████████| 1730/1730 [00:51<00:00, 33.30it/s]


Average Train Loss: 3.4479054621189316
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:51, 33.79it/s]

-0.10006491116259648
- dev cosine similarity: -0.10
Saving model for epoch:  30

Epoch 31 out of 50


100%|██████████| 1730/1730 [00:52<00:00, 33.07it/s]


Average Train Loss: 3.4363196893234473
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:51, 33.34it/s]

-0.11080197990909929
- dev cosine similarity: -0.11
Epoch 32 out of 50


100%|██████████| 1730/1730 [00:52<00:00, 33.03it/s]


Average Train Loss: 4.223400418227808
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:50, 34.32it/s]

0.27125523225804155
- dev cosine similarity: 0.27
Epoch 33 out of 50


100%|██████████| 1730/1730 [00:52<00:00, 32.84it/s]


Average Train Loss: 5.293248524073231
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:52, 32.70it/s]

-0.11981703032181852
- dev cosine similarity: -0.12
Epoch 34 out of 50


100%|██████████| 1730/1730 [00:53<00:00, 32.44it/s]


Average Train Loss: 3.4076885818746048
Evaluating on cosine similarity dev set


  0%|          | 4/1730 [00:00<00:52, 32.83it/s]

-0.03644860662082676
- dev cosine similarity: -0.04
Epoch 35 out of 50


 17%|█▋        | 294/1730 [01:33<3:10:08,  7.94s/it]

In [27]:
# Load the pickled test-set embeddings from disk.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open files that come from a trusted source.
with open('embeddings/bert_embeddings_test.pickle', 'rb') as f:
    test_embeddings = pickle.load(f)

In [28]:
# Eyeball the first test record to check its nesting before projecting it.
# NOTE(review): this dumps the full arrays; consider printing only shapes/lengths.
print(test_embeddings[0])

[[array([ 0.64303887,  1.710472  ,  1.7584287 ,  0.7257592 ,  1.9424343 ,
        0.9457477 ,  1.0656958 ,  2.053865  ,  1.6419497 ,  1.0993513 ,
        1.2427312 ,  0.8575021 ,  1.7092398 ,  0.95934933,  0.91018724,
        2.356356  ,  1.2408056 ,  1.2144847 ,  1.7015573 ,  0.7508159 ,
        1.6340787 ,  0.5028671 ,  1.281425  ,  2.1601427 ,  1.1313176 ,
        0.88218486,  0.7034144 ,  0.93418413,  1.0833174 ,  1.6817216 ,
        1.7536174 ,  1.459844  ,  1.23244   ,  1.2898546 ,  1.901853  ,
        1.3190624 ,  1.6615446 ,  1.2163279 ,  1.0940901 ,  1.8332223 ,
        0.2561578 ,  0.8745154 ,  2.0256484 ,  1.3893151 ,  0.9129074 ,
        1.0689849 ,  0.9507198 ,  0.96652305,  1.4505901 ,  1.2338183 ,
        0.6528628 ,  0.9208979 ,  0.9213959 ,  0.91340184,  1.2991549 ,
        1.6358504 ,  0.99389005,  0.056148  ,  0.7952534 ,  0.63787025,
        1.238049  ,  0.7773289 ,  1.0158437 ,  0.9560672 ,  1.4335897 ,
        0.9485135 ,  1.9032862 ,  1.5833483 ,  0.94732475,  1.

In [31]:
# Rebuild the model and restore the weights checkpointed at epoch 0.
network = SiameseNetwork(input_size, hidden_dim, output_dim)
network.load_state_dict(torch.load("./results/bert.model.weights_0"))
embeded_transcripts = []

# Inference only: no gradients are needed while projecting the embeddings.
with torch.no_grad():
    for t_idx in range(len(test_embeddings)):
        record = test_embeddings[t_idx]

        # Element 0 of a record: sequence of statement embeddings.
        embed_statement = [
            network.forward(torch.tensor(vec, dtype=torch.float32)).numpy()
            for vec in record[0]
        ]

        # Element 1 of a record: Q&A turns; item 0 of a turn is the embedding,
        # item 1 is carried through unchanged.
        embed_qna = [
            (network.forward(torch.tensor(turn[0], dtype=torch.float32)).numpy(), turn[1])
            for turn in record[1]
        ]

        embeded_transcripts.append([embed_statement, embed_qna])

In [33]:
# Persist the projected transcripts (`embeded_transcripts`) as a pickle; the
# "epoch0" suffix matches the epoch-0 checkpoint loaded for the projection.
with open('embeddings/siameseBERT_epoch0.pickle', 'wb') as f:
    pickle.dump(embeded_transcripts, f)

In [19]:
# Sanity check: two identical all-ones row vectors must have cosine similarity 1.
ones = torch.ones(1, 100)
ones1 = torch.ones(1, 100)
torch.nn.functional.cosine_similarity(ones, ones1).item()

1.0