In [1]:
!pip install tqdm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import torch
import math
import numpy as np
from math import sqrt
from torch import nn
from torch.nn import init
from torch.nn import functional as F
from torchsummary import summary
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm, trange

In [3]:
# Here's MLP scoring test
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.leakyrelu = nn.LeakyReLU(0.2)
        # self.dropout = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x1, x2, max_len):
        cosine_score = F.cosine_similarity(x1, x2)
        tensor_similarity = torch.Tensor([[cosine_score]])
        tensor_max_len = torch.Tensor([[max_len]])

        tensor_similarity.requires_grad_(True)
        tensor_max_len.requires_grad_(True)

        x = torch.cat((x1, x2, tensor_similarity, tensor_max_len), dim=1)
        x = self.fc1(x)
        x = self.leakyrelu(x)
        x = self.fc2(x)

        x = torch.abs(x)

        return x

# # 创建并使用MLP模型
# input_dim = 50*2  # 连接后的向量维度
# hidden_dim = 128
# output_dim = 1  # 输出一个分数
# mlp = MLP(input_dim, hidden_dim, output_dim)

# # 生成示例输入向量
# vector1 = torch.randn(1, 50)  # 第一个向量
# vector2 = torch.randn(1, 50)  # 第二个向量

# # 将输入向量传递给MLP模型
# # score = mlp(vector1, vector2)

# # print('This is the score', score)  # 输出分数张量

### Training MLP vs cosine similarity

In [None]:
# Test cosine similarity with MLP

# 创建并使用MLP模型
input_dim = 50*2  # 连接后的向量维度
hidden_dim = 128
output_dim = 1  # 输出一个分数
mlp = MLP(input_dim, hidden_dim, output_dim)

# build 3 testing tensor
tensor1 = torch.randn(1, 50)
tensor2 = tensor1.clone()  # 通过克隆方式创建一个与tensor1相同的张量
tensor3 = torch.randn(1, 50) # 创建一个不相似的张量

# 将两个向量连接成一个输入向量
input_vector_1 = torch.cat((tensor1, tensor2), dim=1)  # 2 similar vectors
input_vector_2 = torch.cat((tensor1, tensor3), dim=1)  # 2 non-similar vectors

print(input_vector_1)
print(input_vector_2)

# 将输入向量传递给MLP模型
score_1 = mlp(tensor1, tensor2)
score_2 = mlp(tensor1, tensor3)

print('This is the score-1', score_1)  # 输出分数张量
print('This is the score-2', score_2)  # 输出分数张量


similarity_score_1 = F.cosine_similarity(tensor1, tensor2)
similarity_score_2 = F.cosine_similarity(tensor1, tensor3)

print('This is the cos score-1', similarity_score_1)  # 输出分数张量
print('This is the cos score-2', similarity_score_2)  # 输出分数张量

tensor([[ 0.9346, -0.0190, -0.0442, -1.1277, -2.2433,  1.0628,  0.2738, -0.7498,
          1.4525,  0.5115,  1.4996,  0.2842,  0.1637,  1.6438,  0.7192,  1.7105,
          0.7663,  0.1757, -0.3213, -0.1413,  0.4713, -0.1653, -1.0831,  1.7135,
         -0.0699,  0.8162,  0.8200,  0.9509,  1.7064, -1.0233,  1.0895,  0.7483,
         -0.1954, -1.1060, -1.2792, -2.2550, -0.4848,  0.8326,  1.9951, -0.8836,
          0.1315, -0.5436, -0.3903, -1.9608, -0.2627,  0.2624,  1.4792,  0.2979,
          1.4561, -0.0064,  0.9346, -0.0190, -0.0442, -1.1277, -2.2433,  1.0628,
          0.2738, -0.7498,  1.4525,  0.5115,  1.4996,  0.2842,  0.1637,  1.6438,
          0.7192,  1.7105,  0.7663,  0.1757, -0.3213, -0.1413,  0.4713, -0.1653,
         -1.0831,  1.7135, -0.0699,  0.8162,  0.8200,  0.9509,  1.7064, -1.0233,
          1.0895,  0.7483, -0.1954, -1.1060, -1.2792, -2.2550, -0.4848,  0.8326,
          1.9951, -0.8836,  0.1315, -0.5436, -0.3903, -1.9608, -0.2627,  0.2624,
          1.4792,  0.2979,  

### Training MLP test

In [4]:
# Build a Locater with CNN+MLP
''' CNN accept transcript or query and convert it into average embedding, 
    MLP accept query embedding & transcript embedding and convert it into a 2-D vector as <start, end> of relevant spans '''

class testCNNModel(nn.Module):
    def __init__(self):
        super(testCNNModel, self).__init__()
        
        self.conv1d = nn.Conv1d(in_channels=512, out_channels=256, kernel_size=1)
        # self.relu = nn.ReLU()
        # self.drop = nn.Dropout(0.2)
        self.maxpool = nn.MaxPool1d(kernel_size=1)
        self.linear = nn.Linear(196608, 50)   # fully-connected layer
    
    def forward(self, x):
        x = x.requires_grad_(True)
        x = self.conv1d(x)
        # x = self.drop(x)
        x = self.maxpool(x)
        x = torch.flatten(x, start_dim=1)  # 将张量展平
        x = self.linear(x)
        return x

class Locater(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.CNN = testCNNModel()
        self.MLP = MLP(input_dim, hidden_dim, output_dim)

    def forward(self, transcript_emb, query_emb, max_len):
        transcript_cnn_out = self.CNN(transcript_emb)
        query_cnn_out = self.CNN(query_emb)

        spans_id = self.MLP(transcript_cnn_out, query_cnn_out, max_len)

        return spans_id


### Training test

In [None]:
# Test data
transcript_emb = torch.randn(1, 512, 768)  # tensor for transcript
query_emb_1 = torch.randn(1, 512, 768)  # tensor for query
query_emb_2 = torch.randn(1, 512, 768)
query_emb_3 = torch.randn(1, 512, 768)
query_emb_4 = torch.randn(1, 512, 768)

transcript_emb.requires_grad_(True)
query_emb_1.requires_grad_(True)
query_emb_2.requires_grad_(True)
query_emb_3.requires_grad_(True)
query_emb_4.requires_grad_(True)

# Label
label_1 = torch.Tensor([24, 30])
label_2 = torch.Tensor([0, 10])
label_3 = torch.Tensor([24, 26])
label_4 = torch.Tensor([25, 32])

label_1.requires_grad_(True)
label_2.requires_grad_(True)
label_3.requires_grad_(True)
label_4.requires_grad_(True)

query_set = [query_emb_1, query_emb_2, query_emb_3, query_emb_4]
start_end_label = [label_1, label_2, label_3, label_4] #label

# Instantiate Locater
input_dim = 50*2  # Dimension after connected
hidden_dim = 128
output_dim = 2  # output <start, end> spans
locater = Locater(input_dim, hidden_dim, output_dim)


# Loss function
# criterion = nn.CrossEntropyLoss()  # can't work
criterion = nn.MSELoss()  

# Optimizer
from torch import optim
optimizer = optim.Adam(mlp.parameters(), lr=0.001)

num_epochs = 10

for epoch in range(num_epochs):
  running_loss = 0.0
  for i, (inputs_query, label) in enumerate(zip(query_set, start_end_label)):
    print('i=',i)
    print('inputs=',inputs_query)
    # 前向传播
    spans = locater(transcript_emb, inputs_query)
    loss = criterion(spans, label)  # 计算loss误差

    print('predicted spans: ', spans)

    # 反向传播和优化
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    running_loss += loss.item()

    # 打印每个epoch的平均损失
  epoch_loss = running_loss / len(query_set)
  print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss}")
  print('------------------------------------------------------')

i= 0
inputs= tensor([[[-0.2619, -0.6147,  0.7968,  ..., -0.2643,  1.1212,  0.7796],
         [-0.1804, -0.4150, -0.6541,  ...,  0.7962,  1.4538, -1.2213],
         [-1.0690, -0.1759, -0.7373,  ..., -0.1449,  1.4295, -1.4209],
         ...,
         [-0.3537, -0.8999, -1.2690,  ...,  0.8678,  1.3526,  1.7176],
         [ 1.4875,  0.1943, -2.9266,  ...,  1.3069, -0.1743,  0.8847],
         [ 0.1588,  0.0406,  1.1313,  ..., -0.2899, -0.9426,  0.9801]]],
       requires_grad=True)
predicted spans:  tensor([[0., -0.]], grad_fn=<RoundBackward0>)
i= 1
inputs= tensor([[[ 0.4778,  1.3883, -0.8741,  ...,  1.4224,  2.2957, -0.3944],
         [ 2.6525, -0.4972,  1.7572,  ...,  0.0954, -0.4087, -0.5127],
         [-0.6099,  1.3736, -0.4484,  ...,  1.0382, -0.3736, -1.4498],
         ...,
         [ 0.2059,  0.4491, -1.7673,  ..., -0.4939,  0.3272, -0.9960],
         [ 0.8828,  0.0485, -1.8293,  ..., -0.2453,  0.9305, -1.6672],
         [-0.9344,  0.7967, -0.4213,  ...,  0.8275, -0.8511, -1.0107]]],

  return F.mse_loss(input, target, reduction=self.reduction)


predicted spans:  tensor([[0., -0.]], grad_fn=<RoundBackward0>)
i= 2
inputs= tensor([[[-0.9681,  0.2716,  1.4775,  ...,  1.4349, -0.4828, -2.3926],
         [-1.9622,  1.5685, -0.3171,  ..., -1.3052,  0.7432,  0.2840],
         [-0.3171, -1.1408,  0.8333,  ..., -0.0348,  0.1850,  0.9617],
         ...,
         [-1.1376, -0.0100,  0.7508,  ...,  0.2652,  0.7560, -0.2433],
         [-2.0999,  0.9153, -0.3850,  ...,  0.4209,  0.6160,  0.0331],
         [ 0.0519, -1.1740,  2.9918,  ...,  0.3874, -0.9176,  1.0621]]],
       requires_grad=True)
predicted spans:  tensor([[0., -0.]], grad_fn=<RoundBackward0>)
i= 3
inputs= tensor([[[ 1.4751, -2.2790, -0.5338,  ..., -0.2522, -0.2616, -1.3452],
         [ 0.3894,  1.0400, -0.5621,  ..., -0.7888,  0.2940, -1.3033],
         [-1.3584, -0.2238, -2.6254,  ..., -0.6882, -0.4461,  0.0454],
         ...,
         [-0.2897,  0.7729, -0.4392,  ...,  0.5651, -1.3621, -0.3709],
         [-1.3717, -1.3365, -0.6513,  ..., -0.4769,  0.2960,  0.5230],
        

### Training for 1 meeting 

In [5]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

corpus_root = 'drive/My Drive/Colab Notebooks/datasets/' 

Mounted at /content/drive


In [6]:
corpus_root_DS = 'drive/My Drive/Colab Notebooks/Data Science (Final Project)/CNN/'

# tensor_dict = torch.load(corpus_root_DS+'dictionary.pt')
new_tensor_dict = torch.load(corpus_root_DS+'VAL_TENSOR_ROBERTA.pt', map_location=torch.device('cpu')) # training with cpu
# new_tensor_dict = torch.load(corpus_root_DS+'dictionary_final_lc_ELECTRA.pt')

# print(tensor_dict)
# print(len(new_tensor_dict))
# print(new_tensor_dict['Meeting 156']['Length'])

In [11]:
print(new_tensor_dict.keys())

dict_keys(['meeting: ES2011b.json', 'meeting: Bed008.json', 'meeting: IS1003b.json', 'meeting: ES2011d.json', 'meeting: ES2004c.json', 'meeting: Bmr006.json', 'meeting: TS3004a.json', 'meeting: IS1003a.json', 'meeting: education_13.json', 'meeting: covid_4.json', 'meeting: covid_9.json', 'meeting: education_9.json', 'meeting: education_4.json', 'meeting: education_17.json', 'meeting: Bmr014.json', 'meeting: TS3004d.json', 'meeting: Bed003.json', 'meeting: TS3011b.json', 'meeting: IS1003d.json', 'meeting: Bro004.json', 'meeting: TS3004b.json', 'meeting: Bro027.json', 'meeting: ES2004b.json', 'meeting: TS3004c.json', 'meeting: IS1003c.json', 'meeting: Bed016.json', 'meeting: TS3011c.json', 'meeting: ES2011c.json', 'meeting: TS3011a.json', 'meeting: TS3011d.json', 'meeting: ES2004a.json', 'meeting: ES2011a.json', 'meeting: ES2004d.json', 'meeting: Bro019.json', 'meeting: Bmr023.json'])


In [20]:
print(new_tensor_dict['meeting: Bed008.json']['Meeting 0']['Transcript'])
print(new_tensor_dict['meeting: IS1003b.json']['Meeting 0'].keys())
print(new_tensor_dict['meeting: IS1003b.json'].keys())
print(new_tensor_dict.keys())

tensor([[[-0.1095,  0.0869,  0.1253,  ...,  0.0619, -0.2003,  0.1820],
         [-0.2126, -0.0530, -0.1508,  ..., -0.0317, -0.4251, -0.0024],
         [ 0.0358,  0.2351, -0.0924,  ..., -0.3321, -0.3012, -0.0865],
         ...,
         [ 0.3888, -0.3714, -0.0690,  ...,  0.5387,  0.4168, -0.5220],
         [ 0.3570, -0.3428, -0.0045,  ...,  0.5750,  0.3514, -0.5224],
         [-0.1269,  0.0083, -0.0225,  ...,  0.1302, -0.1517,  0.0016]]],
       requires_grad=True)
dict_keys(['Length', 'Transcript', 'Query', 'Spans'])
dict_keys(['Meeting 0'])
dict_keys(['meeting: ES2011b.json', 'meeting: Bed008.json', 'meeting: IS1003b.json', 'meeting: ES2011d.json', 'meeting: ES2004c.json', 'meeting: Bmr006.json', 'meeting: TS3004a.json', 'meeting: IS1003a.json', 'meeting: education_13.json', 'meeting: covid_4.json', 'meeting: covid_9.json', 'meeting: education_9.json', 'meeting: education_4.json', 'meeting: education_17.json', 'meeting: Bmr014.json', 'meeting: TS3004d.json', 'meeting: Bed003.json', 'm

In [None]:
print(tensor_dict['Query'][0].is_cuda)

False


### Train 1 meeting

In [None]:
# Training test from dictionary.pt (1 meeting)
writer = SummaryWriter()

transcript_emb = tensor_dict['Transcript']
query_set = tensor_dict['Query']
start_end_label = tensor_dict['Spans'] #label

# Instantiate Locater
input_dim = 50*2  # Dimension after connected
hidden_dim = 128
output_dim = 2  # output <start, end> spans
locater = Locater(input_dim, hidden_dim, output_dim)


# Loss function
# criterion = nn.CrossEntropyLoss()  # can't work
# criterion = nn.MSELoss()  
# criterion = nn.L1Loss()
criterion = nn.SmoothL1Loss()

# Optimizer
from torch import optim
# optimizer = optim.Adam(locater.parameters(), lr=0.001)
optimizer = optim.AdamW(locater.parameters(), lr=0.001)

num_epochs = 20

for epoch in tqdm(range(num_epochs)):
  running_loss = 0.0
  for i, (inputs_query, label) in enumerate(zip(query_set, start_end_label)):
    print('i=',i)
    # print('inputs=',inputs_query)

    # 前向传播
    spans = locater(transcript_emb, inputs_query)
    loss = criterion(spans.squeeze(dim=0), label)  # 计算loss误差
    spans.requires_grad_(True)

    print('predicted spans: ', spans.squeeze(dim=0))

    # 反向传播和优化
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    running_loss += loss.item()
  
  
    # 打印每个epoch的平均损失
  epoch_loss = running_loss / len(query_set)
  print(f" Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss} ")
  print('======================================================')
  print()

  0%|          | 0/20 [00:00<?, ?it/s]

i= 0
predicted spans:  tensor([0.0029, 0.0693], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([3.1264, 4.1405], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([18.1007, 20.4746], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([45.8711, 52.6227], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([ 91.4300, 101.5358], grad_fn=<SqueezeBackward1>)
i= 5
predicted spans:  tensor([150.2351, 158.4020], grad_fn=<SqueezeBackward1>)


  5%|▌         | 1/20 [00:01<00:28,  1.52s/it]

 Epoch [1/20], Loss: 159.12432734171549 

i= 0
predicted spans:  tensor([277.2852, 288.8856], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([295.0707, 304.9908], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([262.3362, 271.5877], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([211.1555, 222.2758], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([166.6828, 198.1040], grad_fn=<SqueezeBackward1>)
i= 5
predicted spans:  tensor([142.9354, 182.9619], grad_fn=<SqueezeBackward1>)


 10%|█         | 2/20 [00:02<00:26,  1.48s/it]

 Epoch [2/20], Loss: 196.61265309651694 

i= 0
predicted spans:  tensor([132.8606, 196.6265], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([124.2155, 194.7906], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([106.4772, 179.4150], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([ 79.0628, 171.1845], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([ 77.8639, 180.4614], grad_fn=<SqueezeBackward1>)
i= 5
predicted spans:  tensor([ 80.1653, 189.2145], grad_fn=<SqueezeBackward1>)


 15%|█▌        | 3/20 [00:04<00:25,  1.48s/it]

 Epoch [3/20], Loss: 159.716921488444 

i= 0
predicted spans:  tensor([ 62.7230, 196.8436], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([ 67.5945, 214.4763], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([ 63.3274, 222.2055], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([ 52.7813, 205.8163], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([ 67.7884, 215.2268], grad_fn=<SqueezeBackward1>)
i= 5
predicted spans:  tensor([ 87.6724, 226.2238], grad_fn=<SqueezeBackward1>)


 20%|██        | 4/20 [00:05<00:21,  1.35s/it]

 Epoch [4/20], Loss: 142.8383525212606 

i= 0
predicted spans:  tensor([ 61.0055, 210.4916], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([ 78.8297, 232.5424], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([ 93.1803, 245.7586], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([ 72.7943, 222.4083], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([ 90.6163, 233.1966], grad_fn=<SqueezeBackward1>)
i= 5
predicted spans:  tensor([116.2414, 250.2220], grad_fn=<SqueezeBackward1>)


 25%|██▌       | 5/20 [00:06<00:19,  1.28s/it]

 Epoch [5/20], Loss: 139.77740605672201 

i= 0
predicted spans:  tensor([ 57.2374, 203.3742], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([ 82.2189, 231.5686], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([106.0460, 255.1878], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([ 80.9310, 229.8374], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([105.8539, 246.8605], grad_fn=<SqueezeBackward1>)
i= 5


 30%|███       | 6/20 [00:07<00:17,  1.24s/it]

predicted spans:  tensor([143.7714, 276.7004], grad_fn=<SqueezeBackward1>)
 Epoch [6/20], Loss: 132.72319348653158 

i= 0
predicted spans:  tensor([ 42.7896, 188.0005], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([ 77.8091, 227.5432], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([115.6057, 266.9289], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([ 84.2780, 238.7482], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([122.0007, 266.5358], grad_fn=<SqueezeBackward1>)
i= 5


 35%|███▌      | 7/20 [00:09<00:15,  1.22s/it]

predicted spans:  tensor([184.0380, 319.5281], grad_fn=<SqueezeBackward1>)
 Epoch [7/20], Loss: 120.37901624043782 

i= 0
predicted spans:  tensor([ 22.8353, 163.6276], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([ 71.1398, 223.2010], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([135.0935, 286.8292], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([ 99.3080, 252.9152], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([152.0494, 265.0077], grad_fn=<SqueezeBackward1>)
i= 5
predicted spans:  tensor([250.1998, 330.0775], grad_fn=<SqueezeBackward1>)


 40%|████      | 8/20 [00:10<00:14,  1.21s/it]

 Epoch [8/20], Loss: 108.4999345143636 

i= 0
predicted spans:  tensor([11.2616, 57.0320], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([ 64.4046, 117.0545], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([161.7785, 201.6766], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([126.3252, 146.3518], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([218.4355, 223.1393], grad_fn=<SqueezeBackward1>)


 45%|████▌     | 9/20 [00:11<00:13,  1.18s/it]

i= 5
predicted spans:  tensor([375.4038, 363.5595], grad_fn=<SqueezeBackward1>)
 Epoch [9/20], Loss: 75.08273474375407 

i= 0
predicted spans:  tensor([43.5548, 61.1972], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([110.6614,  82.0781], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([267.6708, 261.2396], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([237.1729, 231.8964], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([346.5761, 356.6199], grad_fn=<SqueezeBackward1>)


 50%|█████     | 10/20 [00:12<00:11,  1.17s/it]

i= 5
predicted spans:  tensor([518.4061, 553.7761], grad_fn=<SqueezeBackward1>)
 Epoch [10/20], Loss: 60.23472913106283 

i= 0
predicted spans:  tensor([65.1389, 76.7626], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([32.5280,  4.2682], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([ 46.2113, 179.3389], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([ 50.3191, 208.1118], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([167.9173, 363.1923], grad_fn=<SqueezeBackward1>)
i= 5
predicted spans:  tensor([365.7555, 580.6998], grad_fn=<SqueezeBackward1>)


 55%|█████▌    | 11/20 [00:13<00:10,  1.16s/it]

 Epoch [11/20], Loss: 65.32991377512614 

i= 0
predicted spans:  tensor([ 27.8716, 103.0100], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([ 52.0284, 217.2471], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([219.1190, 388.8535], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([238.8380, 381.8317], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([312.2619, 432.5737], grad_fn=<SqueezeBackward1>)
i= 5
predicted spans:  tensor([400.5228, 506.3458], grad_fn=<SqueezeBackward1>)


 60%|██████    | 12/20 [00:15<00:10,  1.26s/it]

 Epoch [12/20], Loss: 72.60446866353352 

i= 0
predicted spans:  tensor([48.9929, 64.1287], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([ 74.3515, 101.2959], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([125.9869, 177.1232], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([104.6145, 153.5755], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([177.2158, 227.2640], grad_fn=<SqueezeBackward1>)
i= 5
predicted spans:  tensor([283.5663, 318.0385], grad_fn=<SqueezeBackward1>)


 65%|██████▌   | 13/20 [00:16<00:09,  1.33s/it]

 Epoch [13/20], Loss: 91.02984301249187 

i= 0
predicted spans:  tensor([1.7987, 1.6867], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([18.7487, 45.8664], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([131.7638, 195.3521], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([144.2864, 208.9271], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([280.5299, 342.2043], grad_fn=<SqueezeBackward1>)
i= 5
predicted spans:  tensor([456.8917, 494.7836], grad_fn=<SqueezeBackward1>)


 70%|███████   | 14/20 [00:18<00:08,  1.38s/it]

 Epoch [14/20], Loss: 31.99914614359538 

i= 0
predicted spans:  tensor([23.9573, 42.8029], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([108.8705, 174.9846], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([303.0322, 385.0316], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([316.0319, 391.2421], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([465.4420, 532.1199], grad_fn=<SqueezeBackward1>)
i= 5


 75%|███████▌  | 15/20 [00:19<00:06,  1.40s/it]

predicted spans:  tensor([603.5120, 621.9384], grad_fn=<SqueezeBackward1>)
 Epoch [15/20], Loss: 116.83044099807739 

i= 0
predicted spans:  tensor([ 41.9452, 110.6205], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([24.7881, 75.7924], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([ 8.8261, 22.2317], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([27.0840, 30.8781], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([ 33.7930, 131.0851], grad_fn=<SqueezeBackward1>)
i= 5


 80%|████████  | 16/20 [00:20<00:05,  1.32s/it]

predicted spans:  tensor([205.6203, 256.8073], grad_fn=<SqueezeBackward1>)
 Epoch [16/20], Loss: 153.52037239074707 

i= 0
predicted spans:  tensor([54.9526,  7.2816], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([63.4873, 37.9774], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([42.9915, 82.2832], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([ 34.1807, 106.4560], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([ 43.2029, 192.6434], grad_fn=<SqueezeBackward1>)
i= 5


 85%|████████▌ | 17/20 [00:21<00:03,  1.28s/it]

predicted spans:  tensor([216.9393, 302.9457], grad_fn=<SqueezeBackward1>)
 Epoch [17/20], Loss: 124.87818400065105 

i= 0
predicted spans:  tensor([ 20.5037, 144.9452], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([ 33.3673, 144.7903], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([  3.7868, 195.7876], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([  4.7459, 201.4151], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([152.4796, 326.0896], grad_fn=<SqueezeBackward1>)
i= 5
predicted spans:  tensor([366.0160, 464.0138], grad_fn=<SqueezeBackward1>)


 90%|█████████ | 18/20 [00:23<00:02,  1.24s/it]

 Epoch [18/20], Loss: 77.59272193908691 

i= 0
predicted spans:  tensor([  7.0631, 100.9905], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([ 15.0573, 174.6504], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([167.9220, 326.5852], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([205.5013, 329.2081], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([395.0829, 485.9073], grad_fn=<SqueezeBackward1>)
i= 5
predicted spans:  tensor([570.8956, 601.6559], grad_fn=<SqueezeBackward1>)


 95%|█████████▌| 19/20 [00:24<00:01,  1.24s/it]

 Epoch [19/20], Loss: 74.28375434875488 

i= 0
predicted spans:  tensor([12.9865, 40.0610], grad_fn=<SqueezeBackward1>)
i= 1
predicted spans:  tensor([15.4401,  0.2108], grad_fn=<SqueezeBackward1>)
i= 2
predicted spans:  tensor([52.1966, 47.7288], grad_fn=<SqueezeBackward1>)
i= 3
predicted spans:  tensor([62.9014, 38.6816], grad_fn=<SqueezeBackward1>)
i= 4
predicted spans:  tensor([193.1124, 170.7258], grad_fn=<SqueezeBackward1>)
i= 5
predicted spans:  tensor([331.5782, 300.7635], grad_fn=<SqueezeBackward1>)


100%|██████████| 20/20 [00:25<00:00,  1.28s/it]

 Epoch [20/20], Loss: 109.72569886843364 






### Train ALL meeting

In [7]:
# Training test from dictionary_final_lc.pt 

# This is for model summary(Not done yet)
writer = SummaryWriter()

# Instantiate Locater
input_dim = 2+(50*2)  # Dimension after connected
hidden_dim = 128
output_dim = 2  # output <start, end> spans
locater = Locater(input_dim, hidden_dim, output_dim)


# Loss function
# criterion = nn.CrossEntropyLoss()  # can't work
# criterion = nn.MSELoss()  
# criterion = nn.L1Loss()
criterion = nn.SmoothL1Loss()
# criterion = nn.CosineEmbeddingLoss()
# loss_range = torch.tensor([-1, 1]) # CosineEmbeddingLoss need to define range of similarity


# Optimizer
from torch import optim
# optimizer = optim.Adam(locater.parameters(), lr=0.001)
optimizer = optim.AdamW(locater.parameters(), lr=1e-5)

num_epochs = 10

for epoch in tqdm(range(num_epochs)):  # iterate epochs
  for meeting_num, meeting in new_tensor_dict.items():  # iterate meeting for an epoch

    # Get training data for one meeting
    transcript_emb = meeting['Transcript']
    query_set = meeting['Query']
    start_end_label = meeting['Spans'] #label
    meeting_len = meeting['Length'] # meeting length

    running_loss = 0.0

    for i, (inputs_query, label) in enumerate(zip(query_set, start_end_label)):  # iterate query for a meeting

      transcript_emb = transcript_emb.to('cpu')
      inputs_query = inputs_query.to('cpu')
      label = label.to('cpu')

      # 前向传播
      spans = locater(transcript_emb, inputs_query, meeting_len)
      loss = criterion(spans.squeeze(dim=0), label)  # 计算loss误差  (大部分loss function用這個)
      # loss = criterion(spans, label.unsqueeze(1), loss_range)  # 计算loss误差  (CosineEmbeddingLoss用這個)
      spans.requires_grad_(True)

      print('Meeting=',meeting_num, ' Query=',i,'  Predicted spans: ', spans.squeeze(dim=0), ' len=',meeting_len)

      # 反向传播和优化
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      
      running_loss += loss.item()
  
    print(" ")

    # 打印每个epoch的平均损失
  epoch_loss = running_loss / len(query_set)
  print(f" Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss} ")
  print('======================================================')
  print(" ")

  0%|          | 0/10 [00:00<?, ?it/s]

Meeting= Meeting 0  Query= 0   Predicted spans:  tensor([4.1199, 3.1364], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 1   Predicted spans:  tensor([3.6990, 3.6026], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 2   Predicted spans:  tensor([3.5947, 4.0645], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 3   Predicted spans:  tensor([3.5934, 4.4931], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 4   Predicted spans:  tensor([3.6547, 4.8937], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 5   Predicted spans:  tensor([3.7477, 5.2890], grad_fn=<SqueezeBackward1>)  len= 178
 
Meeting= Meeting 1  Query= 0   Predicted spans:  tensor([5.7288, 7.1320], grad_fn=<SqueezeBackward1>)  len= 258
Meeting= Meeting 1  Query= 1   Predicted spans:  tensor([5.8590, 7.4861], grad_fn=<SqueezeBackward1>)  len= 258
Meeting= Meeting 1  Query= 2   Predicted spans:  tensor([6.0290, 7.9056], grad_fn=<SqueezeBackward1>) 

 10%|█         | 1/10 [03:47<34:05, 227.24s/it]

Meeting= Meeting 161  Query= 11   Predicted spans:  tensor([101.7793, 118.1770], grad_fn=<SqueezeBackward1>)  len= 304
 
 Epoch [1/10], Loss: 32.864761551221214 
 
Meeting= Meeting 0  Query= 0   Predicted spans:  tensor([112.8806, 128.5631], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 1   Predicted spans:  tensor([112.3040, 128.3033], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 2   Predicted spans:  tensor([111.7395, 128.1685], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 3   Predicted spans:  tensor([112.9485, 128.4757], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 4   Predicted spans:  tensor([112.6581, 129.5703], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 5   Predicted spans:  tensor([112.7263, 130.0988], grad_fn=<SqueezeBackward1>)  len= 178
 
Meeting= Meeting 1  Query= 0   Predicted spans:  tensor([113.3842, 133.2535], grad_fn=<SqueezeBackward1>)  len= 258
Meeting= Meeting 1  Qu

 20%|██        | 2/10 [07:28<29:50, 223.81s/it]

 
 Epoch [2/10], Loss: 32.64074389139811 
 
Meeting= Meeting 0  Query= 0   Predicted spans:  tensor([116.1065, 137.9055], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 1   Predicted spans:  tensor([116.0161, 138.9758], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 2   Predicted spans:  tensor([114.4988, 138.4476], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 3   Predicted spans:  tensor([116.6507, 138.0686], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 4   Predicted spans:  tensor([116.2536, 140.5978], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 5   Predicted spans:  tensor([115.8712, 141.0251], grad_fn=<SqueezeBackward1>)  len= 178
 
Meeting= Meeting 1  Query= 0   Predicted spans:  tensor([117.6029, 142.6793], grad_fn=<SqueezeBackward1>)  len= 258
Meeting= Meeting 1  Query= 1   Predicted spans:  tensor([114.4163, 141.7306], grad_fn=<SqueezeBackward1>)  len= 258
Meeting= Meeting 1  Query=

 30%|███       | 3/10 [11:12<26:07, 223.94s/it]

Meeting= Meeting 161  Query= 11   Predicted spans:  tensor([100.7374, 117.3015], grad_fn=<SqueezeBackward1>)  len= 304
 
 Epoch [3/10], Loss: 32.08259232838949 
 
Meeting= Meeting 0  Query= 0   Predicted spans:  tensor([120.2545, 153.3717], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 1   Predicted spans:  tensor([121.5058, 155.6527], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 2   Predicted spans:  tensor([119.1731, 154.5530], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 3   Predicted spans:  tensor([122.0375, 153.3386], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 4   Predicted spans:  tensor([122.2559, 157.1817], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 5   Predicted spans:  tensor([121.7798, 157.4591], grad_fn=<SqueezeBackward1>)  len= 178
 
Meeting= Meeting 1  Query= 0   Predicted spans:  tensor([124.1083, 157.1234], grad_fn=<SqueezeBackward1>)  len= 258
Meeting= Meeting 1  Que

 40%|████      | 4/10 [14:52<22:14, 222.39s/it]

 
 Epoch [4/10], Loss: 32.34326171875 
 
Meeting= Meeting 0  Query= 0   Predicted spans:  tensor([124.8955, 165.2717], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 1   Predicted spans:  tensor([127.3753, 169.2577], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 2   Predicted spans:  tensor([123.7433, 167.8475], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 3   Predicted spans:  tensor([126.8645, 165.4611], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 4   Predicted spans:  tensor([127.6298, 170.9120], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 5   Predicted spans:  tensor([126.7608, 171.2870], grad_fn=<SqueezeBackward1>)  len= 178
 
Meeting= Meeting 1  Query= 0   Predicted spans:  tensor([128.7286, 169.1741], grad_fn=<SqueezeBackward1>)  len= 258
Meeting= Meeting 1  Query= 1   Predicted spans:  tensor([123.8026, 167.4214], grad_fn=<SqueezeBackward1>)  len= 258
Meeting= Meeting 1  Query= 2 

 50%|█████     | 5/10 [18:28<18:20, 220.08s/it]

Meeting= Meeting 161  Query= 11   Predicted spans:  tensor([ 99.4127, 111.1810], grad_fn=<SqueezeBackward1>)  len= 304
 
 Epoch [5/10], Loss: 32.811427434285484 
 
Meeting= Meeting 0  Query= 0   Predicted spans:  tensor([131.2261, 179.0330], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 1   Predicted spans:  tensor([135.1100, 184.9196], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 2   Predicted spans:  tensor([130.1325, 183.3513], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 3   Predicted spans:  tensor([133.6749, 178.7888], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 4   Predicted spans:  tensor([134.7899, 186.1600], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 5   Predicted spans:  tensor([133.7079, 186.4433], grad_fn=<SqueezeBackward1>)  len= 178
 
Meeting= Meeting 1  Query= 0   Predicted spans:  tensor([136.3669, 181.2089], grad_fn=<SqueezeBackward1>)  len= 258
Meeting= Meeting 1  Qu

 60%|██████    | 6/10 [22:05<14:35, 218.79s/it]

 
 Epoch [6/10], Loss: 33.15457344055176 
 
Meeting= Meeting 0  Query= 0   Predicted spans:  tensor([137.1747, 197.6821], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 1   Predicted spans:  tensor([142.5612, 206.0713], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 2   Predicted spans:  tensor([136.3203, 203.9992], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 3   Predicted spans:  tensor([139.9234, 195.9097], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 4   Predicted spans:  tensor([141.4749, 205.4717], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 5   Predicted spans:  tensor([140.3629, 205.1438], grad_fn=<SqueezeBackward1>)  len= 178
 
Meeting= Meeting 1  Query= 0   Predicted spans:  tensor([141.4358, 196.9419], grad_fn=<SqueezeBackward1>)  len= 258
Meeting= Meeting 1  Query= 1   Predicted spans:  tensor([134.3980, 195.3318], grad_fn=<SqueezeBackward1>)  len= 258
Meeting= Meeting 1  Query=

 70%|███████   | 7/10 [25:42<10:54, 218.22s/it]

 
 Epoch [7/10], Loss: 31.904618581136067 
 
Meeting= Meeting 0  Query= 0   Predicted spans:  tensor([148.9113, 216.3211], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 1   Predicted spans:  tensor([155.7570, 226.7452], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 2   Predicted spans:  tensor([148.6142, 224.6383], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 3   Predicted spans:  tensor([151.1190, 214.2775], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 4   Predicted spans:  tensor([153.4090, 226.1852], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 5   Predicted spans:  tensor([152.3202, 225.6309], grad_fn=<SqueezeBackward1>)  len= 178
 
Meeting= Meeting 1  Query= 0   Predicted spans:  tensor([151.3095, 215.6150], grad_fn=<SqueezeBackward1>)  len= 258
Meeting= Meeting 1  Query= 1   Predicted spans:  tensor([144.0201, 213.9978], grad_fn=<SqueezeBackward1>)  len= 258
Meeting= Meeting 1  Query

 80%|████████  | 8/10 [29:18<07:15, 217.66s/it]

 
 Epoch [8/10], Loss: 31.5982240041097 
 
Meeting= Meeting 0  Query= 0   Predicted spans:  tensor([162.4166, 228.0125], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 1   Predicted spans:  tensor([171.4716, 241.9927], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 2   Predicted spans:  tensor([163.3803, 238.6051], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 3   Predicted spans:  tensor([164.4023, 225.5741], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 4   Predicted spans:  tensor([168.1593, 240.1154], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 5   Predicted spans:  tensor([166.8724, 239.1997], grad_fn=<SqueezeBackward1>)  len= 178
 
Meeting= Meeting 1  Query= 0   Predicted spans:  tensor([164.0673, 226.7582], grad_fn=<SqueezeBackward1>)  len= 258
Meeting= Meeting 1  Query= 1   Predicted spans:  tensor([156.1952, 224.9028], grad_fn=<SqueezeBackward1>)  len= 258
Meeting= Meeting 1  Query= 

 90%|█████████ | 9/10 [32:55<03:37, 217.49s/it]

Meeting= Meeting 161  Query= 11   Predicted spans:  tensor([ 98.8552, 122.4120], grad_fn=<SqueezeBackward1>)  len= 304
 
 Epoch [9/10], Loss: 31.31916904449463 
 
Meeting= Meeting 0  Query= 0   Predicted spans:  tensor([165.7161, 234.5899], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 1   Predicted spans:  tensor([176.5777, 251.4177], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 2   Predicted spans:  tensor([167.5720, 246.7949], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 3   Predicted spans:  tensor([167.3930, 232.2900], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 4   Predicted spans:  tensor([172.2281, 248.8180], grad_fn=<SqueezeBackward1>)  len= 178
Meeting= Meeting 0  Query= 5   Predicted spans:  tensor([170.8055, 247.2946], grad_fn=<SqueezeBackward1>)  len= 178
 
Meeting= Meeting 1  Query= 0   Predicted spans:  tensor([167.4520, 233.9067], grad_fn=<SqueezeBackward1>)  len= 258
Meeting= Meeting 1  Que

100%|██████████| 10/10 [36:36<00:00, 219.61s/it]

 
 Epoch [10/10], Loss: 30.81246344248454 
 





In [None]:
# Evaluate the performance of Locater


In [8]:
# Save the Locater
torch.save(locater, corpus_root_DS+'locater(ELECTRA).pth')

### Evaluate the Locator

In [7]:
locater = torch.load(corpus_root_DS+'locater(RoBERTa).pth')

In [None]:
# Evaluate the performance of Locater (with train set)

cos_sim_value = []
manhattan_dis = []
minkowski_dis = []

for meeting_num, meeting in new_tensor_dict.items():  # iterate meeting
   # Get training data for one meeting
    transcript_emb = meeting['Transcript']
    query_set = meeting['Query']
    start_end_label = meeting['Spans'] #label
    meeting_len = meeting['Length'] # meeting length

    for i, (inputs_query, label) in enumerate(zip(query_set, start_end_label)):  # iterate query for a meeting

      transcript_emb = transcript_emb.to('cpu')
      inputs_query = inputs_query.to('cpu')
      label = label.to('cpu')

      predicted_span = locater(transcript_emb, inputs_query, meeting_len)

      # Cosine similarity
      cos_sim = F.cosine_similarity(predicted_span, label)

      # Manhattan Distance
      manhattan_distance = torch.sum(torch.abs(predicted_span - label))

      # Minkowski Distance (p=1.5)
      minkowski_distance = F.pairwise_distance(predicted_span.unsqueeze(0), label.unsqueeze(0), p=1.5)

      print('Meeting ', meeting_num, 'Query ', i, 'len=', meeting_len)
      print('predicted_span=', predicted_span)
      print('')
      print('cos_sim=', cos_sim)
      print('Manhattan dis=', manhattan_distance)
      print('Minkowski dis=', minkowski_distance)
      print('')

      cos_sim_value.append(cos_sim) # cos_sim of all predicted_value & true value
      manhattan_dis.append(manhattan_distance) # manhattan of all predicted_value & true value
      minkowski_dis.append(minkowski_distance) # minkowski of all predicted_value & true value
  
    print('==============================')

print('Average cosine similarity for prediction =', (sum(cos_sim_value)/len(cos_sim_value)))
print('Average Manhattan distance for prediction =', (sum(manhattan_dis)/len(manhattan_dis)))
print('Average Minkowski distance for prediction =', (sum(minkowski_distance)/len(minkowski_distance)))

In [11]:
# Evaluate the performance of Locater (with validation set)

cos_sim_value = []
manhattan_dis = []
minkowski_dis = []
euclid_dis = []

for meeting_num, meeting in new_tensor_dict.items():  # iterate meeting
   # Get training data for one meeting
    transcript_emb = meeting['Meeting 0']['Transcript']
    query_set = meeting['Meeting 0']['Query']
    start_end_label = meeting['Meeting 0']['Spans'] #label
    meeting_len = meeting['Meeting 0']['Length'] # meeting length

    for i, (inputs_query, label) in enumerate(zip(query_set, start_end_label)):  # iterate query for a meeting

      transcript_emb = transcript_emb.to('cpu')
      inputs_query = inputs_query.to('cpu')
      label = label.to('cpu')

      predicted_span = locater(transcript_emb, inputs_query, meeting_len)

      # Cosine similarity
      cos_sim = F.cosine_similarity(predicted_span, label)

      # Manhattan Distance
      manhattan_distance = torch.sum(torch.abs(predicted_span - label))

      # Minkowski Distance (p=1.5)
      minkowski_distance = F.pairwise_distance(predicted_span.unsqueeze(0), label.unsqueeze(0), p=1.5)

      # Euclid Distance
      euclid = torch.sqrt(torch.sum((predicted_span - label) ** 2, dim=1))

      print('Meeting ', meeting_num, 'Query ', i, 'len=', meeting_len)
      print('predicted_span=', predicted_span, 'gold_span=', label)
      print('Euclid distance=', euclid)
      print('cos_sim=', cos_sim)
      print('Manhattan dis=', manhattan_distance)
      print('Minkowski dis=', minkowski_distance)
      print('')

      euclid_dis.append(euclid)  # Euclid of all predicted_value & true value
      cos_sim_value.append(cos_sim) # cos_sim of all predicted_value & true value
      manhattan_dis.append(manhattan_distance) # manhattan of all predicted_value & true value
      minkowski_dis.append(minkowski_distance) # minkowski of all predicted_value & true value
  
    print('==============================')

print('Average Euclid Distance for prediction =', (sum(euclid_dis)/len(euclid_dis)))
print('Average cosine similarity for prediction =', (sum(cos_sim_value)/len(cos_sim_value)))
print('Average Manhattan distance for prediction =', (sum(manhattan_dis)/len(manhattan_dis)))
print('Average Minkowski distance for prediction =', (sum(minkowski_distance)/len(minkowski_distance)))

Meeting  meeting: ES2011b.json Query  0 len= 361
predicted_span= tensor([[217.3151, 280.9408]], grad_fn=<AbsBackward0>) gold_span= tensor([ 1., 52.], requires_grad=True)
Euclid distance= tensor([314.9701], grad_fn=<SqrtBackward0>)
cos_sim= tensor([0.8026], grad_fn=<SumBackward1>)
Manhattan dis= tensor(445.2560, grad_fn=<SumBackward0>)
Minkowski dis= tensor([[353.4710]], grad_fn=<NormBackward1>)

Meeting  meeting: ES2011b.json Query  1 len= 361
predicted_span= tensor([[213.4664, 275.1346]], grad_fn=<AbsBackward0>) gold_span= tensor([73., 88.], requires_grad=True)
Euclid distance= tensor([233.9875], grad_fn=<SqrtBackward0>)
cos_sim= tensor([0.9995], grad_fn=<SumBackward1>)
Manhattan dis= tensor(327.6010, grad_fn=<SumBackward0>)
Minkowski dis= tensor([[261.3363]], grad_fn=<NormBackward1>)

Meeting  meeting: ES2011b.json Query  2 len= 361
predicted_span= tensor([[215.0727, 269.6426]], grad_fn=<AbsBackward0>) gold_span= tensor([ 89., 120.], requires_grad=True)
Euclid distance= tensor([195.6