In [None]:
# Mount Google Drive inside the Colab VM; the next cell changes into a folder under it.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
import os
# Work from the project folder on Drive: later cells use paths relative to it
# ('checkpoints/...', 'result/...', 'multi30k/test.de.atok').
os.chdir('/content/drive/MyDrive/모델 구현/1주차/transformer')

In [None]:
import torch
import torch.nn as nn
import numpy as np
import copy

def One_hot_encoding(x, vocab_size):
  """Convert integer token ids into float one-hot vectors.

  Args:
    x: integer tensor of shape (batch_size, sentence_length); every entry must
      be a token id in [0, vocab_size).
    vocab_size: length of the one-hot dimension.

  Returns:
    float32 tensor of shape (batch_size, sentence_length, vocab_size) on the
    same device as `x`, holding 1 at each token id and 0 elsewhere.
  """
  batch_size = x.size(0)
  sentence_length = x.size(1)

  # Allocate on x's device rather than forcing .cuda(), so the CPU fallback works too.
  one_hot_x = torch.zeros(batch_size, sentence_length, vocab_size, device=x.device)

  # One scatter along the vocab axis replaces the Python loop over every token.
  one_hot_x.scatter_(-1, x.long().unsqueeze(-1), 1.0)

  return one_hot_x

def Positional_encoding(x):
  """Build the sinusoidal positional-encoding table matching an embedded batch.

  For position `pos` (0-based) and feature index `i` the value is
    sin((pos + 1) / 10000 ** (i / d_model))        if i is even
    cos((pos + 1) / 10000 ** ((i - 1) / d_model))  if i is odd
  i.e. positions are counted from 1, exactly as in the original loop.

  Args:
    x: embedded input of shape (batch_size, sentence_length, d_model); only its
      shape and device are used.

  Returns:
    float32 tensor of shape (batch_size, sentence_length, d_model) on x's device.
  """
  batch_size = x.size(0)
  sentence_length = x.size(1)
  d_model = x.size(2)

  # Compute in float64 on the CPU (the original used NumPy float64 scalars) and
  # cast once at the end; this avoids one tiny device write per table element.
  positions = torch.arange(1, sentence_length + 1, dtype=torch.float64).unsqueeze(1)
  feature_idx = torch.arange(d_model)
  is_even = feature_idx % 2 == 0
  # Each sin/cos pair shares the exponent of its even index: i -> i, i + 1 -> i.
  paired_idx = (feature_idx - feature_idx % 2).to(torch.float64)
  angles = positions / torch.pow(10000.0, paired_idx / d_model)

  table = torch.where(is_even, torch.sin(angles), torch.cos(angles))
  # Follow x's device instead of a hard-coded .cuda() so CPU runs work as well.
  positional_encoding_x = table.to(device=x.device, dtype=torch.float32)

  return positional_encoding_x.repeat(batch_size, 1, 1)

class Positional_embedding(nn.Module):
  """One-hot encoding, embedding and positional encoding in a single module.

  Token ids are one-hot encoded, projected to `d_model` features by a linear
  layer (bias included), and the positional-encoding table is added on top.
  """
  def __init__(self, d_model, vocab_number):
    super().__init__()
    self.d_model, self.vocab_number = d_model, vocab_number
    self.embedder = nn.Linear(vocab_number, d_model)

  def forward(self, x):
    """Map token ids (batch_size, sentence_length) to (batch_size, sentence_length, d_model)."""
    token_vectors = self.embedder(One_hot_encoding(x, self.vocab_number))
    position_vectors = Positional_encoding(token_vectors)
    return token_vectors + position_vectors


def scaled_dot_product_attention(Q, K, V, mask, dropout):
  """Compute softmax(Q K^T / sqrt(d_k)) V with masking and dropout.

  Args:
    Q: queries, (batch_size, len_q, d_k).
    K: keys, (batch_size, len_k, d_k).
    V: values, (batch_size, len_k, d_v).
    mask: tensor broadcastable to (batch_size, len_q, len_k); positions where it
      equals 1 / True are excluded from attention.
    dropout: callable applied to the attention weights (e.g. an nn.Dropout).

  Returns:
    Attention output of shape (batch_size, len_q, d_v).
  """
  d_k = Q.size(-1)

  # Raw similarity scores: (batch_size, len_q, len_k).
  scores = torch.bmm(Q, K.transpose(-2, -1))
  # Masked positions get a very large negative score so softmax gives them ~0 weight.
  scores = scores.masked_fill(mask == 1, -1e9)

  # torch.softmax avoids depending on `F`, which is only imported in a later cell;
  # the scaled scores are now computed once instead of twice.
  attention_distribution = torch.softmax(scores / np.sqrt(d_k), dim=-1)
  attention_distribution = dropout(attention_distribution)

  return torch.bmm(attention_distribution, V)

class Single_head_attention(nn.Module):
  """A single attention head: linear projections of Q, K, V followed by attention."""
  def __init__(self, d_model, d_k, d_v):
    super().__init__()
    # Per-head projections from d_model down to d_k (queries/keys) and d_v (values).
    # The author's note: d_k = d_model / num_heads.
    self.linear_q = nn.Linear(d_model, d_k)
    self.linear_k = nn.Linear(d_model, d_k)
    self.linear_v = nn.Linear(d_model, d_v)

  def forward(self, Q, K, V, mask, dropout):
    """Project the inputs and return the attention output, (batch_size, len_q, d_v)."""
    return scaled_dot_product_attention(
        self.linear_q(Q),
        self.linear_k(K),
        self.linear_v(V),
        mask,
        dropout)

def module_copy(module, N):
  """Return an nn.ModuleList holding N independent deep copies of `module`.

  Used to stack attention heads and encoder/decoder layers. Every copy starts
  from the same parameter values as `module` but owns its own tensors.
  """
  clones = nn.ModuleList()
  for _ in range(N):
    clones.append(copy.deepcopy(module))
  return clones

class Multi_head_attention(nn.Module):
  """Multi-head attention built from `num_heads` independent single heads.

  Each head is evaluated in turn, the head outputs are concatenated along the
  feature axis and projected back to `d_model`. One shared nn.Dropout(p) is
  handed to every head and applied to its attention weights.
  """
  def __init__(self, d_model, d_k, d_v, num_heads, p = 0.1):
    super().__init__()
    self.h = num_heads
    # All heads are deep copies of one freshly initialised head.
    self.multiattention = module_copy(Single_head_attention(d_model, d_k, d_v), num_heads)
    self.linear = nn.Linear(num_heads * d_v, d_model)
    self.dropout = nn.Dropout(p)

  def forward(self, Q, K, V, mask):
    """Return the combined attention output, (batch_size, len_q, d_model)."""
    head_outputs = [
        self.multiattention[head_idx](Q, K, V, mask, self.dropout)
        for head_idx in range(self.h)
    ]
    # Concatenate to (batch_size, len_q, num_heads * d_v) before the output projection.
    merged = torch.cat(head_outputs, dim = -1)
    return self.linear(merged)

class Encoder(nn.Module):
  """One encoder layer: self-attention and a feed-forward network.

  Each sub-layer is wrapped as LayerNorm(input + Dropout(sublayer(input))).
  """
  def __init__(self, d_model, d_k, d_v, d_ff, h, p_drop = 0.1):
    super().__init__()
    # Submodules are created in the original order so parameter ordering
    # (and therefore saved optimizer state) is unchanged.
    self.attention_layer = Multi_head_attention(d_model, d_k, d_v, h)
    self.dropout1 = nn.Dropout(p_drop)
    self.ln1 = nn.LayerNorm(d_model)

    self.feed_forward_layer = nn.Sequential(
        nn.Linear(d_model, d_ff),
        nn.ReLU(),
        nn.Dropout(p_drop),
        nn.Linear(d_ff, d_model),
    )
    self.ln2 = nn.LayerNorm(d_model)
    self.dropout2 = nn.Dropout(p_drop)

  def forward(self, x, mask):
    """Encode x, (batch_size, src_len, d_model), using `mask` in self-attention."""
    # Sub-layer 1: self-attention with residual connection and layer norm.
    attended = self.attention_layer(x, x, x, mask)
    normed = self.ln1(x + self.dropout1(attended))

    # Sub-layer 2: position-wise feed-forward with residual connection and layer norm.
    transformed = self.feed_forward_layer(normed)
    return self.ln2(normed + self.dropout2(transformed))

def make_mask(src, trg, src_padding, trg_padding):
  """Build boolean padding masks (True = padded key position to ignore).

  Args:
    src: source token ids, (batch_size, src_len).
    trg: target token ids, (batch_size, trg_len).
    src_padding: pad token id used in `src`.
    trg_padding: pad token id used in `trg`.

  Returns:
    (src_mask, memory_mask, trg_mask) with shapes
      src_mask:    (batch_size, src_len, src_len)  - encoder self-attention
      memory_mask: (batch_size, trg_len, src_len)  - target queries over source keys
      trg_mask:    (batch_size, trg_len, trg_len)  - decoder self-attention
  """
  src_len, trg_len = src.size(1), trg.size(1)

  # Row vectors marking padded key positions: (batch_size, 1, len).
  src_is_pad = src.eq(src_padding).unsqueeze(1)
  trg_is_pad = trg.eq(trg_padding).unsqueeze(1)

  # Repeat each row once per query position of the matching attention matrix.
  src_mask = src_is_pad.repeat(1, src_len, 1)
  memory_mask = src_is_pad.repeat(1, trg_len, 1)
  trg_mask = trg_is_pad.repeat(1, trg_len, 1)

  return src_mask, memory_mask, trg_mask

class Decoder(nn.Module):
  """One decoder layer: masked self-attention, encoder-decoder attention, feed-forward.

  Each sub-layer is wrapped as LayerNorm(input + Dropout(sublayer(input))).
  """
  def __init__(self, d_model, d_k, d_v, d_ff, h, p_drop = 0.1):
    super(Decoder, self).__init__()
    self.self_attention_layer = Multi_head_attention(d_model, d_k, d_v, h)
    self.dropout1 = nn.Dropout(p_drop)
    self.ln1 = nn.LayerNorm(d_model)

    self.enc_dec_attention_layer = Multi_head_attention(d_model, d_k, d_v, h)
    self.dropout2 = nn.Dropout(p_drop)
    self.ln2 = nn.LayerNorm(d_model)

    self.feed_forward_layer = nn.Sequential(
                                nn.Linear(d_model, d_ff),
                                nn.ReLU(),
                                nn.Dropout(p_drop),
                                nn.Linear(d_ff, d_model))
    self.dropout3 = nn.Dropout(p_drop)
    self.ln3 = nn.LayerNorm(d_model)

  def forward(self, x, trg_mask, K, V, memory_mask):
    """Run one decoder layer.

    Args:
      x: target-side input, (batch_size, trg_len, d_model).
      trg_mask: padding mask for decoder self-attention, (batch_size, trg_len, trg_len).
      K, V: keys and values for encoder-decoder attention (the caller passes the
        encoder output for both).
      memory_mask: padding mask for encoder-decoder attention, (batch_size, trg_len, src_len).

    Returns:
      Tensor of shape (batch_size, trg_len, d_model).
    """
    sentence_length = x.size(1)

    # Causal mask: True strictly above the diagonal, so position t can attend to
    # positions <= t only (the diagonal stays visible). Built with torch on x's
    # device instead of NumPy + a hard-coded .cuda(), so CPU runs work too.
    self_mask = torch.triu(
        torch.ones(1, sentence_length, sentence_length, device=x.device),
        diagonal=1).bool()  # (1, trg_len, trg_len), broadcast over the batch

    # Sub-layer 1: masked self-attention (padding mask OR causal mask).
    x1 = self.self_attention_layer(x, x, x, trg_mask | self_mask)
    x2 = x + self.dropout1(x1)
    x3 = self.ln1(x2)

    # Sub-layer 2: attention of target queries over K / V.
    x4 = self.enc_dec_attention_layer(x3, K, V, memory_mask)
    x5 = x3 + self.dropout2(x4)
    x6 = self.ln2(x5)

    # Sub-layer 3: position-wise feed-forward network.
    x7 = self.feed_forward_layer(x6)
    x8 = x6 + self.dropout3(x7)
    x9 = self.ln3(x8)

    return x9

class Transformer(nn.Module):
  """Encoder-decoder Transformer producing target-vocabulary logits.

  Args:
    num_enc: number of stacked encoder layers.
    num_dec: number of stacked decoder layers.
    src_vocab_size: size of the source vocabulary (one-hot dimension).
    trg_vocab_size: size of the target vocabulary (one-hot and output dimension).
    d_model, d_k, d_v, d_ff, h: model width, per-head key/value sizes,
      feed-forward hidden size and number of attention heads.
    src_pad_idx: pad token id in the source batch (default 2, the previously
      hard-coded value).
    trg_pad_idx: pad token id in the target batch (default 2).
  """
  def __init__(self, num_enc, num_dec, src_vocab_size, trg_vocab_size, d_model, d_k, d_v, d_ff, h,
               src_pad_idx = 2, trg_pad_idx = 2):
    super(Transformer, self).__init__()
    self.src_pad_idx = src_pad_idx
    self.trg_pad_idx = trg_pad_idx

    self.num_enc = num_enc
    self.src_embedding = Positional_embedding(d_model, src_vocab_size)
    self.encoder = module_copy(Encoder(d_model, d_k, d_v, d_ff, h), num_enc)

    self.num_dec = num_dec
    self.trg_embedding = Positional_embedding(d_model, trg_vocab_size)
    self.decoder = module_copy(Decoder(d_model, d_k, d_v, d_ff, h), num_dec)

    self.linear = nn.Linear(d_model, trg_vocab_size)

  def forward(self, src, trg):
    """Return logits of shape (batch_size, trg_len, trg_vocab_size).

    Args:
      src: source token ids, (batch_size, src_len).
      trg: target-side input token ids, (batch_size, trg_len).
    """
    # Masks must be built from the raw token ids, before embedding replaces them.
    src_mask, memory_mask, trg_mask = make_mask(src, trg, self.src_pad_idx, self.trg_pad_idx)

    src = self.src_embedding(src)
    for i in range(self.num_enc):
        src = self.encoder[i](src, src_mask)

    trg = self.trg_embedding(trg)
    for i in range(self.num_dec):
        # The final encoder output is used as both keys and values.
        trg = self.decoder[i](trg, trg_mask, src, src, memory_mask)

    # Un-normalised scores; callers apply softmax / argmax or cross-entropy.
    pred = self.linear(trg)

    return pred

In [None]:
import torch.nn.functional as F

from dataset.dataloader import load_data, get_loader
from dataset.field import Vocab
from utils import seq2sen

import time

In [None]:
def save_model(epoch, model, optimizer, loss, step_num, path):
    """Save a training checkpoint to `path` with torch.save.

    The checkpoint is a dict with the keys 'epoch', 'model_state_dict',
    'optimizer_state_dict', 'loss' and 'step_num'.

    Args:
        epoch: epoch index to record.
        model: module whose state_dict() is stored.
        optimizer: optimizer whose state_dict() is stored.
        loss: loss value to record (stored as given).
        step_num: global optimisation step counter to record.
        path: destination file; missing parent directories are created.
    """
    # Without this, the first save fails when e.g. 'checkpoints/' does not exist yet.
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    state = {
        'epoch' : epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
        'step_num' : step_num
        }

    torch.save(state, path)
    print('save complete : ' + path )

In [None]:
# Folder holding the Multi30k data; load_data(path) and the vocab.en / vocab.de files are read from here.
path = '/content/drive/MyDrive/모델 구현/1주차/transformer/multi30k'

In [None]:
# Author's note: the vocabulary consists of the words that occur at least a set number of times (4)
# plus the init, eos, pad and unk tokens.
# Author's note: load() builds a dict that assigns an integer to every word of that vocabulary.

src, tgt = load_data(path)
src_vocab = Vocab(init_token='<sos>', eos_token='<eos>', pad_token='<pad>', unk_token='<unk>')
src_vocab.load(os.path.join(path, 'vocab.en'))
tgt_vocab = Vocab(init_token='<sos>', eos_token='<eos>', pad_token='<pad>', unk_token='<unk>')
tgt_vocab.load(os.path.join(path, 'vocab.de'))

In [None]:
# Special-token ids (assumed to match the ids assigned by Vocab - TODO confirm) and decoding limit.
sos_idx = 0
eos_idx = 1
pad_idx = 2
max_length = 50  # maximum number of decoding steps at test time

# Model hyperparameters.
num_enc = 6
num_dec = 6
d_model = 512
d_k = 64
d_v = 64
d_ff = 2048  # hidden size of the position-wise feed-forward network
h = 8     # num_heads: number of attention heads evaluated per attention layer

In [None]:
# Vocabulary sizes; these set the one-hot dimension of the source and target embeddings.
src_vocab_size = len(src_vocab)
tgt_vocab_size = len(tgt_vocab)

print(src_vocab_size)
print(tgt_vocab_size)

3935
4376


In [None]:
# Author's note: get_loader handles the data-loader duties (pre/post-processing, mapping words to integers, batching, ...).
# Author's note: words found in the vocab get their integer id; words not in the vocab get the unk_token id (= 3).
train_loader = get_loader(src['train'], tgt['train'], src_vocab, tgt_vocab, batch_size=128, shuffle=True)
valid_loader = get_loader(src['valid'], tgt['valid'], src_vocab, tgt_vocab, batch_size=128)

In [None]:
# Use the GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda" if(torch.cuda.is_available()) else "cpu")
# Layer counts come from the hyperparameter cell instead of repeating the literal 6, 6.
transformer = Transformer(num_enc, num_dec, src_vocab_size, tgt_vocab_size, d_model, d_k, d_v, d_ff, h).to(device)
# The lr given here is only a placeholder: the training loop overwrites it every step.
optimizer = torch.optim.Adam(transformer.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9)

step_num = 1          # global optimisation step, drives the learning-rate schedule
warmup_steps = 4000   # number of steps over which the learning rate ramps up
losses = []           # per-batch training losses

In [None]:
for epoch in range(100):
  start_epoch = time.time()
  i = 0

  # ---- training pass: dropout must be active ----
  transformer.train()
  for src_batch, tgt_batch in train_loader:
    start_batch = time.time()

    # Batches arrive as (batch_size, sentence_length) lists of integer token ids.
    src_batch = torch.tensor(src_batch).to(device)
    trg_batch = torch.tensor(tgt_batch).to(device)

    # Teacher forcing: the decoder input drops the last token, the expected output drops
    # the first (<sos>) token; the output is flattened to (N,) for F.cross_entropy.
    trg_input = trg_batch[:,:-1]
    trg_output = trg_batch[:,1:].contiguous().view(-1)

    pred = transformer(src_batch, trg_input)

    # Learning-rate schedule: linear warm-up for `warmup_steps` steps, then
    # inverse-square-root decay, scaled by d_model ** -0.5.
    lr = (d_model ** -0.5) * min(step_num ** -0.5, step_num * (warmup_steps ** -1.5))
    for group in optimizer.param_groups: # every parameter group gets the same scheduled rate
      group['lr'] = lr

    optimizer.zero_grad()

    # F.cross_entropy expects input (N, C) and target (N,); pad positions are ignored.
    loss = F.cross_entropy(pred.view(-1, pred.size(-1)), trg_output, ignore_index = pad_idx)
    loss.backward()

    optimizer.step()
    step_num += 1

    i = i+1
    losses.append(loss.item())

    batch_time = time.time() - start_batch
    print(f'[{epoch + 1}/{100}][{i}/{train_loader.size // 128 + 1}] train loss : {loss.item()} , duration : {batch_time}')


  # ---- validation pass: dropout off and no autograd graph ----
  # Previously this ran in training mode, so dropout distorted the reported validation
  # loss and every forward pass kept an unused autograd graph alive.
  transformer.eval()
  i = 0
  with torch.no_grad():
    for src_batch, tgt_batch in valid_loader:
      start_batch = time.time()

      src_batch = torch.tensor(src_batch).to(device)
      trg_batch = torch.tensor(tgt_batch).to(device)

      trg_input = trg_batch[:,:-1]
      trg_output = trg_batch[:,1:].contiguous().view(-1)

      pred = transformer(src_batch, trg_input)
      loss = F.cross_entropy(pred.view(-1, pred.size(-1)), trg_output, ignore_index = pad_idx)

      i = i + 1
      batch_time = time.time() - start_batch
      print(f'[{epoch+1}/{100}][{i}/{valid_loader.size//128 + 1}] valid loss : {loss.item()} , duration : {batch_time}')

  epoch_time = time.time() - start_epoch
  print(f'Time taken for {epoch + 1} epoch : {epoch_time}')

  # `loss` here is the loss of the last validation batch of this epoch.
  save_model(epoch, transformer, optimizer, loss, step_num, f'checkpoints/epoch_{epoch+1}')

print('End of the training')
save_model(epoch, transformer, optimizer, loss, step_num, f'checkpoints/final')

[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
[27/100][67/227] train loss : 0.5082462430000305 , duration : 2.572721481323242
[27/100][68/227] train loss : 0.5436937212944031 , duration : 2.0062477588653564
[27/100][69/227] train loss : 0.5108552575111389 , duration : 2.019770622253418
[27/100][70/227] train loss : 0.5435999631881714 , duration : 1.9947874546051025
[27/100][71/227] train loss : 0.46190744638442993 , duration : 2.003833532333374
[27/100][72/227] train loss : 0.5511971712112427 , duration : 2.0238571166992188
[27/100][73/227] train loss : 0.5693036317825317 , duration : 2.3657307624816895
[27/100][74/227] train loss : 0.5136569142341614 , duration : 2.0941174030303955
[27/100][75/227] train loss : 0.47555968165397644 , duration : 2.0688672065734863
[27/100][76/227] train loss : 0.4899560213088989 , duration : 2.0565402507781982
[27/100][77/227] train loss : 0.5607917904853821 , duration : 2.2178995609283447
[27/100][78/227] train loss : 0.4933624565601349 , duration 

In [None]:
# Re-create the model and optimizer with the training configuration so a checkpoint can be loaded into them.
device = torch.device("cuda" if(torch.cuda.is_available()) else "cpu")
# Layer counts come from the hyperparameter cell instead of repeating the literal 6, 6.
transformer = Transformer(num_enc, num_dec, src_vocab_size, tgt_vocab_size, d_model, d_k, d_v, d_ff, h).to(device)
optimizer = torch.optim.Adam(transformer.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9)

In [None]:
# Restore the training state saved by save_model. map_location lets a checkpoint written
# on a GPU be loaded when only the CPU is available.
checkpoint = torch.load('checkpoints/epoch_94', map_location=device)
transformer.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']        # 0-based index of the last completed epoch
loss = checkpoint['loss']
step_num = checkpoint['step_num']  # keeps the learning-rate schedule continuous

warmup_steps = 4000
losses = []

In [None]:
# Resume right after the epoch stored in the checkpoint. save_model records the 0-based
# epoch index, so 'checkpoints/epoch_94' holds 93 and training continues at index 94.
for epoch in range(checkpoint['epoch'] + 1, 100):
  start_epoch = time.time()
  i = 0

  # ---- training pass: dropout must be active ----
  transformer.train()
  for src_batch, tgt_batch in train_loader:
    start_batch = time.time()

    # Batches arrive as (batch_size, sentence_length) lists of integer token ids.
    src_batch = torch.tensor(src_batch).to(device)
    trg_batch = torch.tensor(tgt_batch).to(device)

    # Teacher forcing: the decoder input drops the last token, the expected output drops
    # the first (<sos>) token; the output is flattened to (N,) for F.cross_entropy.
    trg_input = trg_batch[:,:-1]
    trg_output = trg_batch[:,1:].contiguous().view(-1)

    pred = transformer(src_batch, trg_input)

    # Learning-rate schedule: linear warm-up for `warmup_steps` steps, then
    # inverse-square-root decay, scaled by d_model ** -0.5.
    lr = (d_model ** -0.5) * min(step_num ** -0.5, step_num * (warmup_steps ** -1.5))
    for group in optimizer.param_groups: # every parameter group gets the same scheduled rate
      group['lr'] = lr

    optimizer.zero_grad()

    # F.cross_entropy expects input (N, C) and target (N,); pad positions are ignored.
    loss = F.cross_entropy(pred.view(-1, pred.size(-1)), trg_output, ignore_index = pad_idx)
    loss.backward()

    optimizer.step()
    step_num += 1

    i = i+1
    losses.append(loss.item())

    batch_time = time.time() - start_batch
    print(f'[{epoch + 1}/{100}][{i}/{train_loader.size // 128 + 1}] train loss : {loss.item()} , duration : {batch_time}')


  # ---- validation pass: dropout off and no autograd graph ----
  # Previously this ran in training mode, so dropout distorted the reported validation
  # loss and every forward pass kept an unused autograd graph alive.
  transformer.eval()
  i = 0
  with torch.no_grad():
    for src_batch, tgt_batch in valid_loader:
      start_batch = time.time()

      src_batch = torch.tensor(src_batch).to(device)
      trg_batch = torch.tensor(tgt_batch).to(device)

      trg_input = trg_batch[:,:-1]
      trg_output = trg_batch[:,1:].contiguous().view(-1)

      pred = transformer(src_batch, trg_input)
      loss = F.cross_entropy(pred.view(-1, pred.size(-1)), trg_output, ignore_index = pad_idx)

      i = i + 1
      batch_time = time.time() - start_batch
      print(f'[{epoch+1}/{100}][{i}/{valid_loader.size//128 + 1}] valid loss : {loss.item()} , duration : {batch_time}')

  epoch_time = time.time() - start_epoch
  print(f'Time taken for {epoch + 1} epoch : {epoch_time}')

  # `loss` here is the loss of the last validation batch of this epoch.
  save_model(epoch, transformer, optimizer, loss, step_num, f'checkpoints/epoch_{epoch+1}')

print('End of the training')
save_model(epoch, transformer, optimizer, loss, step_num, f'checkpoints/final')

[95/100][1/227] train loss : 0.04744343459606171 , duration : 2.071930170059204
[95/100][2/227] train loss : 0.05429507791996002 , duration : 2.191410779953003
[95/100][3/227] train loss : 0.06257300078868866 , duration : 2.4649291038513184
[95/100][4/227] train loss : 0.04626242816448212 , duration : 2.1073529720306396
[95/100][5/227] train loss : 0.05769095942378044 , duration : 1.698716163635254
[95/100][6/227] train loss : 0.04606064781546593 , duration : 2.0681581497192383
[95/100][7/227] train loss : 0.0574239045381546 , duration : 2.055542230606079
[95/100][8/227] train loss : 0.06097881495952606 , duration : 1.8857898712158203
[95/100][9/227] train loss : 0.0510072335600853 , duration : 2.101860761642456
[95/100][10/227] train loss : 0.04692315682768822 , duration : 2.430664539337158
[95/100][11/227] train loss : 0.06217252090573311 , duration : 1.8660914897918701
[95/100][12/227] train loss : 0.07439733296632767 , duration : 1.9172017574310303
[95/100][13/227] train loss : 0.0

In [None]:
# Re-create the model and load the epoch-31 checkpoint for test-set translation.
device = torch.device("cuda" if(torch.cuda.is_available()) else "cpu")
# Layer counts come from the hyperparameter cell instead of repeating the literal 6, 6.
transformer = Transformer(num_enc, num_dec, src_vocab_size, tgt_vocab_size, d_model, d_k, d_v, d_ff, h).to(device)
optimizer = torch.optim.Adam(transformer.parameters(), lr=0.0001, betas=(0.9, 0.98), eps=1e-9)

# map_location lets a checkpoint written on a GPU be loaded when only the CPU is available.
checkpoint = torch.load('checkpoints/epoch_31', map_location=device)
transformer.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

In [None]:
# Test split: build the loader and the list that collects the decoded sentences.
test_loader = get_loader(src['test'], tgt['test'], src_vocab, tgt_vocab, batch_size=128)
pred = []  # filled with one translated sentence (str) per test example by the decoding cell
# TODO: predict pred_batch from src_batch with your model.
# every sentences in pred_batch should start with <sos> token (index: 0) and end with <eos> token (index: 1).
# every <pad> token (index: 2) should be located after <eos> token (index: 1).
# example of pred_batch:
# [[0, 5, 6, 7, 1],
#  [0, 4, 9, 1, 2],
#  [0, 6, 1, 2, 2]]

In [None]:
# Greedy decoding of the test set.
# eval() switches dropout off (it was still active here before, making translations noisy
# and non-deterministic); no_grad() avoids building autograd graphs during inference.
transformer.eval()

with torch.no_grad():
  for src_batch, tgt_batch in test_loader:
    batch_size = len(tgt_batch)
    src_batch = torch.tensor(src_batch).to(device)
    # Every hypothesis starts with <sos>: [[sos], [sos], ..., [sos]]
    pred_batch = torch.full((batch_size, 1), sos_idx, dtype=torch.long, device=device)
    # finished[i] is True once the i-th sentence has produced <eos>
    finished = torch.zeros(batch_size, dtype=torch.bool, device=device)

    for _ in range(max_length):
      output = transformer(src_batch, pred_batch) # batch_size * sentence_length * tgt_vocab_size
      # Only the last position is new; argmax over logits equals argmax over softmax.
      predictions = output[:, -1].argmax(dim=-1)
      # Sentences that already ended only receive <pad>, as the output format above requires
      # (previously they kept receiving arbitrary tokens after <eos>).
      predictions = predictions.masked_fill(finished, pad_idx)
      pred_batch = torch.cat([pred_batch, predictions.unsqueeze(1)], dim = -1)

      finished |= predictions == eos_idx
      if finished.all():
        break # every sentence produced <eos>, no need to run up to max_length

    # Token-id lists -> sentences (seq2sen is assumed to strip special tokens - TODO confirm).
    pred += seq2sen(pred_batch.cpu().numpy().tolist(), tgt_vocab)

In [None]:
# Show the decoded sentences; this displays the whole list, use pred[:10] for a quick look.
pred

['Ein Mann mit orangefarbener Mütze starrt auf etwas .',
 'Ein Fan rennt auf <unk> grünem Gras vor einem weißen Zaun .',
 'Ein Mädchen in <unk> , das einen Stock trägt , mit einem <unk> .',
 'Fünf Menschen mit <unk> und Helm stehen im Schnee , <unk> im Hintergrund',
 'Leute befinden sich auf dem Dach eines Hauses .',
 'Ein Mann in hellen Kleidern fotografiert eine Gruppe Männer mit dunkler Kleidung und Hüten steht um eine Frau mit einem <unk> @-@ Frisur .',
 'Eine Gruppe von Menschen steht vor einem <unk> .',
 'Ein Junge in einer roten Uniform versucht , beim <unk> im blauen Trikot einen , während der Fänger im blauen Trikot zu berühren .',
 'Ein Typ arbeitet an einem Gebäude .',
 'Ein Mann in einer Weste sitzt auf einem Stuhl und hält Zeitschriften .',
 'Eine Mutter und ihr kleine Orchester genießen im Freien ein schöner Tag .',
 'Männer spielen Volleyball , während ein <unk> <unk> in die Höhe hält , sodass er in der Luft ist .',
 'Eine Frau hält eine Schüssel Essen in einer Küche .',

In [None]:
# Write one translated sentence per line.
os.makedirs('result', exist_ok=True)  # the output folder may not exist on a fresh run
# Explicit UTF-8: the German output contains umlauts and must not depend on the platform default.
# The `with` block closes the file, so no separate f.close() is needed.
with open('result/pred(epoch_31).txt', 'w', encoding='utf-8') as f:
  for line in pred:
    f.write(line + '\n')

In [None]:
# Load the reference translations; `lines` keeps every line (with its trailing newline)
# for the BLEU cell. Explicit UTF-8 because the German text contains umlauts.
with open('multi30k/test.de.atok', 'r', encoding='utf-8') as f:
  lines = f.readlines()

# Preview only the first few references instead of printing the whole file.
for line in lines[:10]:
  print(line.rstrip('\n'))

Ein Mann mit einem orangefarbenen Hut , der etwas anstarrt .

Ein Boston Terrier läuft über saftig @-@ grünes Gras vor einem weißen Zaun .

Ein Mädchen in einem Karateanzug bricht ein Brett mit einem Tritt .

Fünf Leute in Winterjacken und mit Helmen stehen im Schnee mit Schneemobilen im Hintergrund .

Leute Reparieren das Dach eines Hauses .

Ein hell gekleideter Mann fotografiert eine Gruppe von Männern in dunklen Anzügen und mit Hüten , die um eine Frau in einem trägerlosen Kleid herum stehen .

Eine Gruppe von Menschen steht vor einem Iglu .

Ein Junge in einem roten Trikot versucht , die Home Base zu erreichen , während der Catcher im blauen Trikot versucht , ihn zu fangen .

Ein Typ arbeitet an einem Gebäude .

Ein Mann in einer Weste sitzt auf einem Stuhl und hält Magazine .

Eine Mutter und ihr kleiner Sohn genießen einen schönen Tag im Freien .

Männer , die Volleyball spielen , wobei ein Mann den Ball nicht trifft , während seine Hände immer noch in der Luft sind .

Eine Frau

In [None]:
from nltk.translate.bleu_score import sentence_bleu

# Average sentence-level BLEU of the predictions against the reference translations.
bleu_sum = 0
num_scored = 0

for hypothesis, reference_line in zip(pred, lines):
  candidate = hypothesis.split()
  reference = reference_line.split()
  # sentence_bleu expects a LIST of references, each a list of tokens. Passing the bare
  # token list made NLTK treat every word as its own reference and compare characters.
  bleu_sum += sentence_bleu([reference], candidate)
  num_scored += 1

# Divide by the number of scored sentences rather than a hard-coded 1000.
# NOTE(review): no smoothing is applied, so sentences without higher-order n-gram
# matches score ~0; consider nltk's SmoothingFunction or corpus_bleu for reporting.
print(bleu_sum / num_scored if num_scored else 0.0)

Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


0.5331641162777885
