# Load dataset

In [50]:
import requests

def download_file_from_google_drive(id, destination):
    URL = "https://docs.google.com/uc?export=download"

    session = requests.Session()

    response = session.get(URL, params = { 'id' : id }, stream = True)
    token = get_confirm_token(response)

    if token:
        params = { 'id' : id, 'confirm' : token }
        response = session.get(URL, params = params, stream = True)

    save_response_content(response, destination)    

def get_confirm_token(response):
    for key, value in response.cookies.items():
        if key.startswith('download_warning'):
            return value

    return None

def save_response_content(response, destination):
    CHUNK_SIZE = 32768

    with open(destination, "wb") as f:
        for chunk in response.iter_content(CHUNK_SIZE):
            if chunk: # filter out keep-alive new chunks
                f.write(chunk)

In [51]:
file_id = '1o2qka7h7K1DJhj2hBoK7hLJpxe7gtc2-'
destination = 'cup.json'
download_file_from_google_drive(file_id, destination)

In [52]:
import json
with open('cup.json', 'r') as fp:
    cup = json.load(fp)

In [53]:
cup[0].keys()

dict_keys(['C1', 'C2', 'M1', 'M2'])

In [54]:
# Filter lenght
import numpy as np

comment_len_bound = 3
code_len_bound = 5

mask = np.ones(len(cup), dtype=bool)

for i, data in enumerate(cup):
  C1 = data['C1']
  C2 = data['C2']
  M1 = data['M1']
  M2 = data['M2']
  if len(C1) < comment_len_bound or len(C2) < comment_len_bound or len(M1) < code_len_bound or len(M2) < code_len_bound:
    mask[i] = False
cup = np.array(cup)[mask]

In [55]:
len(cup)

37643

# Label Dataset

In [56]:
def create_dataset(data):
  dataset = []
  for sample in data:
    C1 = sample['C1']
    C2 = sample['C2']
    M1 = sample['M1']
    M2 = sample['M2']
    # New code and old comment --> inconsistency 
    dataset.append({'C': C1, 'M': M2, 'Y': 'INCONS'})
    # New code and new comment --> consistency
    dataset.append({'C': C2, 'M': M2, 'Y': 'CONS'})
  return dataset

In [57]:
dataset = create_dataset(cup)
dataset[0]

{'C': ['▁get', '▁the', '▁p', 'im', '▁interface'],
 'M': ['▁public',
  '▁interface',
  '▁get',
  '▁interface',
  '▁return',
  '▁this',
  '▁onos',
  '▁interface'],
 'Y': 'INCONS'}

In [58]:
import json
from pathlib import Path
Path("data").mkdir(parents=True, exist_ok=True)
with open('data/dataset.json', 'w') as f:
  for data in dataset:
    f.write(json.dumps(data) + '\n')

In [59]:
with open('data/dataset.json', 'r') as f:
  print(f.read()[:500])

{"C": ["\u2581get", "\u2581the", "\u2581p", "im", "\u2581interface"], "M": ["\u2581public", "\u2581interface", "\u2581get", "\u2581interface", "\u2581return", "\u2581this", "\u2581onos", "\u2581interface"], "Y": "INCONS"}
{"C": ["\u2581return", "\u2581the", "\u2581onos", "\u2581interface"], "M": ["\u2581public", "\u2581interface", "\u2581get", "\u2581interface", "\u2581return", "\u2581this", "\u2581onos", "\u2581interface"], "Y": "CONS"}
{"C": ["\u2581gets", "\u2581ctl", "\u2581schemas", "\u2581


# NBOW2

In [None]:
import torch
from torchtext import data
from torchtext import datasets

CODE = data.Field(tokenize=lambda x: x)
COMMENT = data.Field(tokenize=lambda x: x)
LABEL = data.LabelField(dtype = torch.float)
fields = {'C': ('c', COMMENT), 'M': ('m', CODE), 'Y': ('y', LABEL)}

In [None]:
my_data = data.TabularDataset(
                            path = 'data/dataset.json',
                            format = 'json',
                            fields = fields
)

In [None]:
print(vars(my_data[0]))

In [None]:
import random
SEED = 1234
train_data, test_data = my_data.split(random_state = random.seed(SEED))
train_data, val_data = train_data.split(split_ratio=0.8, random_state = random.seed(SEED))

In [None]:
MAX_VOCAB_SIZE = 25_000

COMMENT.build_vocab(train_data, 
                 max_size = MAX_VOCAB_SIZE)

CODE.build_vocab(train_data, 
                 max_size = MAX_VOCAB_SIZE)

LABEL.build_vocab(train_data)

In [None]:
print(f'Number of training examples: {len(train_data)}')
print(f'Number of validation examples: {len(val_data)}')
print(f'Number of testing examples: {len(test_data)}')

In [None]:
print(f"Unique tokens in COMMENT vocabulary: {len(COMMENT.vocab)}")
print(f"Unique tokens in CODE vocabulary: {len(CODE.vocab)}")
print(f"Unique tokens in LABEL vocabulary: {len(LABEL.vocab)}")

In [None]:
print(CODE.vocab.freqs.most_common(20))

In [None]:
print(COMMENT.vocab.freqs.most_common(20))

In [None]:
print(COMMENT.vocab.itos[:10])

In [None]:
print(LABEL.vocab.stoi)

In [None]:
BATCH_SIZE = 64

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, val_data, test_data), 
    batch_size = BATCH_SIZE,
    sort_key=lambda x: len(x.c) + len(x.m), # the BucketIterator needs to be told what function it should use to group the data.
    sort_within_batch=False,
    device = device)

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class NBOW2(nn.Module):
    def __init__(self, comment_vocab_size, code_vocab_size, embedding_dim, output_dim, comment_pad_idx, code_pad_idx):
        
        super().__init__()
        
        self.embedding1 = nn.Embedding(comment_vocab_size, embedding_dim, padding_idx=comment_pad_idx)
        self.embedding2 = nn.Embedding(code_vocab_size, embedding_dim, padding_idx=code_pad_idx)
        
        self.fc = nn.Linear(2 * embedding_dim, output_dim)
        
        self.a1 = torch.nn.Parameter(torch.zeros(embedding_dim, 1))
        self.a2 = torch.nn.Parameter(torch.zeros(embedding_dim, 1))
        
        self.sigmoid = nn.Sigmoid()
        
        
    def forward(self, comment, code):
        
        #comment = [comment len, batch size]
        #code = [code len, batch size]
        
        embedded1 = self.embedding1(comment)
        embedded2 = self.embedding2(code)

        #embedded = [sent len, batch size, emb dim]
        
        embedded1 = embedded1.permute(1, 0, 2)
        embedded2 = embedded2.permute(1, 0, 2)
  
        #embedded = [batch size, sent len, emb dim]
        
        # Get the scalar word importance weights for each word w
        # a: [emb dim, 1]
        # aw: [batch size, sent len, 1]
        
        aw1 = torch.matmul(embedded1, self.a1)
        aw1 = self.sigmoid(aw1)

        aw2 = torch.matmul(embedded2, self.a2)
        aw2 = self.sigmoid(aw2)
        
        # w_emb: [batch size, sent len, emb dim]
        weighted_embed1 = embedded1 * aw1
        weighted_embed2 = embedded2 * aw2

        pooled1 = F.avg_pool2d(weighted_embed1, (weighted_embed1.shape[1], 1)).squeeze(1) 
        pooled2 = F.avg_pool2d(weighted_embed2, (weighted_embed2.shape[1], 1)).squeeze(1)
        #pooled = [batch size, embedding_dim]

        code_comment = torch.cat((pooled1, pooled2), dim=1)
                
        return self.fc(code_comment)
      
    def init_weights(self):
        initrange = 0.5

        self.a1.data.uniform_(0.0, 1.0)
        self.a2.data.uniform_(0.0, 1.0)

        self.fc.weight.data.uniform_(-initrange, initrange)
        self.fc.bias.data.zero_()

In [None]:
CODE_INPUT_DIM = len(CODE.vocab)
COMMENT_INPUT_DIM = len(COMMENT.vocab)
EMBEDDING_DIM = 100
OUTPUT_DIM = 1
COMMENT_PAD_IDX = COMMENT.vocab.stoi[COMMENT.pad_token]
CODE_PAD_IDX = CODE.vocab.stoi[CODE.pad_token]
model = NBOW2(COMMENT_INPUT_DIM, CODE_INPUT_DIM, EMBEDDING_DIM, OUTPUT_DIM, COMMENT_PAD_IDX, CODE_PAD_IDX)
model.init_weights()

In [None]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

In [None]:
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name)

In [None]:
COMMENT_UNK_IDX = COMMENT.vocab.stoi[COMMENT.unk_token]
CODE_UNK_IDX = CODE.vocab.stoi[CODE.unk_token]

model.embedding1.weight.data[COMMENT_UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding1.weight.data[COMMENT_PAD_IDX] = torch.zeros(EMBEDDING_DIM)


model.embedding2.weight.data[CODE_UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding2.weight.data[CODE_PAD_IDX] = torch.zeros(EMBEDDING_DIM)

In [None]:
import torch.optim as optim

optimizer = optim.Adam(model.parameters())

In [None]:
criterion = nn.BCEWithLogitsLoss()

model = model.to(device)
criterion = criterion.to(device)

In [None]:
def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """

    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float() #convert into float for division 
    acc = correct.sum() / len(correct)
    return acc

In [None]:
def train(model, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    
    for batch in iterator:
        
        optimizer.zero_grad()
        
        predictions = model(batch.c, batch.m).squeeze(1)
        
        loss = criterion(predictions, batch.y)
        
        acc = binary_accuracy(predictions, batch.y)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [None]:
def evaluate(model, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.eval()
    
    with torch.no_grad():
    
        for batch in iterator:

            predictions = model(batch.c, batch.m).squeeze(1)
            
            loss = criterion(predictions, batch.y)
            
            acc = binary_accuracy(predictions, batch.y)

            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [None]:
import time

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [None]:
N_EPOCHS = 5

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut3-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

# NBOW2 + Pretrained Embeddings

In [114]:
import torch
from torchtext import data
from torchtext import datasets

CODE = data.Field(tokenize=lambda x: x, preprocessing=lambda x: [w.replace('▁', '') for w in x])
COMMENT = data.Field(tokenize=lambda x: x, preprocessing=lambda x: [w.replace('▁', '') for w in x])
LABEL = data.LabelField(dtype = torch.float)
fields = {'C': ('c', COMMENT), 'M': ('m', CODE), 'Y': ('y', LABEL)}

In [115]:
my_data = data.TabularDataset(
                            path = 'data/dataset.json',
                            format = 'json',
                            fields = fields
)

In [116]:
print(vars(my_data[0]))

{'c': ['get', 'the', 'p', 'im', 'interface'], 'm': ['public', 'interface', 'get', 'interface', 'return', 'this', 'onos', 'interface'], 'y': 'INCONS'}


In [101]:
import random
SEED = 1234
train_data, test_data = my_data.split(random_state = random.seed(SEED))
train_data, val_data = train_data.split(split_ratio=0.8, random_state = random.seed(SEED))

In [102]:
MAX_VOCAB_SIZE = 25_000

COMMENT.build_vocab(train_data, 
                 max_size = MAX_VOCAB_SIZE,  
                 vectors = "glove.6B.100d", 
                 unk_init = torch.Tensor.normal_)

CODE.build_vocab(train_data, 
                 max_size = MAX_VOCAB_SIZE,  
                 vectors = "glove.6B.100d", 
                 unk_init = torch.Tensor.normal_)

LABEL.build_vocab(train_data)

In [45]:
import torchtext.vocab

glove = torchtext.vocab.GloVe(name = '6B', dim = 100)

print(f'There are {len(glove.itos)} words in the vocabulary')

There are 400000 words in the vocabulary


In [47]:
count_intersect = 0
for w in COMMENT.vocab.itos:
  if w in glove.stoi:
    count_intersect += 1

print(count_intersect / len(COMMENT.vocab.itos))

0.8441480292849263


In [48]:
count_intersect = 0
for w in CODE.vocab.itos:
  if w in glove.stoi:
    count_intersect += 1

print(count_intersect / len(CODE.vocab.itos))

0.751122357358583


In [50]:
print(f'Number of training examples: {len(train_data)}')
print(f'Number of validation examples: {len(val_data)}')
print(f'Number of testing examples: {len(test_data)}')

Number of training examples: 42160
Number of validation examples: 10540
Number of testing examples: 22586


In [51]:
print(f"Unique tokens in COMMENT vocabulary: {len(COMMENT.vocab)}")
print(f"Unique tokens in CODE vocabulary: {len(CODE.vocab)}")
print(f"Unique tokens in LABEL vocabulary: {len(LABEL.vocab)}")

Unique tokens in COMMENT vocabulary: 9971
Unique tokens in CODE vocabulary: 12251
Unique tokens in LABEL vocabulary: 2


In [52]:
print(CODE.vocab.freqs.most_common(20))

[('get', 65544), ('return', 42390), ('if', 38228), ('string', 37965), ('new', 35885), ('public', 35560), ('name', 26251), ('exception', 22761), ('set', 21113), ('request', 19418), ('list', 18079), ('int', 17882), ('final', 17441), ('type', 17128), ('this', 16661), ('value', 15983), ('void', 15689), ('id', 15409), ('to', 15092), ('class', 14103)]


In [53]:
print(COMMENT.vocab.freqs.most_common(20))

[('the', 43775), ('a', 17053), ('of', 12110), ('to', 11661), ('and', 7677), ('link', 7646), ('is', 7342), ('for', 7225), ('this', 6237), ('in', 6079), ('returns', 5384), ('if', 5090), ('given', 4635), ('that', 4181), ('an', 4150), ('with', 4002), ('from', 3479), ('code', 3436), ('be', 3418), ('value', 3320)]


In [54]:
print(COMMENT.vocab.itos[:10])

['<unk>', '<pad>', 'the', 'a', 'of', 'to', 'and', 'link', 'is', 'for']


In [55]:
print(LABEL.vocab.stoi)

defaultdict(<function _default_unk_index at 0x7f1b5040f730>, {'INCONS': 0, 'CONS': 1})


In [56]:
BATCH_SIZE = 64

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, val_data, test_data), 
    batch_size = BATCH_SIZE,
    sort_key=lambda x: len(x.c) + len(x.m), # the BucketIterator needs to be told what function it should use to group the data.
    sort_within_batch=False,
    device = device)

In [86]:
import torch.nn as nn
import torch.nn.functional as F

class NBOW2(nn.Module):
    def __init__(self, comment_vocab_size, code_vocab_size, embedding_dim, output_dim, 
                 comment_pad_idx, code_pad_idx, dropout):
        
        super().__init__()
        
        self.embedding1 = nn.Embedding(comment_vocab_size, embedding_dim, padding_idx=comment_pad_idx)
        self.embedding2 = nn.Embedding(code_vocab_size, embedding_dim, padding_idx=code_pad_idx)
        
        self.fc = nn.Linear(2 * embedding_dim, output_dim)
        
        self.dropout = nn.Dropout(dropout)

        self.a1 = torch.nn.Parameter(torch.zeros(embedding_dim, 1))
        self.a2 = torch.nn.Parameter(torch.zeros(embedding_dim, 1))
        
        self.sigmoid = nn.Sigmoid()
        
        
    def forward(self, comment, code):
        
        #comment = [comment len, batch size]
        #code = [code len, batch size]
        
        embedded1 = self.embedding1(comment)
        embedded2 = self.embedding2(code)

        #embedded = [sent len, batch size, emb dim]
        
        embedded1 = embedded1.permute(1, 0, 2)
        embedded2 = embedded2.permute(1, 0, 2)
  
        #embedded = [batch size, sent len, emb dim]
        
        # Get the scalar word importance weights for each word w
        # a: [emb dim, 1]
        # aw: [batch size, sent len, 1]
        
        aw1 = torch.matmul(embedded1, self.a1)
        aw1 = self.sigmoid(aw1)

        aw2 = torch.matmul(embedded2, self.a2)
        aw2 = self.sigmoid(aw2)
        
        # w_emb: [batch size, sent len, emb dim]
        weighted_embed1 = embedded1 * aw1
        weighted_embed2 = embedded2 * aw2

        pooled1 = F.avg_pool2d(weighted_embed1, (weighted_embed1.shape[1], 1)).squeeze(1) 
        pooled2 = F.avg_pool2d(weighted_embed2, (weighted_embed2.shape[1], 1)).squeeze(1)
        #pooled = [batch size, embedding_dim]

        code_comment = self.dropout(torch.cat((pooled1, pooled2), dim=1))
                
        return self.fc(code_comment)
      
    def init_weights(self):
        initrange = 0.5

        self.a1.data.uniform_(0.0, 1.0)
        self.a2.data.uniform_(0.0, 1.0)

        self.fc.weight.data.uniform_(-initrange, initrange)
        self.fc.bias.data.zero_()

In [87]:
CODE_INPUT_DIM = len(CODE.vocab)
COMMENT_INPUT_DIM = len(COMMENT.vocab)
EMBEDDING_DIM = 100
OUTPUT_DIM = 1
COMMENT_PAD_IDX = COMMENT.vocab.stoi[COMMENT.pad_token]
CODE_PAD_IDX = CODE.vocab.stoi[CODE.pad_token]
DROPOUT = 0.5

model = NBOW2(COMMENT_INPUT_DIM, CODE_INPUT_DIM, EMBEDDING_DIM, OUTPUT_DIM, COMMENT_PAD_IDX, CODE_PAD_IDX, DROPOUT)
model.init_weights()

In [88]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 2,222,601 trainable parameters


In [89]:
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name)

a1
a2
embedding1.weight
embedding2.weight
fc.weight
fc.bias


In [90]:
pretrained_embeddings = COMMENT.vocab.vectors

model.embedding1.weight.data.copy_(pretrained_embeddings)

pretrained_embeddings = CODE.vocab.vectors

model.embedding2.weight.data.copy_(pretrained_embeddings)

tensor([[ 0.9197,  1.2477,  0.8128,  ..., -0.3548,  1.9117, -0.3136],
        [-0.4103, -0.6661,  0.4244,  ...,  0.4425,  1.7003,  0.0539],
        [ 0.1443,  0.4395,  0.5832,  ...,  0.5013,  0.4954,  0.4992],
        ...,
        [ 0.7680,  0.7413, -0.2156,  ...,  0.8223,  0.0310, -1.6101],
        [-0.4131,  0.5306,  0.1404,  ..., -0.1758, -0.3542,  0.2784],
        [ 0.5877, -0.0255, -0.5765,  ..., -0.4431, -0.2355, -0.3533]])

In [91]:
COMMENT_UNK_IDX = COMMENT.vocab.stoi[COMMENT.unk_token]
CODE_UNK_IDX = CODE.vocab.stoi[CODE.unk_token]

model.embedding1.weight.data[COMMENT_UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding1.weight.data[COMMENT_PAD_IDX] = torch.zeros(EMBEDDING_DIM)


model.embedding2.weight.data[CODE_UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding2.weight.data[CODE_PAD_IDX] = torch.zeros(EMBEDDING_DIM)

In [92]:
import torch.optim as optim

optimizer = optim.Adam(model.parameters())

In [93]:
criterion = nn.BCEWithLogitsLoss()

model = model.to(device)
criterion = criterion.to(device)

In [94]:
def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """

    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float() #convert into float for division 
    acc = correct.sum() / len(correct)
    return acc

In [95]:
def train(model, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    
    for batch in iterator:
        
        optimizer.zero_grad()
        
        predictions = model(batch.c, batch.m).squeeze(1)
        
        loss = criterion(predictions, batch.y)
        
        acc = binary_accuracy(predictions, batch.y)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [96]:
def evaluate(model, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.eval()
    
    with torch.no_grad():
    
        for batch in iterator:

            predictions = model(batch.c, batch.m).squeeze(1)
            
            loss = criterion(predictions, batch.y)
            
            acc = binary_accuracy(predictions, batch.y)

            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [97]:
import time

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [98]:
N_EPOCHS = 5

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut3-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

Epoch: 01 | Epoch Time: 0m 4s
	Train Loss: 0.696 | Train Acc: 50.05%
	 Val. Loss: 0.696 |  Val. Acc: 49.64%
Epoch: 02 | Epoch Time: 0m 4s
	Train Loss: 0.689 | Train Acc: 52.78%
	 Val. Loss: 0.696 |  Val. Acc: 48.76%
Epoch: 03 | Epoch Time: 0m 4s
	Train Loss: 0.684 | Train Acc: 55.21%
	 Val. Loss: 0.696 |  Val. Acc: 48.78%
Epoch: 04 | Epoch Time: 0m 4s
	Train Loss: 0.677 | Train Acc: 56.67%
	 Val. Loss: 0.700 |  Val. Acc: 48.67%
Epoch: 05 | Epoch Time: 0m 4s
	Train Loss: 0.670 | Train Acc: 58.45%
	 Val. Loss: 0.706 |  Val. Acc: 48.66%


In [70]:
model.load_state_dict(torch.load('tut3-model.pt'))

test_loss, test_acc = evaluate(model, test_iterator, criterion)

print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

Test Loss: 0.696 | Test Acc: 49.39%


# LSTM

In [1]:
import torch
from torchtext import data
from torchtext import datasets

CODE = data.Field(tokenize=lambda x: x, preprocessing=lambda x: [w.replace('▁', '') for w in x], include_lengths = True)
COMMENT = data.Field(tokenize=lambda x: x, preprocessing=lambda x: [w.replace('▁', '') for w in x], include_lengths = True)
LABEL = data.LabelField(dtype = torch.float)
fields = {'C': ('c', COMMENT), 'M': ('m', CODE), 'Y': ('y', LABEL)}

In [2]:
my_data = data.TabularDataset(
                            path = 'data/dataset.json',
                            format = 'json',
                            fields = fields
)

In [3]:
print(vars(my_data[0]))

{'c': ['get', 'the', 'p', 'im', 'interface'], 'm': ['public', 'interface', 'get', 'interface', 'return', 'this', 'onos', 'interface'], 'y': 'INCONS'}


In [4]:
import random
SEED = 1234
train_data, test_data = my_data.split(random_state = random.seed(SEED))
train_data, val_data = train_data.split(split_ratio=0.8, random_state = random.seed(SEED))

In [5]:
MAX_VOCAB_SIZE = 25_000

COMMENT.build_vocab(train_data, 
                 max_size = MAX_VOCAB_SIZE,  
                 vectors = "glove.6B.100d", 
                 unk_init = torch.Tensor.normal_)

CODE.build_vocab(train_data, 
                 max_size = MAX_VOCAB_SIZE,  
                 vectors = "glove.6B.100d", 
                 unk_init = torch.Tensor.normal_)

LABEL.build_vocab(train_data)

In [6]:
import torchtext.vocab

glove = torchtext.vocab.GloVe(name = '6B', dim = 100)

print(f'There are {len(glove.itos)} words in the vocabulary')

There are 400000 words in the vocabulary


In [7]:
count_intersect = 0
for w in COMMENT.vocab.itos:
  if w in glove.stoi:
    count_intersect += 1

print(count_intersect / len(COMMENT.vocab.itos))

0.8441480292849263


In [8]:
count_intersect = 0
for w in CODE.vocab.itos:
  if w in glove.stoi:
    count_intersect += 1

print(count_intersect / len(CODE.vocab.itos))

0.751122357358583


In [9]:
print(f'Number of training examples: {len(train_data)}')
print(f'Number of validation examples: {len(val_data)}')
print(f'Number of testing examples: {len(test_data)}')

Number of training examples: 42160
Number of validation examples: 10540
Number of testing examples: 22586


In [10]:
print(f"Unique tokens in COMMENT vocabulary: {len(COMMENT.vocab)}")
print(f"Unique tokens in CODE vocabulary: {len(CODE.vocab)}")
print(f"Unique tokens in LABEL vocabulary: {len(LABEL.vocab)}")

Unique tokens in COMMENT vocabulary: 9971
Unique tokens in CODE vocabulary: 12251
Unique tokens in LABEL vocabulary: 2


In [11]:
print(CODE.vocab.freqs.most_common(20))

[('get', 65544), ('return', 42390), ('if', 38228), ('string', 37965), ('new', 35885), ('public', 35560), ('name', 26251), ('exception', 22761), ('set', 21113), ('request', 19418), ('list', 18079), ('int', 17882), ('final', 17441), ('type', 17128), ('this', 16661), ('value', 15983), ('void', 15689), ('id', 15409), ('to', 15092), ('class', 14103)]


In [12]:
print(COMMENT.vocab.freqs.most_common(20))

[('the', 43775), ('a', 17053), ('of', 12110), ('to', 11661), ('and', 7677), ('link', 7646), ('is', 7342), ('for', 7225), ('this', 6237), ('in', 6079), ('returns', 5384), ('if', 5090), ('given', 4635), ('that', 4181), ('an', 4150), ('with', 4002), ('from', 3479), ('code', 3436), ('be', 3418), ('value', 3320)]


In [13]:
print(COMMENT.vocab.itos[:10])

['<unk>', '<pad>', 'the', 'a', 'of', 'to', 'and', 'link', 'is', 'for']


In [14]:
print(LABEL.vocab.stoi)

defaultdict(<function _default_unk_index at 0x7fbfba628510>, {'INCONS': 0, 'CONS': 1})


In [15]:
BATCH_SIZE = 64

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, val_data, test_data), 
    batch_size = BATCH_SIZE,
    sort=False,
    device = device)

In [16]:
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, comment_vocab_size, code_vocab_size, 
                 embedding_dim, hidden_dim, output_dim, n_layers, 
                 bidirectional, comment_pad_idx, code_pad_idx, dropout):
        
        super().__init__()
        
        self.embedding1 = nn.Embedding(comment_vocab_size, embedding_dim, padding_idx = comment_pad_idx)
        self.embedding2 = nn.Embedding(code_vocab_size, embedding_dim, padding_idx = code_pad_idx)
        
        self.rnn1 = nn.LSTM(embedding_dim, 
                           hidden_dim, 
                           num_layers=n_layers, 
                           bidirectional=bidirectional, 
                           dropout=dropout)
        
        self.rnn2 = nn.LSTM(embedding_dim, 
                           hidden_dim, 
                           num_layers=n_layers, 
                           bidirectional=bidirectional, 
                           dropout=dropout)
        


        self.fc = nn.Linear(hidden_dim * 2 * 2, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, comment, comment_lengths, code, code_lengths):
        
        #text = [sent len, batch size]
        
        embedded1 = self.dropout(self.embedding1(comment))
        embedded2 = self.dropout(self.embedding2(code))
        
        #embedded = [sent len, batch size, emb dim]
        
        #pack sequence
        packed_embedded1 = nn.utils.rnn.pack_padded_sequence(embedded1, comment_lengths.cpu(), enforce_sorted=False)
        packed_output1, (hidden1, cell1) = self.rnn1(packed_embedded1)

        packed_embedded2 = nn.utils.rnn.pack_padded_sequence(embedded2, code_lengths.cpu(), enforce_sorted=False)
        packed_output2, (hidden2, cell2) = self.rnn2(packed_embedded2)

        
        #unpack sequence
        output1, output_lengths1 = nn.utils.rnn.pad_packed_sequence(packed_output1)
        output2, output_lengths2 = nn.utils.rnn.pad_packed_sequence(packed_output1)

        #output = [sent len, batch size, hid dim * num directions]
        #output over padding tokens are zero tensors
        
        #hidden = [num layers * num directions, batch size, hid dim]
        #cell = [num layers * num directions, batch size, hid dim]
        
        #concat the final forward (hidden[-2,:,:]) and backward (hidden[-1,:,:]) hidden layers
        #and apply dropout
        
        hidden1 = self.dropout(torch.cat((hidden1[-2,:,:], hidden1[-1,:,:]), dim = 1))
        hidden2 = self.dropout(torch.cat((hidden2[-2,:,:], hidden2[-1,:,:]), dim = 1))

        hidden = self.dropout(torch.cat((hidden1, hidden2), dim=1))
                
        #hidden = [batch size, hid dim * num directions]
            
        return self.fc(hidden)

In [26]:
CODE_INPUT_DIM = len(CODE.vocab)
COMMENT_INPUT_DIM = len(COMMENT.vocab)
EMBEDDING_DIM = 100
OUTPUT_DIM = 1
COMMENT_PAD_IDX = COMMENT.vocab.stoi[COMMENT.pad_token]
CODE_PAD_IDX = CODE.vocab.stoi[CODE.pad_token]
DROPOUT = 0.5
BIDIRECTIONAL = True
N_LAYERS = 2
HIDDEN_DIM = 256

model = RNN(COMMENT_INPUT_DIM, CODE_INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, N_LAYERS, BIDIRECTIONAL, COMMENT_PAD_IDX, CODE_PAD_IDX, 
            DROPOUT)

In [27]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 6,843,513 trainable parameters


In [28]:
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name)

embedding1.weight
embedding2.weight
rnn1.weight_ih_l0
rnn1.weight_hh_l0
rnn1.bias_ih_l0
rnn1.bias_hh_l0
rnn1.weight_ih_l0_reverse
rnn1.weight_hh_l0_reverse
rnn1.bias_ih_l0_reverse
rnn1.bias_hh_l0_reverse
rnn1.weight_ih_l1
rnn1.weight_hh_l1
rnn1.bias_ih_l1
rnn1.bias_hh_l1
rnn1.weight_ih_l1_reverse
rnn1.weight_hh_l1_reverse
rnn1.bias_ih_l1_reverse
rnn1.bias_hh_l1_reverse
rnn2.weight_ih_l0
rnn2.weight_hh_l0
rnn2.bias_ih_l0
rnn2.bias_hh_l0
rnn2.weight_ih_l0_reverse
rnn2.weight_hh_l0_reverse
rnn2.bias_ih_l0_reverse
rnn2.bias_hh_l0_reverse
rnn2.weight_ih_l1
rnn2.weight_hh_l1
rnn2.bias_ih_l1
rnn2.bias_hh_l1
rnn2.weight_ih_l1_reverse
rnn2.weight_hh_l1_reverse
rnn2.bias_ih_l1_reverse
rnn2.bias_hh_l1_reverse
fc.weight
fc.bias


In [29]:
pretrained_embeddings = COMMENT.vocab.vectors

model.embedding1.weight.data.copy_(pretrained_embeddings)

pretrained_embeddings = CODE.vocab.vectors

model.embedding2.weight.data.copy_(pretrained_embeddings)

tensor([[-1.3390, -0.5360,  0.1918,  ..., -0.4738, -0.7479, -1.6854],
        [-0.1583,  0.1371, -0.7183,  ..., -0.7116, -0.7041, -0.4214],
        [ 0.1443,  0.4395,  0.5832,  ...,  0.5013,  0.4954,  0.4992],
        ...,
        [ 0.1713, -1.1731,  2.0175,  ...,  0.1772, -1.1146, -0.0331],
        [-0.4131,  0.5306,  0.1404,  ..., -0.1758, -0.3542,  0.2784],
        [ 0.5877, -0.0255, -0.5765,  ..., -0.4431, -0.2355, -0.3533]])

In [30]:
COMMENT_UNK_IDX = COMMENT.vocab.stoi[COMMENT.unk_token]
CODE_UNK_IDX = CODE.vocab.stoi[CODE.unk_token]

model.embedding1.weight.data[COMMENT_UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding1.weight.data[COMMENT_PAD_IDX] = torch.zeros(EMBEDDING_DIM)


model.embedding2.weight.data[CODE_UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding2.weight.data[CODE_PAD_IDX] = torch.zeros(EMBEDDING_DIM)

In [31]:
import torch.optim as optim

optimizer = optim.Adam(model.parameters())

In [32]:
criterion = nn.BCEWithLogitsLoss()

model = model.to(device)
criterion = criterion.to(device)

In [33]:
def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """

    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float() #convert into float for division 
    acc = correct.sum() / len(correct)
    return acc

In [34]:
def train(model, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    
    for batch in iterator:
        
        optimizer.zero_grad()
        
        comment, comment_lens = batch.c
        code, code_lens = batch.m
        predictions = model(comment, comment_lens, code, code_lens).squeeze(1)
            
        loss = criterion(predictions, batch.y)
        
        acc = binary_accuracy(predictions, batch.y)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [35]:
def evaluate(model, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.eval()
    
    with torch.no_grad():
    
        for batch in iterator:

            comment, comment_lens = batch.c
            code, code_lens = batch.m
            predictions = model(comment, comment_lens, code, code_lens).squeeze(1)
            
            loss = criterion(predictions, batch.y)
            
            acc = binary_accuracy(predictions, batch.y)

            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [36]:
import time

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [37]:
N_EPOCHS = 5

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut3-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

Epoch: 01 | Epoch Time: 0m 53s
	Train Loss: 0.693 | Train Acc: 50.52%
	 Val. Loss: 0.683 |  Val. Acc: 52.92%
Epoch: 02 | Epoch Time: 0m 53s
	Train Loss: 0.682 | Train Acc: 52.80%
	 Val. Loss: 0.680 |  Val. Acc: 52.12%
Epoch: 03 | Epoch Time: 0m 53s
	Train Loss: 0.673 | Train Acc: 55.10%
	 Val. Loss: 0.676 |  Val. Acc: 52.85%
Epoch: 04 | Epoch Time: 0m 53s
	Train Loss: 0.664 | Train Acc: 57.71%
	 Val. Loss: 0.674 |  Val. Acc: 53.36%
Epoch: 05 | Epoch Time: 0m 53s
	Train Loss: 0.649 | Train Acc: 59.87%
	 Val. Loss: 0.679 |  Val. Acc: 53.35%


In [38]:
model.load_state_dict(torch.load('tut3-model.pt'))

test_loss, test_acc = evaluate(model, test_iterator, criterion)

print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

Test Loss: 0.674 | Test Acc: 52.98%


# Transformers

In [39]:
import torch

import random
import numpy as np

SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [40]:
!pip install transformers



In [41]:
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [42]:
tokens = tokenizer.tokenize('Hello WORLD how ARE yoU?')

print(tokens)

['hello', 'world', 'how', 'are', 'you', '?']


In [43]:
indexes = tokenizer.convert_tokens_to_ids(tokens)

print(indexes)

[7592, 2088, 2129, 2024, 2017, 1029]


In [44]:
init_token = tokenizer.cls_token
eos_token = tokenizer.sep_token
pad_token = tokenizer.pad_token
unk_token = tokenizer.unk_token

print(init_token, eos_token, pad_token, unk_token)

[CLS] [SEP] [PAD] [UNK]


In [45]:
init_token_idx = tokenizer.convert_tokens_to_ids(init_token)
eos_token_idx = tokenizer.convert_tokens_to_ids(eos_token)
pad_token_idx = tokenizer.convert_tokens_to_ids(pad_token)
unk_token_idx = tokenizer.convert_tokens_to_ids(unk_token)

print(init_token_idx, eos_token_idx, pad_token_idx, unk_token_idx)

101 102 0 100


In [46]:
max_input_length = tokenizer.max_model_input_sizes['bert-base-uncased']

print(max_input_length)

512


In [47]:
def tokenize_and_cut(sentence):
    sentence = [w.replace('▁', '') for w in sentence]
    sentence = ' '.join(sentence)
    tokens = tokenizer.tokenize(sentence) 
    tokens = tokens[:max_input_length-2]
    return tokenizer.convert_tokens_to_ids(tokens)

In [60]:
tokenize_and_cut(cup[0]['M2'])

[2270, 8278, 2131, 8278, 2709, 2023, 21058, 2015, 8278]

In [61]:
import torch
from torchtext import data
from torchtext import datasets

CODE = data.Field(batch_first = True,
                  use_vocab = False,
                  tokenize = lambda x: x,
                  preprocessing = tokenize_and_cut,
                  init_token = init_token_idx,
                  eos_token = eos_token_idx,
                  pad_token = pad_token_idx,
                  unk_token = unk_token_idx)

COMMENT = data.Field(batch_first = True,
                  use_vocab = False,
                  tokenize = lambda x: x,
                  preprocessing = tokenize_and_cut,
                  init_token = init_token_idx,
                  eos_token = eos_token_idx,
                  pad_token = pad_token_idx,
                  unk_token = unk_token_idx)
LABEL = data.LabelField(dtype = torch.float)
fields = {'C': ('c', COMMENT), 'M': ('m', CODE), 'Y': ('y', LABEL)}

In [62]:
my_data = data.TabularDataset(
                            path = 'data/dataset.json',
                            format = 'json',
                            fields = fields
)

In [63]:
print(vars(my_data[0])) 

{'c': [2131, 1996, 1052, 10047, 8278], 'm': [2270, 8278, 2131, 8278, 2709, 2023, 21058, 2015, 8278], 'y': 'INCONS'}


In [64]:
print(tokenizer.convert_ids_to_tokens(vars(my_data[2])['c']))

['gets', 'ct', '##l', 'sc', '##hema', '##s', 'with', 'the', 'given', 'fully', 'qualified', 'name']


In [65]:
import random
SEED = 1234
train_data, test_data = my_data.split(random_state = random.seed(SEED))
train_data, val_data = train_data.split(split_ratio=0.8, random_state = random.seed(SEED))

In [66]:
print(f"Number of training examples: {len(train_data)}")
print(f"Number of validation examples: {len(val_data)}")
print(f"Number of testing examples: {len(test_data)}")

Number of training examples: 42160
Number of validation examples: 10540
Number of testing examples: 22586


In [67]:
LABEL.build_vocab(train_data)

In [68]:
print(LABEL.vocab.stoi)

defaultdict(<function _default_unk_index at 0x7fbfba628510>, {'INCONS': 0, 'CONS': 1})


In [93]:
BATCH_SIZE = 128

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, val_data, test_data), 
    batch_size = BATCH_SIZE, 
    device = device, 
    sort=False)

In [70]:
from transformers import BertTokenizer, BertModel

bert = BertModel.from_pretrained('bert-base-uncased')

In [71]:
import torch.nn as nn

class BERTGRU(nn.Module):
    def __init__(self,
                 bert,
                 hidden_dim,
                 output_dim,
                 n_layers,
                 bidirectional,
                 dropout):
        
        super().__init__()
        
        self.bert = bert
        
        embedding_dim = bert.config.to_dict()['hidden_size']
        
        self.rnn1 = nn.GRU(embedding_dim,
                          hidden_dim,
                          num_layers = n_layers,
                          bidirectional = bidirectional,
                          batch_first = True,
                          dropout = 0 if n_layers < 2 else dropout)
        
        self.rnn2 = nn.GRU(embedding_dim,
                          hidden_dim,
                          num_layers = n_layers,
                          bidirectional = bidirectional,
                          batch_first = True,
                          dropout = 0 if n_layers < 2 else dropout)
                
        self.out = nn.Linear(hidden_dim * 2 * 2 if bidirectional else hidden_dim * 2, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, comment, code):
        
        #text = [batch size, sent len]
                
        with torch.no_grad():
            embedded1 = self.bert(comment)[0]
            embedded2 = self.bert(code)[0]
                
        #embedded = [batch size, sent len, emb dim]
        
        _, hidden1 = self.rnn1(embedded1)
        _, hidden2 = self.rnn2(embedded2)
        
        #hidden = [n layers * n directions, batch size, emb dim]
        
        if self.rnn1.bidirectional:

            hidden1 = self.dropout(torch.cat((hidden1[-2,:,:], hidden1[-1,:,:]), dim = 1))
            hidden2 = self.dropout(torch.cat((hidden2[-2,:,:], hidden2[-1,:,:]), dim = 1))
        else:
            hidden1 = self.dropout(hidden1[-1,:,:])
            hidden2 = self.dropout(hidden2[-1,:,:])
                
        #hidden = [batch size, hid dim]
        
        output = self.out(torch.cat((hidden1, hidden2), dim=1))
        
        #output = [batch size, out dim]
        
        return output

In [94]:
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.25

model = BERTGRU(bert,
                HIDDEN_DIM,
                OUTPUT_DIM,
                N_LAYERS,
                BIDIRECTIONAL,
                DROPOUT)

In [95]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 5,518,337 trainable parameters


In [96]:
for name, param in model.named_parameters():                
    if name.startswith('bert'):
        param.requires_grad = False

In [97]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 5,518,337 trainable parameters


In [98]:
for name, param in model.named_parameters():                
    if param.requires_grad:
        print(name)

rnn1.weight_ih_l0
rnn1.weight_hh_l0
rnn1.bias_ih_l0
rnn1.bias_hh_l0
rnn1.weight_ih_l0_reverse
rnn1.weight_hh_l0_reverse
rnn1.bias_ih_l0_reverse
rnn1.bias_hh_l0_reverse
rnn1.weight_ih_l1
rnn1.weight_hh_l1
rnn1.bias_ih_l1
rnn1.bias_hh_l1
rnn1.weight_ih_l1_reverse
rnn1.weight_hh_l1_reverse
rnn1.bias_ih_l1_reverse
rnn1.bias_hh_l1_reverse
rnn2.weight_ih_l0
rnn2.weight_hh_l0
rnn2.bias_ih_l0
rnn2.bias_hh_l0
rnn2.weight_ih_l0_reverse
rnn2.weight_hh_l0_reverse
rnn2.bias_ih_l0_reverse
rnn2.bias_hh_l0_reverse
rnn2.weight_ih_l1
rnn2.weight_hh_l1
rnn2.bias_ih_l1
rnn2.bias_hh_l1
rnn2.weight_ih_l1_reverse
rnn2.weight_hh_l1_reverse
rnn2.bias_ih_l1_reverse
rnn2.bias_hh_l1_reverse
out.weight
out.bias


In [99]:
import torch.optim as optim

optimizer = optim.Adam(model.parameters())

In [100]:
criterion = nn.BCEWithLogitsLoss()

In [101]:
model = model.to(device)
criterion = criterion.to(device)

In [102]:
def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """

    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float() #convert into float for division 
    acc = correct.sum() / len(correct)
    return acc

In [108]:
def train(model, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    
    for batch in iterator:
        optimizer.zero_grad()
        
        predictions = model(batch.c, batch.m).squeeze(1)
        
        loss = criterion(predictions, batch.y)
        
        acc = binary_accuracy(predictions, batch.y)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [104]:
def evaluate(model, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.eval()
    
    with torch.no_grad():
    
        for batch in iterator:

            predictions = model(batch.c, batch.m).squeeze(1)
            
            loss = criterion(predictions, batch.y)
            
            acc = binary_accuracy(predictions, batch.y)

            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [85]:
import time

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [None]:
N_EPOCHS = 5

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
        
    end_time = time.time()
        
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
        
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut6-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

Epoch: 01 | Epoch Time: 17m 50s
	Train Loss: 0.700 | Train Acc: 50.81%
	 Val. Loss: 0.693 |  Val. Acc: 52.48%
