In [1]:
import os
import sys
import time
import pickle
import random
import torch
from model_embed import BinaryClassifier
import numpy
from e_embedding import EnhanceEmbedding

In [2]:
def get_batch(x, y, idx, bs):
	"""Return one mini-batch cut from two parallel sequences.

	Args:
		x: Sliceable collection of inputs; the slice is returned as-is.
		y: Sliceable collection of integer labels aligned with ``x``.
		idx: Index of the first example in the batch.
		bs: Maximum number of examples to take.

	Returns:
		A tuple ``(x_batch, y_batch)`` where ``x_batch`` is ``x[idx:idx + bs]``
		and ``y_batch`` is the matching label slice wrapped in a
		``torch.LongTensor``. The batch is shorter than ``bs`` when the slice
		runs past the end of the data.
	"""
	# One slice object guarantees inputs and labels cover the same range.
	window = slice(idx, idx + bs)
	return x[window], torch.LongTensor(y[window])


def load_from_file(file_path):
	"""Unpickle and return the object stored at ``file_path``.

	The file is opened in binary mode and closed again before returning.

	NOTE: ``pickle.load`` can execute arbitrary code while deserializing,
	so this must only be used on files from a trusted source.
	"""
	with open(file_path, "rb") as handle:
		payload = pickle.load(handle)
	return payload


def print_parameter_statistics(model):
	"""Print how many scalar parameters ``model`` has in total and how many are trainable.

	Args:
		model: Any object whose ``parameters()`` yields tensors exposing
			``numel()`` and ``requires_grad``.

	A parameter counts as trainable when its ``requires_grad`` flag is set.
	"""
	total_count = 0
	trainable_count = 0
	for param in model.parameters():
		size = param.numel()
		total_count += size
		if param.requires_grad:
			trainable_count += size
	print("Total parameters: {}".format(total_count))
	print("Trainable parameters: {}".format(trainable_count))

In [3]:
# Select which fix pattern's dataset to work on; every path below is
# resolved relative to that pattern's data directory.
fix_pattern = 'InsertMissedStmt'
print("Fix pattern: {}".format(fix_pattern))
root = "../data/{}/".format(fix_pattern)

# Only the training split is loaded here (inputs first, then labels).
# The val/ and test/ splits use the same file names under their own
# sub-directories but are intentionally not loaded in this notebook.
train_x, train_y = (
    load_from_file(os.path.join(root, relative_name))
    for relative_name in ("train/x_w2v_new.pkl", "train/y_.pkl")
)

# Pretrained embedding matrix; its shape determines the model dimensions.
pretrain_vectors = load_from_file(os.path.join(root, "vectors.pkl"))

Fix pattern: InsertMissedStmt


In [4]:
# Hyperparameters and model dimensions shared by the cells below.

# Passed as the second constructor argument to BinaryClassifier.
HIDDEN_DIM = 50
# NOTE(review): not referenced in the cells shown here; presumably the
# number of passes over the training data — confirm against the full loop.
EPOCHS = 30
# Number of examples requested from get_batch per step.
BATCH_SIZE = 64
# Passed to BinaryClassifier as the label count.
LABELS = 2
# When True, label tensors are moved to CUDA before computing the loss.
USE_GPU = True
# Both sizes are read off the pretrained vectors: first axis, then second axis.
MAX_TOKENS = pretrain_vectors.shape[0]
EMBEDDING_DIM = pretrain_vectors.shape[1]

In [5]:

# Build the classifier from the dimensions and pretrained vectors defined above.
model = BinaryClassifier(EMBEDDING_DIM, HIDDEN_DIM, MAX_TOKENS, LABELS, pretrain_vectors)

# Honor the USE_GPU switch instead of moving to CUDA unconditionally: the
# training step only moves labels to CUDA when USE_GPU is set, so the model
# must follow the same flag or the two end up on different devices (and a
# CPU-only machine would fail here outright).
if USE_GPU:
    model = model.cuda()

In [6]:
# Optimizer over every model parameter, plus the classification loss.
parameters = model.parameters()
optimizer = torch.optim.Adam(parameters, lr=0.001)
loss_function = torch.nn.CrossEntropyLoss()

# Report the chosen components by class name, then the parameter counts.
optimizer_name = optimizer.__class__.__name__
loss_name = loss_function.__class__.__name__
print("Optimizer: {}".format(optimizer_name))
print("Loss function: {}".format(loss_name))
print_parameter_statistics(model)

Optimizer: Adam
Loss function: CrossEntropyLoss
Total parameters: 853418
Trainable parameters: 853418


In [7]:
# Shuffle inputs and labels in place with the same permutation.
# Re-seeding before each shuffle replays the identical random sequence, which
# keeps x and y aligned ONLY when both sequences have the same length — so
# that precondition is checked explicitly instead of silently misaligning.
SHUFFLE_SEED = 666
assert len(train_x) == len(train_y), (
    "train_x and train_y must have the same length to stay aligned after shuffling"
)
for aligned_sequence in (train_x, train_y):
    random.seed(SHUFFLE_SEED)
    random.shuffle(aligned_sequence)

In [8]:
# Put the model into training mode before stepping through batches
# (presumably a torch.nn.Module, given the .cuda()/.parameters() calls).
model.train()

# Running totals accumulated across batches: example count, correct
# predictions, summed loss, and true-positive / false-positive /
# false-negative counts.
total = 0
total_acc = total_loss = 0.0
total_tp = total_fp = total_fn = 0.0

In [9]:
# Offset of the next training batch: passed to get_batch as the start index
# and advanced by BATCH_SIZE each time a batch is drawn.
i = 0

In [10]:
# Draw the next mini-batch starting at offset i and split it into
# inputs and labels.
train_inputs, train_labels = batch = get_batch(train_x, train_y, i, BATCH_SIZE)
# Labels follow the USE_GPU switch; inputs are handed to the model unchanged.
train_labels = train_labels.cuda() if USE_GPU else train_labels
# Advance the cursor so the next run of this cell takes the following batch.
i += BATCH_SIZE

# Clear stale gradients, then run the forward pass and compute the loss.
model.zero_grad()
output = model(train_inputs)
loss = loss_function(output, train_labels)


torch.Size([64, 100, 400])
400
torch.Size([64, 100])


In [11]:
# Backpropagate this batch's loss and apply the optimizer update.
loss.backward()
optimizer.step()

# Predicted class = index of the largest score along dim 1.
predicted = torch.max(output.data, 1)[1]
examples_in_batch = len(train_inputs)

# Fold this batch into the running totals; the loss is weighted by the
# batch size so the total can later be averaged per example.
total_acc += (predicted == train_labels).sum().item()
total += examples_in_batch
total_loss += loss.item() * examples_in_batch

# Confusion counts, with label 0 counted as the positive class.
total_tp += ((predicted == 0) & (train_labels == 0)).sum().item()
total_fp += ((predicted == 0) & (train_labels == 1)).sum().item()
total_fn += ((predicted == 1) & (train_labels == 0)).sum().item()