<a href="https://colab.research.google.com/github/Thushan97/CURE/blob/master/cure.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Clone the CURE repository into ./CURE; the next cell moves its contents up into /content.
!git clone https://github.com/Thushan97/CURE.git

Cloning into 'CURE'...
remote: Enumerating objects: 990, done.[K
remote: Counting objects: 100% (318/318), done.[K
remote: Compressing objects: 100% (275/275), done.[K
remote: Total 990 (delta 41), reused 295 (delta 29), pack-reused 672[K
Receiving objects: 100% (990/990), 79.61 MiB | 12.86 MiB/s, done.
Resolving deltas: 100% (53/53), done.


In [None]:
# Move the cloned repository's files into /content, which later cells use as the
# project root (e.g. /content/data/...).
!mv /content/CURE/* /content/

In [None]:
# pretrain model download
!wget "https://zenodo.org/record/7030145/files/models.tar.xz?download=1" -c -O 'models.tar.xz'
!mkdir /content/data/models
!tar -xf models.tar.xz
!mv /content/models/* /content/data/models/


--2022-09-12 12:40:41--  https://zenodo.org/record/7030145/files/models.tar.xz?download=1
Resolving zenodo.org (zenodo.org)... 188.184.117.155
Connecting to zenodo.org (zenodo.org)|188.184.117.155|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1911937724 (1.8G) [application/octet-stream]
Saving to: ‘models.tar.xz’


2022-09-12 12:42:46 (14.9 MB/s) - ‘models.tar.xz’ saved [1911937724/1911937724]



In [None]:
!pip install transformers==2.10.0 subword-nmt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers==2.10.0
  Downloading transformers-2.10.0-py3-none-any.whl (660 kB)
[K     |████████████████████████████████| 660 kB 33.1 MB/s 
[?25hCollecting subword-nmt
  Downloading subword_nmt-0.3.8-py3-none-any.whl (27 kB)
Collecting sentencepiece
  Downloading sentencepiece-0.1.97-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 51.0 MB/s 
[?25hCollecting sacremoses
  Downloading sacremoses-0.0.53.tar.gz (880 kB)
[K     |████████████████████████████████| 880 kB 61.8 MB/s 
[?25hCollecting tokenizers==0.7.0
  Downloading tokenizers-0.7.0-cp37-cp37m-manylinux1_x86_64.whl (5.6 MB)
[K     |████████████████████████████████| 5.6 MB 31.4 MB/s 
Collecting mock
  Downloading mock-4.0.3-py3-none-any.whl (28 kB)
Building wheels for collected packages: sacremoses
  Building wheel for sacremoses (setup.py) ... [?2

---

Run the code from `gpt_conut_trainer.py` to train the GPT-CoNuT model.

In [None]:
import os
import sys
import json
import time
import codecs
import random
import numpy as np
import torch
import torch.nn as nn
from transformers import OpenAIGPTLMHeadModel

# Project root used to build data/model paths. The original script derived it from
# __file__, which is not defined inside a notebook, so it is pinned to /content here.
GPT_CONUT_TRAINER_DIR = os.path.abspath('/content')

In [None]:
from src.models.gpt_conut import GPTCoNuTModel
from src.dataloader.dictionary import Dictionary
from src.dataloader.gpt_conut_data_loader import GPTCoNuTDataLoader

# GPT-CoNuT trainer

In [None]:
# print(f'CUDA GPU availible : {torch.cuda.is_available()}')
# DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
class GPTCoNuTTrainer():
    """Train a GPTCoNuTModel on top of a pretrained GPT model and save checkpoints.

    Training data is read through ``train_loader`` in chunks of ``load_size``
    samples; each chunk is shuffled and cut into batches of at most
    ``batch_size`` samples. Checkpoints are written by ``validate_and_save``.
    """

    # A batch is closed once pairing the next sample with the longest sequences
    # already in the batch would reach this many tokens. Presumably tied to the
    # model's max_positions=1024 -- confirm against the model before changing.
    MAX_PAIR_LENGTH = 1023

    def __init__(self, train_loader, valid_loader, dictionary, gpt_file):
        """Load the pretrained GPT model from ``gpt_file`` and set up trainer state.

        ``gpt_file`` must be a ``torch.save`` checkpoint with the keys
        ``'config'`` and ``'model'``. A CUDA device is required.
        """
        gpt_loaded = torch.load(gpt_file)
        config = gpt_loaded['config']
        gpt_model = OpenAIGPTLMHeadModel(config).cuda()
        gpt_model.load_state_dict(gpt_loaded['model'])

        self.train_loader = train_loader
        self.valid_loader = valid_loader
        self.dictionary = dictionary

        self.batch_size = 12
        self.load_size = 1200   # load 1200 samples from training data every time

        self.gpt_model = gpt_model
        self.model = None
        self.hyper_parameter = {}
        self.optimizer = None
        self.current_train_step = 0
        self.val_loss = {}

    def shuffle_dataset(self):
        """Return the indices of the currently loaded training samples in random order."""
        indices = list(range(len(self.train_loader.dataset)))
        random.shuffle(indices)
        return indices

    @staticmethod
    def _mean(values):
        """Mean of ``values`` as a float; NaN (without a NumPy warning) when empty."""
        return float(np.mean(values)) if len(values) > 0 else float('nan')

    @staticmethod
    def _report_skipped_batch(stage, error):
        """Print why a batch was skipped unless it was a plain out-of-memory error.

        Every failure is still counted in the ``oom`` figure of the log lines;
        this only makes sure other errors are not silently hidden behind it.
        """
        if 'out of memory' in str(error).lower():
            return
        print('{} batch skipped after {}: {}'.format(stage, type(error).__name__, error))

    def _forward(self, batch):
        """Run the model on a collated batch, on whatever device the model lives on."""
        device = next(self.model.parameters()).device
        net_input = batch['net_input']
        return self.model(
            net_input['src_tokens'].to(device),
            net_input['src_with_prev_context'].to(device),
            net_input['ctx_tokens'].to(device),
            prev_tokens_index=batch['target_index'].to(device),
            prev_tokens_with_context=batch['target_with_prev_context'].to(device),
            labels=batch['target'].to(device),
        )

    def _make_batches(self, indices):
        """Cut the loaded training samples, visited in ``indices`` order, into batches.

        A batch is closed when it holds ``batch_size`` samples or when adding
        the next sample would push a combined length to ``MAX_PAIR_LENGTH``.
        A sample that exceeds the budget on its own is dropped; previously the
        loop never advanced past such a sample and ran forever.

        Returns:
            (batches, skipped): a list of sample lists and the number of
            samples that were dropped for being too long.
        """
        dataset = self.train_loader.dataset
        limit = self.MAX_PAIR_LENGTH
        batches, skipped = [], 0
        samples = []
        max_src, max_ctx, max_tgt = 0, 0, 0
        position = 0
        while position < len(dataset):
            sample = dataset[indices[position]]
            src_len = len(sample['source'])
            ctx_len = len(sample['prev_context'])
            tgt_len = len(sample['target'])
            too_long = (max_ctx + tgt_len >= limit
                        or max_tgt + ctx_len >= limit
                        or max_ctx + src_len >= limit
                        or max_src + ctx_len >= limit)
            if too_long or len(samples) == self.batch_size:
                if samples:
                    # Close the current batch and re-examine this sample against an empty one.
                    batches.append(samples)
                else:
                    # Even an empty batch cannot take this sample: drop it and move on.
                    skipped += 1
                    position += 1
                samples = []
                max_src, max_ctx, max_tgt = 0, 0, 0
                continue
            max_src = max(max_src, src_len)
            max_ctx = max(max_ctx, ctx_len)
            max_tgt = max(max_tgt, tgt_len)
            samples.append(sample)
            position += 1
        if samples:
            batches.append(samples)
        return batches, skipped

    def train_step(self, samples):
        """Run one optimisation step on ``samples``.

        Returns:
            (loss, apr_loss, lm_loss) as Python floats, where
            ``loss = apr_loss + 0.3 * lm_loss``.
        """
        self.model.train()
        self.current_train_step += 1
        self.optimizer.zero_grad()

        batch = self.train_loader.dataset.collater(samples)
        # The model returns (logits, avg_attn_scores, apr_loss, lm_loss, ...).
        _, _, apr_loss, lm_loss = self._forward(batch)[:4]
        loss = apr_loss + 0.3 * lm_loss
        loss.mean().backward()
        nn.utils.clip_grad_norm_(self.model.parameters(), 0.5, norm_type=2)
        self.optimizer.step()
        return loss.mean().item(), apr_loss.mean().item(), lm_loss.mean().item()

    def valid_step(self, samples):
        """Evaluate ``samples`` without updating the model.

        Returns:
            (loss, apr_loss, lm_loss, logits); the three losses are floats.
        """
        self.model.eval()
        batch = self.valid_loader.dataset.collater(samples)
        logits, _, apr_loss, lm_loss = self._forward(batch)[:4]
        loss = apr_loss + 0.3 * lm_loss
        return loss.mean().item(), apr_loss.mean().item(), lm_loss.mean().item(), logits

    def validate_and_save(self, model_id, save_dir):
        """Evaluate on the validation data, print a summary and write a checkpoint.

        The checkpoint ``gpt_conut_<model_id>.pt`` in ``save_dir`` stores the
        model and optimizer state, the step counter, the model config (which
        the generator needs to rebuild the model) and the validation APR loss.

        Returns:
            The mean validation APR loss (NaN if every batch failed).
        """
        oom = 0
        with torch.no_grad():
            val_loss, val_fconv_loss, val_lm_loss = [], [], []
            for i in range(0, self.valid_loader.total_size, self.batch_size):
                samples = [self.valid_loader.dataset[j]
                           for j in range(i, min(len(self.valid_loader.dataset), i + self.batch_size))]
                try:
                    loss, fconv_loss, lm_loss, logits = self.valid_step(samples)
                except Exception as e:
                    oom += 1
                    self._report_skipped_batch('validation', e)
                    continue
                val_loss.append(float(loss))
                val_fconv_loss.append(float(fconv_loss))
                val_lm_loss.append(float(lm_loss))

            info = 'val loss:{}, val apr_loss:{}, val lm_loss:{}, val ppl:{}, oom:{}'.format(
                round(self._mean(val_loss), 6),
                round(self._mean(val_fconv_loss), 6),
                round(self._mean(val_lm_loss), 6),
                round(float(np.exp(self._mean(val_loss))), 6),
                oom
            )
            print(info)

            # Only the APR loss is recorded as the checkpoint's validation loss.
            val_loss = self._mean(val_fconv_loss)
            checkpoint = {
                'model': self.model.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'current_step': self.current_train_step,
                # The model is not wrapped in DataParallel, so config() is called on it directly.
                'config': self.model.config(),
                'val_loss': val_loss,
            }
            torch.save(checkpoint, os.path.join(save_dir, 'gpt_conut_' + str(model_id) + '.pt'))
            self.val_loss[model_id] = {
                'val_loss': val_loss,
                'hyper-parameter': str(self.hyper_parameter),
            }

        return val_loss

    def train(self, model_id, epochs, hyper_parameter, save_dir):
        """Build the model from ``hyper_parameter`` and train it for ``epochs`` epochs.

        ``hyper_parameter`` must provide ``src_encoder_convolutions``,
        ``ctx_encoder_convolutions``, ``decoder_convolutions`` and ``dropout``.
        Progress is printed every 10 data loads, a checkpoint is written every
        100 data loads and once more after the last epoch.
        """
        self.hyper_parameter = hyper_parameter
        self.model = GPTCoNuTModel(
            self.dictionary, embed_dim=384, max_positions=1024,
            src_encoder_convolutions=self.hyper_parameter['src_encoder_convolutions'],
            ctx_encoder_convolutions=self.hyper_parameter['ctx_encoder_convolutions'],
            decoder_convolutions=self.hyper_parameter['decoder_convolutions'],
            dropout=self.hyper_parameter['dropout'], embed_model=self.gpt_model,
        ).cuda()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=6.25e-5)

        self.valid_loader.load_data(0, self.valid_loader.total_size)
        for epoch in range(epochs):
            start_time = time.time()
            for i in range(0, self.train_loader.total_size, self.load_size):
                self.train_loader.load_data(i, i + self.load_size)
                indices = self.shuffle_dataset()
                batches, skipped = self._make_batches(indices)
                if skipped:
                    print('{} sample(s) dropped: too long for a single batch'.format(skipped))
                # 'oom' counts everything that could not be trained on.
                oom = skipped
                train_loss, train_apr_loss, train_lm_loss = [], [], []

                for samples in batches:
                    try:
                        loss, apr_loss, lm_loss = self.train_step(samples)
                    except Exception as e:
                        oom += 1
                        self._report_skipped_batch('train', e)
                        continue
                    train_loss.append(loss)
                    train_apr_loss.append(apr_loss)
                    train_lm_loss.append(lm_loss)

                if (i // self.load_size) % 10 == 0:
                    info = 'epoch:{}, load data:{}, lr:{}, loss:{}, apr_loss:{}, lm_loss:{}, time:{}s, oom:{}'.\
                        format(epoch + 1, i + self.load_size,
                               round(self.optimizer.param_groups[0]['lr'], 10),
                               round(self._mean(train_loss), 6),
                               round(self._mean(train_apr_loss), 6),
                               round(self._mean(train_lm_loss), 6),
                               int(time.time() - start_time), oom
                               )
                    start_time = time.time()
                    print(str(model_id) + ' ' + info)

                if (i // self.load_size) % 100 == 0:
                    self.validate_and_save(model_id, save_dir)
        self.validate_and_save(model_id, save_dir)

In [None]:
if __name__ == '__main__':
    # GPU selection carried over from the original multi-GPU script.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
    device_ids = [0, 1, 2, 3]

    # Input files, all located under the project root.
    vocab_file, train_file, valid_file, gpt_file = (
        GPT_CONUT_TRAINER_DIR + relative_path
        for relative_path in (
            '/data/vocabulary/vocabulary.txt',
            '/data/data/training_bpe.txt',
            '/data/data/validation_bpe.txt',
            '/data/models/code_gpt.pt',
        )
    )

    dictionary = Dictionary(vocab_file, min_cnt=0)
    print('dictionary initialized, vocab size:{}'.format(len(dictionary)))

    train_loader = GPTCoNuTDataLoader(train_file, dictionary)
    valid_loader = GPTCoNuTDataLoader(valid_file, dictionary)
    print('data loader initialized, train size:{}, validate size:{}'.format(
        train_loader.total_size, valid_loader.total_size))

    trainer = GPTCoNuTTrainer(train_loader, valid_loader, dictionary, gpt_file)

    # One convolution layer per encoder/decoder, given as (channels, kernel size) pairs.
    hyper_parameter = dict(
        src_encoder_convolutions=((192, 5),),
        ctx_encoder_convolutions=((384, 5),),
        decoder_convolutions=((192, 5),),
        dropout=0.1,
    )
    model_id = 1
    epochs = 5
    trainer.train(
        model_id, epochs, hyper_parameter,
        save_dir=GPT_CONUT_TRAINER_DIR + '/data/models/',
    )

dictionary initialized, vocab size:50061
data loader initialized, train size:2000, validate size:100




1 epoch:1, load data:1200, lr:6.25e-05, loss:9.26268, apr_loss:6.878682, lm_loss:7.946659, time:63s, oom:36
val loss:5.058121, val apr_loss:3.304267, val lm_loss:5.846182, val ppl:157.294747, oom:0
1 epoch:2, load data:1200, lr:6.25e-05, loss:3.467829, apr_loss:3.062547, lm_loss:1.350938, time:66s, oom:34
val loss:2.32264, val apr_loss:2.110734, val lm_loss:0.706351, val ppl:10.202569, oom:0
1 epoch:3, load data:1200, lr:6.25e-05, loss:2.268841, apr_loss:2.029103, lm_loss:0.799127, time:66s, oom:36
val loss:1.682003, val apr_loss:1.480034, val lm_loss:0.67323, val ppl:5.376316, oom:0
1 epoch:4, load data:1200, lr:6.25e-05, loss:1.770133, apr_loss:1.547027, lm_loss:0.743687, time:70s, oom:30
val loss:1.404391, val apr_loss:1.20386, val lm_loss:0.668437, val ppl:4.073047, oom:0
1 epoch:5, load data:1200, lr:6.25e-05, loss:1.574451, apr_loss:1.348321, lm_loss:0.753765, time:68s, oom:33
val loss:1.270049, val apr_loss:1.073574, val lm_loss:0.654915, val ppl:3.561027, oom:0
val loss:1.23336

Run the code from `gpt_fconv_trainer.py` to train the GPT-FConv model.

In [None]:
import json
import os
import sys
import time
import codecs
import random
import numpy as np
import torch
import torch.nn as nn
from transformers import OpenAIGPTLMHeadModel

# GPT_FCONV_TRAINER_DIR = os.path.abspath(__file__)[: os.path.abspath(__file__).rindex('/') + 1]
GPT_FCONV_TRAINER_DIR = os.path.abspath('/content')

from src.models.gpt_fconv import GPTFConvModel
from src.dataloader.dictionary import Dictionary
from src.dataloader.gpt_fconv_data_loader import GPTFConvDataLoader


class GPTFConvTrainer():
    """Train a GPTFConvModel on top of a pretrained GPT model and save checkpoints.

    Training data is read through ``train_loader`` in chunks of ``load_size``
    samples; each chunk is shuffled and cut into batches of at most
    ``batch_size`` samples. Checkpoints are written by ``validate_and_save``.
    """

    # A batch is closed once pairing the next sample with the longest sequences
    # already in the batch would reach this many tokens. Presumably tied to the
    # model's max_positions=1024 -- confirm against the model before changing.
    MAX_PAIR_LENGTH = 1023

    def __init__(self, train_loader, valid_loader, dictionary, gpt_file):
        """Load the pretrained GPT model from ``gpt_file`` and set up trainer state.

        ``gpt_file`` must be a ``torch.save`` checkpoint with the keys
        ``'config'`` and ``'model'``. A CUDA device is required.
        """
        gpt_loaded = torch.load(gpt_file)
        config = gpt_loaded['config']
        gpt_model = OpenAIGPTLMHeadModel(config).cuda()
        gpt_model.load_state_dict(gpt_loaded['model'])

        self.train_loader = train_loader
        self.valid_loader = valid_loader
        self.dictionary = dictionary

        self.batch_size = 12
        self.load_size = 1200   # number of training samples loaded at a time

        self.gpt_model = gpt_model
        self.model = None
        self.hyper_parameter = {}
        self.hyper_parameter_set = {'{}'}
        self.optimizer = None
        self.current_train_step = 0
        self.val_loss = {}

    def shuffle_dataset(self):
        """Return the indices of the currently loaded training samples in random order."""
        indices = list(range(len(self.train_loader.dataset)))
        random.shuffle(indices)
        return indices

    @staticmethod
    def _mean(values):
        """Mean of ``values`` as a float; NaN (without a NumPy warning) when empty."""
        return float(np.mean(values)) if len(values) > 0 else float('nan')

    @staticmethod
    def _report_skipped_batch(stage, error):
        """Print why a batch was skipped unless it was a plain out-of-memory error.

        Every failure is still counted in the ``oom`` figure of the log lines;
        this only makes sure other errors are not silently hidden behind it.
        """
        if 'out of memory' in str(error).lower():
            return
        print('{} batch skipped after {}: {}'.format(stage, type(error).__name__, error))

    def _forward(self, batch):
        """Run the model on a collated batch, on whatever device the model lives on.

        Shared by training and validation so both move tensors the same way;
        the training step used to leave ``outputs`` unassigned without CUDA.
        """
        device = next(self.model.parameters()).device
        net_input = batch['net_input']
        return self.model(
            net_input['src_tokens'].to(device),
            net_input['src_with_prev_context'].to(device),
            prev_tokens_index=batch['target_index'].to(device),
            prev_tokens_with_context=batch['target_with_prev_context'].to(device),
            labels=batch['target'].to(device),
        )

    def _make_batches(self, indices):
        """Cut the loaded training samples, visited in ``indices`` order, into batches.

        A batch is closed when it holds ``batch_size`` samples or when adding
        the next sample would push a combined length to ``MAX_PAIR_LENGTH``.
        A sample that exceeds the budget on its own is dropped; previously the
        loop never advanced past such a sample and ran forever.

        Returns:
            (batches, skipped): a list of sample lists and the number of
            samples that were dropped for being too long.
        """
        dataset = self.train_loader.dataset
        limit = self.MAX_PAIR_LENGTH
        batches, skipped = [], 0
        samples = []
        max_src, max_ctx, max_tgt = 0, 0, 0
        position = 0
        while position < len(dataset):
            sample = dataset[indices[position]]
            src_len = len(sample['source'])
            ctx_len = len(sample['prev_context'])
            tgt_len = len(sample['target'])
            too_long = (max_ctx + tgt_len >= limit
                        or max_tgt + ctx_len >= limit
                        or max_ctx + src_len >= limit
                        or max_src + ctx_len >= limit)
            if too_long or len(samples) == self.batch_size:
                if samples:
                    # Close the current batch and re-examine this sample against an empty one.
                    batches.append(samples)
                else:
                    # Even an empty batch cannot take this sample: drop it and move on.
                    skipped += 1
                    position += 1
                samples = []
                max_src, max_ctx, max_tgt = 0, 0, 0
                continue
            max_src = max(max_src, src_len)
            max_ctx = max(max_ctx, ctx_len)
            max_tgt = max(max_tgt, tgt_len)
            samples.append(sample)
            position += 1
        if samples:
            batches.append(samples)
        return batches, skipped

    def train_step(self, samples):
        """Run one optimisation step on ``samples``.

        Returns:
            (loss, apr_loss, lm_loss) as Python floats, where
            ``loss = apr_loss + 0.3 * lm_loss``.
        """
        self.model.train()
        self.current_train_step += 1
        self.optimizer.zero_grad()

        batch = self.train_loader.dataset.collater(samples)
        # The model returns (logits, avg_attn_scores, apr_loss, lm_loss, ...).
        _, _, apr_loss, lm_loss = self._forward(batch)[:4]
        loss = apr_loss + 0.3 * lm_loss
        loss.mean().backward()
        nn.utils.clip_grad_norm_(self.model.parameters(), 0.5, norm_type=2)
        self.optimizer.step()
        return loss.mean().item(), apr_loss.mean().item(), lm_loss.mean().item()

    def valid_step(self, samples):
        """Evaluate ``samples`` without updating the model.

        Returns:
            (loss, apr_loss, lm_loss, logits); the three losses are floats.
        """
        self.model.eval()
        batch = self.valid_loader.dataset.collater(samples)
        logits, _, apr_loss, lm_loss = self._forward(batch)[:4]
        loss = apr_loss + 0.3 * lm_loss
        return loss.mean().item(), apr_loss.mean().item(), lm_loss.mean().item(), logits

    def validate_and_save(self, model_id, save_dir):
        """Evaluate on the validation data, print a summary and write a checkpoint.

        The checkpoint ``gpt_fconv_<model_id>.pt`` in ``save_dir`` stores the
        model and optimizer state, the step counter, the model config and the
        validation APR loss.

        Returns:
            The mean validation APR loss (NaN if every batch failed).
        """
        oom = 0
        with torch.no_grad():
            val_loss, val_fconv_loss, val_lm_loss = [], [], []
            for i in range(0, self.valid_loader.total_size, self.batch_size):
                samples = [self.valid_loader.dataset[j]
                           for j in range(i, min(len(self.valid_loader.dataset), i + self.batch_size))]
                try:
                    loss, fconv_loss, lm_loss, logits = self.valid_step(samples)
                except Exception as e:
                    oom += 1
                    self._report_skipped_batch('validation', e)
                    continue
                val_loss.append(float(loss))
                val_fconv_loss.append(float(fconv_loss))
                val_lm_loss.append(float(lm_loss))

            info = 'val loss:{}, val apr_loss:{}, val lm_loss:{}, val ppl:{}, oom:{}'.format(
                round(self._mean(val_loss), 6),
                round(self._mean(val_fconv_loss), 6),
                round(self._mean(val_lm_loss), 6),
                round(float(np.exp(self._mean(val_loss))), 6),
                oom
            )
            print(info)

            # Only the APR loss is recorded as the checkpoint's validation loss.
            val_loss = self._mean(val_fconv_loss)
            checkpoint = {
                'model': self.model.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'current_step': self.current_train_step,
                'config': self.model.config(),
                'val_loss': val_loss,
            }
            torch.save(checkpoint, os.path.join(save_dir, 'gpt_fconv_' + str(model_id) + '.pt'))
            self.val_loss[model_id] = {
                'val_loss': val_loss,
                'hyper-parameter': str(self.hyper_parameter),
            }
        return val_loss

    def train(self, model_id, epochs, hyper_parameter, save_dir):
        """Build the model from ``hyper_parameter`` and train it for ``epochs`` epochs.

        ``hyper_parameter`` must provide ``encoder_convolutions``,
        ``decoder_convolutions`` and ``dropout``. Progress is printed every
        10 data loads, a checkpoint is written every 100 data loads and once
        more after the last epoch.
        """
        self.hyper_parameter = hyper_parameter
        self.model = GPTFConvModel(
            self.dictionary, embed_dim=384, max_positions=1024,
            encoder_convolutions=self.hyper_parameter['encoder_convolutions'],
            decoder_convolutions=self.hyper_parameter['decoder_convolutions'],
            dropout=self.hyper_parameter['dropout'], embed_model=self.gpt_model,
        ).cuda()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=6.25e-5)

        self.valid_loader.load_data(0, self.valid_loader.total_size)
        for epoch in range(epochs):
            start_time = time.time()
            for i in range(0, self.train_loader.total_size, self.load_size):
                self.train_loader.load_data(i, i + self.load_size)
                indices = self.shuffle_dataset()
                batches, skipped = self._make_batches(indices)
                if skipped:
                    print('{} sample(s) dropped: too long for a single batch'.format(skipped))
                # 'oom' counts everything that could not be trained on.
                oom = skipped
                train_loss, train_apr_loss, train_lm_loss = [], [], []

                for samples in batches:
                    try:
                        loss, apr_loss, lm_loss = self.train_step(samples)
                    except Exception as e:
                        oom += 1
                        self._report_skipped_batch('train', e)
                        continue
                    train_loss.append(loss)
                    train_apr_loss.append(apr_loss)
                    train_lm_loss.append(lm_loss)

                if (i // self.load_size) % 10 == 0:
                    info = 'epoch:{}, load data:{}, lr:{}, loss:{}, apr_loss:{}, lm_loss:{}, time:{}s, oom:{}'.\
                        format(epoch + 1, i + self.load_size,
                               round(self.optimizer.param_groups[0]['lr'], 10),
                               round(self._mean(train_loss), 6),
                               round(self._mean(train_apr_loss), 6),
                               round(self._mean(train_lm_loss), 6),
                               int(time.time() - start_time), oom
                               )
                    start_time = time.time()
                    print(str(model_id) + ' ' + info)

                if (i // self.load_size) % 100 == 0:
                    self.validate_and_save(model_id, save_dir)
        self.validate_and_save(model_id, save_dir)


if __name__ == '__main__':
    # GPU selection carried over from the original multi-GPU script.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
    device_ids = [0, 1, 2, 3]

    # Input files, all located under the project root.
    vocab_file, train_file, valid_file, gpt_file = (
        GPT_FCONV_TRAINER_DIR + relative_path
        for relative_path in (
            '/data/vocabulary/vocabulary.txt',
            '/data/data/training_bpe.txt',
            '/data/data/validation_bpe.txt',
            '/data/models/code_gpt.pt',
        )
    )

    dictionary = Dictionary(vocab_file, min_cnt=0)
    print('dictionary initialized, vocab size:{}'.format(len(dictionary)))

    train_loader = GPTFConvDataLoader(train_file, dictionary)
    valid_loader = GPTFConvDataLoader(valid_file, dictionary)
    print('data loader initialized, train size:{}, validate size:{}'.format(
        train_loader.total_size, valid_loader.total_size))

    trainer = GPTFConvTrainer(train_loader, valid_loader, dictionary, gpt_file)

    # One convolution layer for the encoder and the decoder, as (channels, kernel size) pairs.
    hyper_parameter = dict(
        encoder_convolutions=((192, 5),),
        decoder_convolutions=((192, 5),),
        dropout=0.1,
    )
    # Train model id 1 for 2 epochs.
    trainer.train(
        model_id=1, epochs=2, hyper_parameter=hyper_parameter,
        save_dir=GPT_FCONV_TRAINER_DIR + '/data/models/',
    )


dictionary initialized, vocab size:50061
data loader initialized, train size:2000, validate size:100
1 epoch:1, load data:1200, lr:6.25e-05, loss:7.784297, apr_loss:5.795911, lm_loss:6.627955, time:56s, oom:12
val loss:3.369558, val apr_loss:2.734781, val lm_loss:2.115924, val ppl:29.065682, oom:0
1 epoch:2, load data:1200, lr:6.25e-05, loss:2.67298, apr_loss:2.404278, lm_loss:0.895674, time:55s, oom:12
val loss:1.878901, val apr_loss:1.64997, val lm_loss:0.763105, val ppl:6.546308, oom:0
val loss:1.601408, val apr_loss:1.37689, val lm_loss:0.748394, val ppl:4.960009, oom:0


Run the code from `generator.py` to generate candidate patches with the trained models.

In [None]:
import codecs
import torch
import sys
import os
from transformers import OpenAIGPTLMHeadModel

# GENERATOR_DIR = os.path.abspath(__file__)[: os.path.abspath(__file__).rindex('/') + 1]
GENERATOR_DIR = os.path.abspath('/content')
sys.path.append(GENERATOR_DIR + '/models/')
sys.path.append(GENERATOR_DIR + '/dataloader/')
sys.path.append(GENERATOR_DIR + '/tester/')
from src.dataloader.gpt_conut_data_loader import GPTCoNuTDataLoader
from src.dataloader.gpt_fconv_data_loader import GPTFConvDataLoader
from src.dataloader.identifier_data_loader import IdentifierDataLoader
from src.dataloader.dictionary import Dictionary
from src.models.gpt_conut import GPTCoNuTModel
from src.models.gpt_fconv import GPTFConvModel
from src.tester.beamsearch import BeamSearch


class Generator():
    """Generate candidate patches with beam search and write them to a text file."""

    def __init__(self, model, dictionary, data_loader, beam_size=10):
        """Store the collaborators and build the beam search helper.

        Args:
            model: a GPTCoNuTModel or GPTFConvModel.
            dictionary: used to turn token ids back into text.
            data_loader: provides ``total_size``, ``load_data`` and ``dataset``.
            beam_size: beam width used for every sample.
        """
        self.model = model
        self.dictionary = dictionary
        self.data_loader = data_loader
        self.beam_size = beam_size
        self.beamsearch = BeamSearch(model, dictionary, beam_size)
        print(self.model, beam_size)

    def _search(self, sample):
        """Run the beam search variant that matches the model type.

        Raises:
            TypeError: if the model is neither a GPTCoNuTModel nor a
                GPTFConvModel (previously this surfaced later as an
                unbound ``hypothesis`` variable).
        """
        with torch.no_grad():
            if isinstance(self.model, GPTCoNuTModel):
                return self.beamsearch.generate_gpt_conut(sample)
            if isinstance(self.model, GPTFConvModel):
                return self.beamsearch.generate_gpt_fconv(sample)
        raise TypeError('unsupported model type: {}'.format(type(self.model).__name__))

    def generate(self, output_path):
        """Write source, target and all hypotheses of every sample to ``output_path``.

        Per sample the file gets an ``S-<id>`` line (source), a ``T-<id>``
        line (target) and, for each hypothesis, an ``H-<id>`` line (final
        score and text) followed by a ``P-<id>`` line (per-token scores).
        The file is closed even if generation fails part-way.
        """
        with codecs.open(output_path, 'w', 'utf-8') as wp:
            self.data_loader.load_data(0, self.data_loader.total_size)
            for i in range(self.data_loader.total_size):
                print(i, '/', self.data_loader.total_size)
                data = self.data_loader.dataset[i]
                # Reset the beam width for every sample, as the original code did.
                self.beamsearch.beam_size = self.beam_size
                sample = self.data_loader.dataset.collater([data])
                hypothesis = self._search(sample)

                sample_id = str(sample['id'].item())
                wp.write('S-{}\t'.format(sample_id))
                wp.write(self.dictionary.string(data['source']) + '\n')
                wp.write('T-{}\t'.format(sample_id))
                wp.write(self.dictionary.string(data['target']) + '\n')
                for h in hypothesis:
                    wp.write('H-{}\t{}\t'.format(sample_id, str(h['final_score'])))
                    wp.write(self.dictionary.string(h['hypo']) + '\n')
                    wp.write('P-{}\t'.format(sample_id))
                    wp.write(' '.join(str(round(s.item(), 4)) for s in h['score']) + '\n')


def generate_gpt_conut(vocab_file, model_file, input_file, identifier_txt_file, identifier_token_file, output_file, beam_size):
    """Rebuild a GPT-CoNuT model from a checkpoint and write its patches to ``output_file``.

    The checkpoint at ``model_file`` is loaded on the CPU and must contain
    ``'config'`` and ``'model'``. All dropout is disabled before the model is
    built. Inputs come from ``input_file`` together with the two identifier files.
    """
    dictionary = Dictionary(vocab_file, min_cnt=0)
    print(len(dictionary))

    checkpoint = torch.load(model_file, map_location='cpu')
    model_config = checkpoint['config']

    # Turn off every dropout of the embedded GPT model for generation.
    embed_config = model_config['embed_model_config']
    for dropout_field in ('attn_pdrop', 'embd_pdrop', 'resid_pdrop'):
        setattr(embed_config, dropout_field, 0)
    embed_model = OpenAIGPTLMHeadModel(embed_config)

    model = GPTCoNuTModel(
        dictionary=dictionary,
        embed_dim=model_config['embed_dim'],
        max_positions=model_config['max_positions'],
        src_encoder_convolutions=model_config['src_encoder_convolutions'],
        ctx_encoder_convolutions=model_config['ctx_encoder_convolutions'],
        decoder_convolutions=model_config['decoder_convolutions'],
        dropout=0,
        embed_model=embed_model,
    )
    model.load_state_dict(checkpoint['model'])

    identifiers = IdentifierDataLoader(dictionary, identifier_token_file, identifier_txt_file)
    inputs = GPTCoNuTDataLoader(input_file, dictionary, identifier_loader=identifiers)

    patch_generator = Generator(model, dictionary, inputs, beam_size=beam_size)
    print('start generate')
    patch_generator.generate(output_file)


def generate_gpt_fconv(vocab_file, model_file, input_file, identifier_txt_file, identifier_token_file, output_file, beam_size):
    """Rebuild a GPT-FConv model from a checkpoint and write its patches to ``output_file``.

    The checkpoint at ``model_file`` is loaded on the CPU and must contain
    ``'config'`` and ``'model'``. All dropout is disabled before the model is
    built. Inputs come from ``input_file`` together with the two identifier files.
    """
    dictionary = Dictionary(vocab_file, min_cnt=0)
    print(len(dictionary))

    checkpoint = torch.load(model_file, map_location='cpu')
    model_config = checkpoint['config']

    # Turn off every dropout of the embedded GPT model for generation.
    embed_config = model_config['embed_model_config']
    for dropout_field in ('attn_pdrop', 'embd_pdrop', 'resid_pdrop'):
        setattr(embed_config, dropout_field, 0)
    embed_model = OpenAIGPTLMHeadModel(embed_config)

    model = GPTFConvModel(
        dictionary=dictionary,
        embed_dim=model_config['embed_dim'],
        max_positions=model_config['max_positions'],
        encoder_convolutions=model_config['encoder_convolutions'],
        decoder_convolutions=model_config['decoder_convolutions'],
        dropout=0,
        embed_model=embed_model,
    )
    model.load_state_dict(checkpoint['model'])

    identifiers = IdentifierDataLoader(dictionary, identifier_token_file, identifier_txt_file)
    inputs = GPTFConvDataLoader(input_file, dictionary, identifier_loader=identifiers)

    patch_generator = Generator(model, dictionary, inputs, beam_size=beam_size)
    print('start generate')
    patch_generator.generate(output_file)


if __name__ == "__main__":
    os.environ['CUDA_VISIBLE_DEVICES'] = "0"
    beam_size = 1000

    # Inputs shared by both models: the vocabulary and the QuixBugs benchmark files.
    vocab_file = GENERATOR_DIR + '/data/vocabulary/vocabulary.txt'
    input_file, identifier_txt_file, identifier_token_file = (
        GENERATOR_DIR + relative_path
        for relative_path in (
            '/candidate_patches/QuixBugs/quixbugs_bpe.txt',
            '/candidate_patches/QuixBugs/identifier.txt',
            '/candidate_patches/QuixBugs/identifier.tokens',
        )
    )

    # Patches from the GPT-CoNuT checkpoint.
    model_file = GENERATOR_DIR + '/data/models/gpt_conut_1.pt'
    output_file = GENERATOR_DIR + '/data/patches/gpt_conut_1.txt'
    generate_gpt_conut(
        vocab_file=vocab_file, model_file=model_file, input_file=input_file,
        identifier_txt_file=identifier_txt_file,
        identifier_token_file=identifier_token_file,
        output_file=output_file, beam_size=beam_size,
    )

    # Patches from the GPT-FConv checkpoint.
    model_file = GENERATOR_DIR + '/data/models/gpt_fconv_1.pt'
    output_file = GENERATOR_DIR + '/data/patches/gpt_fconv_1.txt'
    generate_gpt_fconv(
        vocab_file=vocab_file, model_file=model_file, input_file=input_file,
        identifier_txt_file=identifier_txt_file,
        identifier_token_file=identifier_token_file,
        output_file=output_file, beam_size=beam_size,
    )


ModuleNotFoundError: ignored