## Installation & imports

### Installation

In [None]:
!pip install datasets
!pip install accelerate
!pip install einops
!pip install transformers>=4.32.0
!pip install optimum>=1.12.0
!pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/  # Use cu117 if on CUDA 11.7

In [None]:
# Print the installed CUDA compiler version.
# NOTE(review): presumably used to pick the matching auto-gptq wheel index (cu118 vs cu117) — confirm.
!nvcc --version

### Imports

In [None]:
from transformers import AutoTokenizer
import transformers
import torch
from torch.utils.data import Dataset

# Choose device and dtype together: half precision on CUDA, full precision on CPU.
DEFAULT_DEVICE, TORCH_DTYPE = (
    ("cuda", torch.float16)
    if torch.cuda.is_available()
    else ("cpu", torch.float32)
)

# Make the chosen device torch's default device.
torch.set_default_device(DEFAULT_DEVICE)

# Last expression of the cell: display the installed transformers version.
transformers.__version__

In [None]:
import random
import gc

# Seed passed to random.seed / torch.manual_seed before each generation run.
SEED = 123

In [None]:
import locale

# Force the reported preferred encoding to UTF-8 for the rest of the session.
# NOTE(review): presumably a workaround for hosted runtimes where shell commands
# fail unless the locale reports UTF-8 — confirm it is still needed.
# The replacement accepts positional AND keyword arguments, so callers using
# `locale.getpreferredencoding(do_setlocale=False)` do not raise TypeError.
locale.getpreferredencoding = (lambda *args, **kwargs: 'UTF-8')

In [None]:
import json
from copy import deepcopy
from datasets import load_dataset
from google.colab import files
from tqdm import tqdm

## Data preparation

### Mercury Eval (256 problems)

In [None]:
# Load the 'eval' split of the 'Elfsong/Mercury' dataset.
mercury = load_dataset('Elfsong/Mercury', split='eval')

In [None]:
class ListDataset(Dataset):
    """Expose a plain Python sequence through the torch ``Dataset`` interface."""

    def __init__(self, original_list):
        """Store a reference to ``original_list``; no copy is made."""
        self.original_list = original_list

    def __getitem__(self, i):
        """Return the element stored at position ``i`` of the wrapped sequence."""
        items = self.original_list
        return items[i]

    def __len__(self):
        """Return the number of elements in the wrapped sequence."""
        items = self.original_list
        return len(items)

In [None]:
def prompt_generate(instance):
    """Build the generation prompt for one dataset record.

    The prompt is the fixed instruction line, followed by the first entry of
    ``instance['pretty_content']`` and then ``instance['prompt']``, each
    separated by a newline.
    """
    problem_text = instance['pretty_content'][0]
    starter_code = instance['prompt']
    return (
        "Complete python3 code to solve the following coding problem.\n"
        f"{problem_text}\n"
        f"{starter_code}"
    )

In [None]:
# Build one prompt per Mercury record and wrap the list as a torch Dataset.
prompts = ListDataset(list(map(prompt_generate, mercury)))

## Generation

### Code Llama (7B)

#### Model init

In [None]:
model_name = "TheBloke/CodeLlama-7B-Python-GPTQ"

# Tokenizer pads on the left and reuses the EOS token id as the padding id.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    padding_side='left',
    trust_remote_code=True,
)
tokenizer.pad_token_id = tokenizer.eos_token_id

# Text-generation pipeline for this checkpoint, using the tokenizer configured above.
model_code_llama_7b = transformers.pipeline(
    task="text-generation",
    model=model_name,
    tokenizer=tokenizer,
    device_map="auto",
    torch_dtype=TORCH_DTYPE,
    trust_remote_code=True,
)

#### Mercury

In [None]:
# Seed both RNGs before sampling.
torch.manual_seed(SEED)
random.seed(SEED)

# Sampled generation over every prompt, in batches of 4, one sequence per prompt.
sequences = model_code_llama_7b(
    prompts,
    batch_size=4,
    max_new_tokens=512,
    num_return_sequences=1,
    do_sample=True,
    temperature=0.4,
    top_k=10,
    top_p=0.95,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

# Collect the first generated text of each result, with surrounding whitespace
# removed, while tqdm reports progress.
output = []
for sequence in tqdm(sequences, total=len(prompts)):
    output.append(sequence[0]['generated_text'].strip())

#### Teardown

In [None]:
# Drop every reference that can keep the model alive before collecting garbage.
# `sequences` is the result object returned by the pipeline call; for Dataset
# inputs the pipeline returns a lazy iterator that still references the
# pipeline, so deleting only the pipeline variable would leave the weights
# allocated. Rebinding (rather than `del`) also works when no generation cell
# has been run.
sequences = None
del tokenizer
del model_code_llama_7b
gc.collect()

In [None]:
# Release cached CUDA memory, then display the number of bytes still allocated
# (last expression of the cell) to check that the teardown freed the model.
torch.cuda.empty_cache()
torch.cuda.memory_allocated()

### Phi-1 (1.3B)


#### Model init

In [None]:
model_name = "microsoft/phi-1"

# Tokenizer pads on the left and reuses the EOS token id as the padding id.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    padding_side='left',
    trust_remote_code=True,
)
tokenizer.pad_token_id = tokenizer.eos_token_id

# Text-generation pipeline for this checkpoint, using the tokenizer configured above.
model_phi_1 = transformers.pipeline(
    task="text-generation",
    model=model_name,
    tokenizer=tokenizer,
    device_map="auto",
    torch_dtype=TORCH_DTYPE,
    trust_remote_code=True,
)

#### Mercury

In [None]:
# Seed both RNGs before sampling.
torch.manual_seed(SEED)
random.seed(SEED)

# Sampled generation over every prompt, in batches of 4, one sequence per prompt.
sequences = model_phi_1(
    prompts,
    batch_size=4,
    max_new_tokens=512,
    num_return_sequences=1,
    do_sample=True,
    temperature=0.2,
    top_k=10,
    top_p=0.95,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

# Collect the first generated text of each result, with surrounding whitespace
# removed, while tqdm reports progress.
output = []
for sequence in tqdm(sequences, total=len(prompts)):
    output.append(sequence[0]['generated_text'].strip())

#### Teardown

In [None]:
# Drop every reference that can keep the model alive before collecting garbage.
# `sequences` is the result object returned by the pipeline call; for Dataset
# inputs the pipeline returns a lazy iterator that still references the
# pipeline, so deleting only the pipeline variable would leave the weights
# allocated. Rebinding (rather than `del`) also works when no generation cell
# has been run.
sequences = None
del tokenizer
del model_phi_1
gc.collect()

In [None]:
# Release cached CUDA memory, then display the number of bytes still allocated
# (last expression of the cell) to check that the teardown freed the model.
torch.cuda.empty_cache()
torch.cuda.memory_allocated()

### Phi-2 (2.7B)

#### Model init

In [None]:
model_name = "microsoft/phi-2"

# Tokenizer pads on the left and reuses the EOS token id as the padding id.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    padding_side='left',
    trust_remote_code=True,
)
tokenizer.pad_token_id = tokenizer.eos_token_id

# Text-generation pipeline for this checkpoint, using the tokenizer configured above.
model_phi_2 = transformers.pipeline(
    task="text-generation",
    model=model_name,
    tokenizer=tokenizer,
    device_map="auto",
    torch_dtype=TORCH_DTYPE,
    trust_remote_code=True,
)

#### Mercury

In [None]:
# Seed both RNGs before sampling.
random.seed(SEED)
torch.manual_seed(SEED)

# Sampled generation over every prompt, in batches of 4, one sequence per prompt.
sequences = model_phi_2(
    prompts,
    do_sample=True,
    batch_size=4,
    top_k=10,
    temperature=0.3,
    top_p=0.95,
    num_return_sequences=1,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    max_new_tokens=512,
)

# Collect the first generated text of each result while tqdm reports progress.
# Surrounding whitespace is stripped, as in the Code Llama and Phi-1 cells, so
# saved completions are normalised the same way for every model.
output = [
    sequence[0]['generated_text'].strip()
    for sequence in tqdm(sequences, total=len(prompts))
]

#### Teardown

In [None]:
# Drop every reference that can keep the model alive before collecting garbage.
# `sequences` is the result object returned by the pipeline call; for Dataset
# inputs the pipeline returns a lazy iterator that still references the
# pipeline, so deleting only the pipeline variable would leave the weights
# allocated. Rebinding (rather than `del`) also works when no generation cell
# has been run.
sequences = None
del tokenizer
del model_phi_2
gc.collect()

In [None]:
# Release cached CUDA memory, then display the number of bytes still allocated
# (last expression of the cell) to check that the teardown freed the model.
torch.cuda.empty_cache()
torch.cuda.memory_allocated()

## Postprocessing

In [None]:
request = 'Complete python3 code to solve the following coding problem.'

# Map each problem's slug name to the generated text with the natural-language
# part of the prompt (instruction line + problem statement) removed.
completions = {}
for position, problem in enumerate(mercury):
  statement = problem['pretty_content'][0]
  nl_prefix = f'{request}\n{statement}\n'
  completions[problem['slug_name']] = output[position].replace(nl_prefix, '')

In [None]:
# Serialise the slug -> completion mapping to completions.json (UTF-8).
with open('completions.json', 'w', encoding='utf-8') as out_file:
  json.dump(completions, out_file)

In [None]:
# Hand the results file to the browser using the google.colab `files` helper.
files.download('completions.json')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>