## Installation & imports

### Installation

In [None]:
!pip install datasets
!pip install accelerate
!pip install einops
!pip install transformers>=4.32.0
!pip install optimum>=1.12.0
!pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/  # Use cu117 if on CUDA 11.7

Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.6.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (

In [None]:
# Print the CUDA toolkit version reported by nvcc.
# NOTE(review): the auto-gptq wheel index used in the install cell targets cu118 — confirm it matches the toolkit shown here.
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Thu_Jun__6_02:18:23_PDT_2024
Cuda compilation tools, release 12.5, V12.5.82
Build cuda_12.5.r12.5/compiler.34385749_0


### Imports

In [None]:
from transformers import AutoTokenizer
import transformers
import torch
from torch.utils.data import Dataset

# Choose the compute device once, then derive the dtype from it:
# half precision on a GPU, full precision when falling back to the CPU.
DEFAULT_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
TORCH_DTYPE = torch.float16 if DEFAULT_DEVICE == "cuda" else torch.float32

# New tensors are created on the selected device from here on.
torch.set_default_device(DEFAULT_DEVICE)
transformers.__version__

'4.51.3'

In [None]:
import random
import gc

# Seed passed to random.seed() and torch.manual_seed() at the start of every generation cell.
SEED = 123

In [None]:
import locale
# Replace locale.getpreferredencoding so it reports 'UTF-8' whatever arguments it receives.
# NOTE(review): presumably a workaround for Colab shell commands failing under a non-UTF-8 locale — confirm it is still needed.
locale.getpreferredencoding = (lambda *args: 'UTF-8')

In [None]:
import json
from copy import deepcopy
from datasets import load_dataset
from google.colab import files
from tqdm import tqdm

## Data preparation

### Mercury Eval (256 problems)

In [None]:
# Load only the 'eval' split of the Elfsong/Mercury dataset.
mercury = load_dataset('Elfsong/Mercury', split='eval')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/3.75k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/7.58M [00:00<?, ?B/s]

eval-00000-of-00001.parquet:   0%|          | 0.00/2.94M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1633 [00:00<?, ? examples/s]

Generating eval split:   0%|          | 0/256 [00:00<?, ? examples/s]

In [None]:
class ListDataset(Dataset):
    """Expose an existing Python sequence through the torch ``Dataset`` interface.

    Items are returned exactly as stored; no copying or transformation happens.
    """

    def __init__(self, original_list):
        # Keep a reference to the caller's sequence rather than a copy.
        self.original_list = original_list

    def __getitem__(self, i):
        """Return the element stored at index ``i`` of the wrapped sequence."""
        items = self.original_list
        return items[i]

    def __len__(self):
        """Return the number of elements in the wrapped sequence."""
        items = self.original_list
        return len(items)

In [None]:
def prompt_generate(instance):
    """Build the text prompt for one dataset record.

    Args:
        instance: Mapping with a ``'pretty_content'`` sequence (only its first
            element is used) and a ``'prompt'`` entry.

    Returns:
        A string with three newline-separated parts: the fixed instruction
        line, the first ``'pretty_content'`` element and the ``'prompt'`` value.
    """
    problem_statement = instance['pretty_content'][0]
    starter_code = instance['prompt']
    return f"Complete python3 code to solve the following coding problem.\n{problem_statement}\n{starter_code}"

In [None]:
# One prompt per Mercury record, wrapped as a torch Dataset for the generation pipelines.
prompts = ListDataset(list(map(prompt_generate, mercury)))

## Generation

### Code Llama (7B)

#### Model init

In [None]:
model_name = "TheBloke/CodeLlama-7B-Python-GPTQ"

# Left-side padding, as is usual for batched generation with decoder-only models.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    padding_side='left',
    trust_remote_code=True,
)
# Use the EOS token id as the padding token id.
tokenizer.pad_token_id = tokenizer.eos_token_id

# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint to run locally.
model_code_llama_7b = transformers.pipeline(
    task="text-generation",
    model=model_name,
    tokenizer=tokenizer,
    device_map="auto",
    torch_dtype=TORCH_DTYPE,
    trust_remote_code=True,
)

#### Mercury

In [None]:
# Re-seed Python's and PyTorch's RNGs before sampling.
random.seed(SEED)
torch.manual_seed(SEED)

# The whole prompt dataset is handed to the pipeline and processed in batches of 4.
sequences = model_code_llama_7b(
    prompts,
    batch_size=4,
    max_new_tokens=512,
    num_return_sequences=1,
    # Sampling settings.
    do_sample=True,
    temperature=0.4,
    top_k=10,
    top_p=0.95,
    # The EOS token id is used for both end-of-sequence and padding.
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

# Keep the stripped text of the first returned sequence for every prompt;
# tqdm reports progress while the results are consumed.
output = [
    generated[0]['generated_text'].strip()
    for generated in tqdm(sequences, total=len(prompts))
]

#### Teardown

In [None]:
# Release every reference that keeps the model alive before collecting garbage.
# `sequences` is the lazy iterator returned by the pipeline call; it holds on to
# the pipeline (and therefore the model), so deleting only the pipeline variable
# leaves the weights allocated on the GPU. Assigning None (rather than `del`)
# also works when the generation cell was never run.
sequences = None
del tokenizer
del model_code_llama_7b
gc.collect()

282

In [None]:
# Release cached GPU blocks that are no longer in use, then display how many
# bytes are still occupied by live tensors.
torch.cuda.empty_cache()
torch.cuda.memory_allocated()

4221281280

### Phi-1 (1.3B)


#### Model init

In [None]:
model_name = "microsoft/phi-1"

# Left-side padding, as is usual for batched generation with decoder-only models.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    padding_side='left',
    trust_remote_code=True,
)
# Use the EOS token id as the padding token id.
tokenizer.pad_token_id = tokenizer.eos_token_id

# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint to run locally.
model_phi_1 = transformers.pipeline(
    task="text-generation",
    model=model_name,
    tokenizer=tokenizer,
    device_map="auto",
    torch_dtype=TORCH_DTYPE,
    trust_remote_code=True,
)

tokenizer_config.json:   0%|          | 0.00/237 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/734 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.84G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/74.0 [00:00<?, ?B/s]

Device set to use cuda:0


#### Mercury

In [None]:
# Re-seed Python's and PyTorch's RNGs before sampling.
random.seed(SEED)
torch.manual_seed(SEED)

# The whole prompt dataset is handed to the pipeline and processed in batches of 4.
sequences = model_phi_1(
    prompts,
    batch_size=4,
    max_new_tokens=512,
    num_return_sequences=1,
    # Sampling settings.
    do_sample=True,
    temperature=0.2,
    top_k=10,
    top_p=0.95,
    # The EOS token id is used for both end-of-sequence and padding.
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

# Keep the stripped text of the first returned sequence for every prompt;
# tqdm reports progress while the results are consumed.
output = [
    generated[0]['generated_text'].strip()
    for generated in tqdm(sequences, total=len(prompts))
]

100%|██████████| 256/256 [11:53<00:00,  2.79s/it]


#### Teardown

In [None]:
# Release every reference that keeps the model alive before collecting garbage.
# `sequences` is the lazy iterator returned by the pipeline call; it holds on to
# the pipeline (and therefore the model), so deleting only the pipeline variable
# leaves the weights allocated on the GPU. Assigning None (rather than `del`)
# also works when the generation cell was never run.
sequences = None
del tokenizer
del model_phi_1
gc.collect()

52

In [None]:
# Release cached GPU blocks that are no longer in use, then display how many
# bytes are still occupied by live tensors.
torch.cuda.empty_cache()
torch.cuda.memory_allocated()

2845061632

### Phi-2 (2.7B)

#### Model init

In [None]:
model_name = "microsoft/phi-2"

# Left-side padding, as is usual for batched generation with decoder-only models.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    padding_side='left',
    trust_remote_code=True,
)
# Use the EOS token id as the padding token id.
tokenizer.pad_token_id = tokenizer.eos_token_id

# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint to run locally.
model_phi_2 = transformers.pipeline(
    task="text-generation",
    model=model_name,
    tokenizer=tokenizer,
    device_map="auto",
    torch_dtype=TORCH_DTYPE,
    trust_remote_code=True,
)

tokenizer_config.json:   0%|          | 0.00/7.34k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.7k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Device set to use cuda:0


#### Mercury

In [None]:
# Re-seed Python's and PyTorch's RNGs before sampling.
random.seed(SEED)
torch.manual_seed(SEED)

# The whole prompt dataset is handed to the pipeline and processed in batches of 4.
sequences = model_phi_2(
    prompts,
    do_sample=True,
    batch_size=4,
    top_k=10,
    temperature=0.3,
    top_p=0.95,
    num_return_sequences=1,
    # The EOS token id is used for both end-of-sequence and padding.
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    max_new_tokens=512,
)

# Keep the text of the first returned sequence for every prompt. The text is
# stripped of surrounding whitespace, matching the Code Llama and Phi-1 cells,
# so all three models are post-processed the same way.
output = [
    sequence[0]['generated_text'].strip()
    for sequence in tqdm(sequences, total=len(prompts))
]

100%|██████████| 256/256 [27:38<00:00,  6.48s/it]


#### Teardown

In [None]:
# Release every reference that keeps the model alive before collecting garbage.
# `sequences` is the lazy iterator returned by the pipeline call; it holds on to
# the pipeline (and therefore the model), so deleting only the pipeline variable
# leaves the weights allocated on the GPU. Assigning None (rather than `del`)
# also works when the generation cell was never run.
sequences = None
del tokenizer
del model_phi_2
gc.collect()

30

In [None]:
# Release cached GPU blocks that are no longer in use, then display how many
# bytes are still occupied by live tensors.
torch.cuda.empty_cache()
torch.cuda.memory_allocated()

5567887872

## Postprocessing

In [None]:
request = 'Complete python3 code to solve the following coding problem.'

# Map each problem's slug to its generated text with the instruction line and
# problem statement removed. `output` is whatever the most recently executed
# generation cell produced, indexed in the same order as `mercury`.
completions = {}
for idx, instance in enumerate(mercury):
    statement = instance['pretty_content'][0]
    echoed_prefix = f'{request}\n{statement}\n'
    slug = instance['slug_name']
    completions[slug] = output[idx].replace(echoed_prefix, '')

In [None]:
# Persist the slug -> completion mapping as one JSON object.
with open('completions.jsonl', mode='w', encoding='utf-8') as out_file:
    payload = json.dumps(completions)
    out_file.write(payload)

In [None]:
# Hand the saved file to the browser for download via the google.colab `files` helper.
files.download('completions.jsonl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>