In [1]:
import torch
from tqdm.auto import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

In [2]:
torch.set_default_device("cuda")

# Phi-1
https://huggingface.co/microsoft/phi-1

In [3]:
""" Models
microsoft/phi-1 (1.3 billion parameters)
microsoft/phi-1_5 (1.3 billion parameters)
microsoft/phi-2 (2.7 billion parameters)
Qwen/Qwen1.5-0.5B-Chat
"""
LLM_MODEL_NAME = "Qwen/Qwen1.5-0.5B-Chat"

In [4]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [5]:

model = AutoModelForCausalLM.from_pretrained(
    LLM_MODEL_NAME,
    torch_dtype="auto",
    trust_remote_code=True,
    quantization_config=bnb_config
)
tokenizer = AutoTokenizer.from_pretrained(
    LLM_MODEL_NAME,
    trust_remote_code=True
)
print("Model loaded")

`low_cpu_mem_usage` was None, now set to True since model is quantized.


model.safetensors:   0%|          | 0.00/1.24G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/206 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Model loaded


## Example usage

In [12]:
problem_statement = '''
def double_number(num):
    """
    Returns the double of the input number.

    Parameters:
    num (int or float): The number to be doubled.

    Returns:
    int or float: The double of the input number.
    """
'''

In [13]:
inputs = tokenizer(
   problem_statement,
   return_tensors="pt",
   return_attention_mask=False
)

In [24]:
outputs = model.generate(**inputs, max_length=256, temperature=0.1)

In [25]:
print(tokenizer.batch_decode(outputs)[0])


def double_number(num):
    """
    Returns the double of the input number.

    Parameters:
    num (int or float): The number to be doubled.

    Returns:
    int or float: The double of the input number.
    """
    return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return ret

In [27]:
text = tokenizer.batch_decode(outputs)[0]
# cut the text at tokenizer.eos_token
text = text[: text.find(tokenizer.eos_token)]
# remove the problem statement
text = text[len(problem_statement):]
# the text should have indentation as is part of a function
# if in a new line it not indented, remove it till the end
res = ""
for line in text.split("\n"):
    # if the line is not empty, and not indented, break
    if line and not line.startswith(" "):
        break
    res += line + "\n"
print(res)

    return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return return re

## Predictions

In [28]:
from human_eval.data import write_jsonl, read_problems

In [29]:
problems = read_problems()
print(f"Loaded {len(problems)} problems")

Loaded 164 problems


In [30]:
def generate_one_completion(problem_statement, temp=0.0):
    inputs = tokenizer(
        problem_statement,
        return_tensors="pt",
        return_attention_mask=False
    )
    outputs = model.generate(**inputs, max_length=512, temperature=temp)
    text = tokenizer.batch_decode(outputs)[0]
    # cut the text at tokenizer.eos_token
    text = text[: text.find(tokenizer.eos_token)]
    # remove the problem statement
    text = text[len(problem_statement):]

    # the text should have indentation as is part of a function
    # if in a new line it not indented, remove it till the end
    res = ""
    for line in text.split("\n"):
        # if the line is not empty, and not indented, break
        if line and not line.startswith(" "):
            break
        res += line + "\n"
    
    return res

In [31]:
# we can generate more than one candidate per task
# later one pass@1, pass@10... will be used to evaluate the model
num_samples_per_task = 1

In [33]:
results= []
for task_id in tqdm(problems):
    for _ in range(num_samples_per_task):
        results.append({
            'task_id': task_id,
            'completion': generate_one_completion(
                problems[task_id]['prompt'], temp=0.1
            ),
        })

  0%|          | 0/164 [00:00<?, ?it/s]

In [34]:
write_jsonl(f"submission_hf_{LLM_MODEL_NAME.replace('/', '-')}.jsonl", results)
# Now run: $ evaluate_functional_correctness samples.jsonl

In [16]:
h15 = generate_one_completion(problems["HumanEval/15"]["prompt"], temp=0.75)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [17]:
print(h15)

    return''.join(str(i) for i in range(n+1))


