In [1]:
from transformers import pipeline
import torch

# Build the Hugging Face text-generation pipeline shared by the cells below.
MODEL_ID = "deepseek-ai/deepseek-coder-7b-instruct"
DEVICE = 'cuda:3'  # device string handed to `device_map`

text_generator = pipeline(
    "text-generation",
    model=MODEL_ID,
    device_map=DEVICE,
    # torch_dtype=torch.bfloat16  (disabled: no dtype is passed, so the library default applies)
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [7]:
def generate(instruction, max_new_tokens=2024):
    """Generate the model's reply to a single user instruction.

    The instruction is wrapped as a one-message chat (role ``user``),
    rendered to a prompt string with the tokenizer's chat template, and
    passed to the module-level ``text_generator`` pipeline.

    Args:
        instruction: Text of the user message.
        max_new_tokens: Limit on newly generated tokens, forwarded to the
            pipeline unchanged. Defaults to 2024, the value that used to be
            hard-coded here.

    Returns:
        The ``'generated_text'`` field of the first pipeline result. The
        pipeline is called with ``return_full_text=False``.
    """
    messages = [{'role': 'user', 'content': instruction}]
    prompt = text_generator.tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=False,  # ask for the rendered prompt text rather than token ids
    )

    outputs = text_generator(
        prompt,
        max_new_tokens=max_new_tokens,
        return_full_text=False,
    )

    return outputs[0]['generated_text']

In [8]:
# Read one item name per line, trimming surrounding whitespace.
# Blank lines are skipped: an empty item would otherwise produce a prompt
# with no subject and an output file named ".txt" in the generation loops.
with open('items.txt') as f:
    items = [name for name in (line.strip() for line in f) if name]

# Generate code

In [43]:
# NOTE(review): disabled hand-written prompt builder. generate() renders its
# prompt through the tokenizer's chat template instead; this looks like an
# earlier alternative kept for reference — confirm before deleting.
# def compose_prompt(instruction):
#     system_prompt = "You are an AI programmer that provides Python code that satisfies the given instruction.\
#     The response should be only Python function without any tests. Here is an example:\
#     ### Instruction:\nWrite a Python function that adds two numbers\n### Response:\ndef add_two_numbers(num1, num2):\n\treturn num1 + num2"
#     return f'<｜begin▁of▁sentence｜>{system_prompt}### Instruction:\n{instruction}\n### Response:\n'

## Single quality

In [57]:
import os

from tqdm import tqdm

# Directory that receives one raw model response per item.
RAW_CODE_DIR = 'generated_code_raw'
# Create it before the loop so a fresh checkout does not fail with
# FileNotFoundError on the first write, after a generation has already run.
os.makedirs(RAW_CODE_DIR, exist_ok=True)

for item in tqdm(items):
    instruction = f"Write a Python function that implements {item} without any tests and usage examples."
    response = generate(instruction)
    # The item text is used verbatim as the file name
    # (assumes items contain no path separators — TODO confirm).
    with open(f'{RAW_CODE_DIR}/{item}.txt', 'w') as f:
        f.write(response)

100%|██████████| 91/91 [14:54<00:00,  9.83s/it]


## Multiple quality

In [11]:
import os

from tqdm import tqdm

# Directory that receives the three-tier (junior/middle/senior) responses.
VARIOUS_QUALITY_DIR = 'generated_code_raw_various_quality'
# Create it before the loop so the first write cannot fail with
# FileNotFoundError after a generation has already run.
os.makedirs(VARIOUS_QUALITY_DIR, exist_ok=True)

for item in tqdm(items):
    # Previously generated single-quality response for this item; it is
    # prepended to the new instruction as context.
    with open(f"generated_code_raw/{item}.txt") as f:
        info = f.read()
    instruction = f"{info}\n\nProvide three different implementations of the {item}: junior developer implementation, middle developer implementation, and senior developer implementation."
    response = generate(instruction)
    with open(f'{VARIOUS_QUALITY_DIR}/{item}.txt', 'w') as f:
        f.write(response)

100%|██████████| 91/91 [35:43<00:00, 23.56s/it]


# Generate tests

In [12]:
import os

from tqdm import tqdm

# Directory that receives one raw test-generation response per item.
TESTS_DIR = 'generated_tests_raw'
# Create it before the loop so the first write cannot fail with
# FileNotFoundError after a generation has already run.
os.makedirs(TESTS_DIR, exist_ok=True)

for item in tqdm(items):
    # Previously generated single-quality response for this item; it is
    # prepended to the instruction so the model sees what to test.
    with open(f'generated_code_raw/{item}.txt') as f:
        info = f.read()
    instruction = f"{info}\n\nPlease, write Python code that does a comprehensive testing of the Python function."
    response = generate(instruction)
    with open(f'{TESTS_DIR}/{item}.txt', 'w') as f:
        f.write(response)

100%|██████████| 91/91 [24:24<00:00, 16.09s/it]
