In [1]:
!pip install -U transformers bitsandbytes accelerate human-eval
!pip install --upgrade "autoawq>=0.1.6" "transformers>=4.35.0"

Collecting transformers
  Downloading transformers-4.36.2-py3-none-any.whl.metadata (126 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.8/126.8 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.42.0-py3-none-any.whl.metadata (9.9 kB)
Collecting accelerate
  Downloading accelerate-0.26.1-py3-none-any.whl.metadata (18 kB)
Collecting human-eval
  Downloading human_eval-1.0.3-py3-none-any.whl.metadata (153 bytes)
Collecting huggingface-hub<1.0,>=0.19.3 (from transformers)
  Downloading huggingface_hub-0.20.2-py3-none-any.whl.metadata (12 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2023.12.25-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.9/40.9 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.19,>=0.14 (from transformers)
  Downloading tokenizers-0.15.0-cp310-cp310-ma

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig
from IPython.display import clear_output, display, HTML

model_name_or_path = "TheBloke/Magicoder-S-DS-6.7B-AWQ"
tokenizer1 = AutoTokenizer.from_pretrained(model_name_or_path)
model1 = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    low_cpu_mem_usage=True,
    device_map="cuda:0"
)


model_name_or_path = "TheBloke/deepseek-coder-6.7B-instruct-AWQ"
tokenizer2 = AutoTokenizer.from_pretrained(model_name_or_path)
model2 = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    low_cpu_mem_usage=True,
    device_map="cuda:0"
)

prompt_template1="""You are an exceptionally intelligent coding assistant that consistently delivers accurate and reliable responses to user instructions.

@@ Instruction
{instruction}

@@ Response
{response}"""

prompt_template2="""You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.
### Instruction:
{instruction}
### Response:
{response}"""


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [2]:
from IPython.display import clear_output, display, HTML
import concurrent.futures
import torch
import numpy as np
import random

prompt_template1="""You are an exceptionally intelligent coding assistant that consistently delivers accurate and reliable responses to user instructions.

@@ Instruction
{instruction}

@@ Response
{response}"""

prompt_template2="""You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.
### Instruction:
{instruction}
### Response:
{response}"""

# Define a function to run model prediction
def run_model_prediction(tokenizer, model, input_text, device="cuda"):
    inputs = tokenizer([input_text], return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_new_tokens=1, return_dict_in_generate=True, output_scores=True)
    transition_scores = model.compute_transition_scores(outputs.sequences, outputs.scores, normalize_logits=True)
    input_length = inputs.input_ids.shape[1]
    generated_tokens = outputs.sequences[:, input_length:]
    probability = np.exp(transition_scores.cpu().numpy()).squeeze()
    t_score = transition_scores.cpu().numpy().squeeze()
    return generated_tokens, probability, t_score

# Main loop
input_text = r"Generate a react web app with basic user authentication and style it with tailwind CSS."
response = ""
display_text = ""

for n in range(400):
    clear_output(wait=True)
    
    input_text1 = prompt_template1.format(instruction=input_text, response=response)
    input_text2 = prompt_template2.format(instruction=input_text, response=response)
    
    # Run both parts in parallel
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future1 = executor.submit(run_model_prediction, tokenizer1, model1, input_text1)
        future2 = executor.submit(run_model_prediction, tokenizer2, model2, input_text2)
        
        generated_tokens1, probability1, t_score1 = future1.result()
        generated_tokens2, probability2, t_score2 = future2.result()
    
    # Choose the token with the higher probability
    if t_score1 > t_score2: # probability1 > probability2 should not matter what you choose
        gen_token = tokenizer1.decode(generated_tokens1[0])
        color = '#93f5af'  # green
    else:
        gen_token = tokenizer2.decode(generated_tokens2[0])
        color = '#93c4f5'  # blue
    
    response += gen_token
    html_token = gen_token.replace("\n", "<br>")
    display_text += f'<span style="color: {color}">{html_token}</span>'
    display(HTML(display_text.replace(r"\n", "<br>")))
    
    print(f"| {tokenizer1.decode(generated_tokens1[0]):8s} | {probability1:.4f} | {probability1:.2%}")
    print(f"| {tokenizer2.decode(generated_tokens2[0]):8s} | {probability2:.4f} | {probability2:.2%}")

| 
        | 0.9998 | 99.98%
| 
        | 1.0000 | 100.00%


In [3]:
def generate_completions(n, prompt):
    input_text = prompt# r"Generate a react web app with basic user authentication and style it with tailwind css."
    response = ""
    display_text = ""
    generated_tokens_list = []

    for _ in range(n):
        # clear_output(wait=True)
        
        input_text1 = prompt_template1.format(instruction=input_text, response=response)
        input_text2 = prompt_template2.format(instruction=input_text, response=response)
        
        with concurrent.futures.ThreadPoolExecutor() as executor:
            future1 = executor.submit(run_model_prediction, tokenizer1, model1, input_text1)
            future2 = executor.submit(run_model_prediction, tokenizer2, model2, input_text2)
            
            generated_tokens1, probability1, t_prob1 = future1.result()
            generated_tokens2, probability2, t_prob2 = future2.result()
        
        if probability1 > probability2:
            gen_token = tokenizer1.decode(generated_tokens1[0])
            color = '#93f5af'  # green
        else:
            gen_token = tokenizer2.decode(generated_tokens2[0])
            color = '#93c4f5'  # blue

        if gen_token == "<|EOT|>":
            break
        if gen_token == " <｜end▁of▁sentence｜>":
            break
        response += gen_token
        html_token = gen_token.replace("\n", "<br>")
        display_text += f'<span style="color: {color}">{html_token}</span>'
        # display(HTML(display_text.replace(r"\n", "<br>")))
        
        generated_tokens_list.append(gen_token)
        
        # print(f"| {tokenizer1.decode(generated_tokens1[0]):8s} | {probability1:.4f} | {probability1:.2%}")
        # print(f"| {tokenizer2.decode(generated_tokens2[0]):8s} | {probability2:.4f} | {probability2:.2%}")
        if response.endswith("\n\n\n"):
            break

    print(response)
    return response



n = 10
completions = generate_completions(n, "Create a react exxample with Tailwind")
print(completions)



Sure, here's a simple example of a
Sure, here's a simple example of a


In [5]:
from human_eval.data import write_jsonl, read_problems

problems = read_problems()


In [None]:
# from human_eval.data import write_jsonl, read_problems
# from tqdm import tqdm 

# def generate_one_completion(prompt: str):
#     """TODO: Fix the generate_completion to include the ```prompt from humaneval in the output"""
#   return generate_completions(300, prompt)




# num_samples_per_task = 1
# samples = [
#     dict(task_id=task_id, completion=generate_one_completion(problems[task_id]["prompt"]))
#     for task_id in tqdm(problems)
#     for _ in range(num_samples_per_task)
# ]
# write_jsonl("combined.jsonl", samples)