In [1]:
# Unpinned install of the Hugging Face libraries this notebook imports or relies on.
# The run recorded below resolved transformers 4.30.2, sentencepiece 0.1.99 and
# accelerate 0.20.3 -- pin those versions if this exact run has to be reproduced.
%pip install transformers SentencePiece accelerate

Collecting transformers
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m61.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting SentencePiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m67.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.20.3-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.6/227.6 kB[0m [31m24.1 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m26.2 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.1

In [2]:
!pip install "git+https://github.com/evalplus/evalplus.git" --upgrade

Collecting git+https://github.com/evalplus/evalplus.git
  Cloning https://github.com/evalplus/evalplus.git to /tmp/pip-req-build-vdgfjuw8
  Running command git clone --filter=blob:none --quiet https://github.com/evalplus/evalplus.git /tmp/pip-req-build-vdgfjuw8
  Resolved https://github.com/evalplus/evalplus.git to commit 2930c05c6c4f1c2e6ffdf03d811aaff3fbf32df8
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting wget>=3.2 (from evalplus==0.1.6)
  Downloading wget-3.2.zip (10 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tempdir>=0.7.1 (from evalplus==0.1.6)
  Downloading tempdir-0.7.1.tar.gz (5.9 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: evalplus, tempdir, wget
  Building wheel for evalplus (pyproject.toml) ... [?25l[?25hdone

In [3]:
import os
import re
from tqdm import tqdm
import transformers, torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, LlamaTokenizer, LlamaForCausalLM, GenerationConfig

In [None]:
# Window [start_index, end_index) applied to the sorted HumanEval+ task ids.
start_index, end_index = 0, 164

# Token budget: used both as the tokenizer truncation length and as the
# length limit handed to model.generate().
max_len = 600

# Generated text is truncated wherever one of these substrings appears.
STOP_SEQS = [
  '\nclass',
  '\ndef',
  '\n#',
  '\nif',
  '\nprint',
]

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
  DEVICE = "cuda"
else:
  DEVICE = "cpu"

In [None]:
## Load the HumanEval+ problems provided by evalplus
from evalplus.data import get_human_eval_plus, write_jsonl

# Mapping of task id -> problem record; each record exposes a 'prompt' entry.
plus_problems = get_human_eval_plus()

# Keep the configured window of task ids (in sorted order) and collect the
# prompt of every selected task in the same order.
plus_task_ids = sorted(plus_problems)[start_index:end_index]
plus_prompts = [plus_problems[tid]['prompt'] for tid in plus_task_ids]

# One prompt per selected task id, so either list gives the sample count.
num_samples = len(plus_task_ids)
print("Number of samples: {}".format(num_samples))

Downloading HumanEvalPlus dataset...
Number of samples: 164


In [None]:
def generate_completion_samples_codeT5(model,temp, output_file,loop):
  """Sample completions for the loaded HumanEval+ prompts with a seq2seq checkpoint.

  Relies on the notebook-level globals ``plus_prompts``, ``plus_task_ids``,
  ``num_samples``, ``max_len``, ``STOP_SEQS`` and ``DEVICE``.

  Args:
    model: Checkpoint name or path handed to ``from_pretrained`` for both the
      tokenizer and the model.
    temp: Sampling temperature passed to ``generate``.
    output_file: Path of the JSONL file the samples are written to.
    loop: Number of completions sampled for every prompt.

  Returns:
    None. One ``{'task_id': ..., 'completion': ...}`` record per sample is
    written to ``output_file`` after all prompts have been processed.
  """
  tokenizer = AutoTokenizer.from_pretrained(model)

  # Keep the checkpoint name (`model`) and the loaded network under separate
  # names instead of rebinding the parameter.
  net = AutoModelForSeq2SeqLM.from_pretrained(model,
                                              trust_remote_code=True,
                                              torch_dtype=torch.float16,
                                              low_cpu_mem_usage=True)
  net.eval()
  net.to(DEVICE)

  completion_seqs = []

  for i in tqdm(range(num_samples), ncols=0, total=num_samples):
    task_id = plus_task_ids[i]
    # The prompt is fed with tabs in place of four-space indents; the
    # completion is converted back to spaces before it is stored.
    prompt = plus_prompts[i].replace('    ', '\t')

    encoding_decoder = tokenizer([prompt], return_tensors="pt", truncation=True, max_length=max_len).to(DEVICE)
    # Loop-invariant: number of prompt tokens to strip from every generation.
    prompt_len = encoding_decoder['input_ids'].shape[-1]

    for _ in tqdm(range(loop), total=loop, leave=False, ncols=0):

      with torch.no_grad():
        # The prompt is supplied both as encoder input and as the decoder prefix.
        gen_tokens = net.generate(**encoding_decoder,
                                  decoder_input_ids=encoding_decoder['input_ids'],
                                  do_sample=True,
                                  temperature=temp,
                                  max_length=max_len,
                                  decoder_start_token_id=tokenizer.pad_token_id,
                                  eos_token_id=tokenizer.eos_token_id,
                                  top_p=0.95)
      # Drop the leading prompt-length tokens so only the continuation is decoded.
      gen_tokens = gen_tokens[:, prompt_len:]

      gen_seqs = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)

      for gen_seq in gen_seqs:
        completion_seq = gen_seq
        # Cut the completion at each stop sequence still present in the text.
        for stop_seq in STOP_SEQS:
          index = completion_seq.find(stop_seq)
          if index != -1:
            completion_seq = completion_seq[:index]
        completion_seq = completion_seq.replace('\t', '    ')

        completion_seqs.append(
            {'task_id': task_id,
             'completion': completion_seq
             }
        )

  print("Saving results to {}".format(output_file))

  write_jsonl(output_file, completion_seqs)



In [None]:
def generate_completion_samples_llama(model,temp, output_file,loop):
  """Sample completions for the loaded HumanEval+ prompts with a LLaMA checkpoint.

  Relies on the notebook-level globals ``plus_prompts``, ``plus_task_ids``,
  ``num_samples``, ``max_len``, ``STOP_SEQS`` and ``DEVICE``.

  Args:
    model: Checkpoint name or path handed to ``from_pretrained`` for both the
      tokenizer and the model.
    temp: Sampling temperature stored in the generation config.
    output_file: Path of the JSONL file the samples are written to.
    loop: Number of completions sampled for every prompt.

  Returns:
    None. One ``{'task_id': ..., 'completion': ...}`` record per sample is
    written to ``output_file`` after all prompts have been processed.
  """
  tokenizer = LlamaTokenizer.from_pretrained(model)

  # `trust_remote_code` is intentionally not passed: the library reports that
  # it has no effect on non-Auto classes and is ignored.
  # The loaded network gets its own name instead of rebinding `model`.
  net = LlamaForCausalLM.from_pretrained(model,
                                         torch_dtype=torch.float16,
                                         low_cpu_mem_usage=True)
  net.eval()
  net.to(DEVICE)

  completion_seqs = []

  generation_config = transformers.GenerationConfig(
    do_sample=True,
    temperature=temp,
    top_p=0.95,
    max_new_tokens = max_len,
  )

  for i in tqdm(range(num_samples), ncols=0, total=num_samples):
    task_id = plus_task_ids[i]
    # The prompt is fed with tabs in place of four-space indents; the
    # completion is converted back to spaces before it is stored.
    prompt = plus_prompts[i].replace('    ', '\t')

    encoding = tokenizer([prompt], return_tensors="pt", truncation=True, max_length=max_len).to(DEVICE)
    input_ids = encoding['input_ids']
    # Loop-invariant: number of prompt tokens to strip from every generation.
    prompt_len = input_ids.shape[-1]

    for _ in tqdm(range(loop), total=loop, leave=False, ncols=0):

      with torch.no_grad():
        gen_tokens = net.generate(
                                  input_ids=input_ids,
                                  # Single unpadded prompt, so every position is attended to.
                                  attention_mask=torch.ones_like(input_ids),
                                  generation_config=generation_config
                                  )

      # Drop the leading prompt-length tokens so only the continuation is decoded.
      gen_tokens = gen_tokens[:, prompt_len:]

      gen_seqs = tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)

      for gen_seq in gen_seqs:
        completion_seq = gen_seq
        # Cut the completion at each stop sequence still present in the text.
        for stop_seq in STOP_SEQS:
          index = completion_seq.find(stop_seq)
          if index != -1:
            completion_seq = completion_seq[:index]
        completion_seq = completion_seq.replace('\t', '    ')

        completion_seqs.append(
            {'task_id': task_id,
             'completion': completion_seq
             }
        )

  print("Saving results to {}".format(output_file))

  write_jsonl(output_file, completion_seqs)

In [None]:
# One sample per task at temperature 0.2 from the CodeT5+ 2B checkpoint.
generate_completion_samples_codeT5(
  model="Salesforce/codet5p-2b",
  temp=0.2,
  output_file="codet5_2b_samples.jsonl",
  loop=1,
)

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.
The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.


Downloading (…)lve/main/config.json:   0%|          | 0.00/427 [00:00<?, ?B/s]

Downloading (…)model.bin.index.json: 0.00B [00:00, ?B/s]

Downloading shards:   0%|          | 0/33 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00002-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00003-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00004-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00005-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00006-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00007-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00008-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00009-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00010-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00011-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00012-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00013-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00014-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00015-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00016-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00017-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00018-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00019-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00020-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00021-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00022-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00023-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00024-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00025-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00026-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00027-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00028-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00029-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00030-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00031-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00032-of-00033.bin:   0%|          | 0.00/405M [00:00<?, ?B/s]

Downloading (…)l-00033-of-00033.bin:   0%|          | 0.00/524M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

  0% 0/164 [00:00<?, ?it/s]
  0% 0/1 [00:00<?, ?it/s][A
100% 1/1 [00:45<00:00, 45.45s/it][A
  1% 1/164 [00:45<2:03:33, 45.48s/it]
  0% 0/1 [00:00<?, ?it/s][A
100% 1/1 [00:40<00:00, 40.65s/it][A
  1% 2/164 [01:26<1:55:09, 42.65s/it]
  0% 0/1 [00:00<?, ?it/s][A
100% 1/1 [00:12<00:00, 12.16s/it][A
  2% 3/164 [01:38<1:17:06, 28.73s/it]
  0% 0/1 [00:00<?, ?it/s][A
100% 1/1 [00:40<00:00, 40.86s/it][A
  2% 4/164 [02:19<1:29:24, 33.53s/it]
  0% 0/1 [00:00<?, ?it/s][A
100% 1/1 [00:41<00:00, 41.11s/it][A
  3% 5/164 [03:00<1:36:06, 36.27s/it]
  0% 0/1 [00:00<?, ?it/s][A
100% 1/1 [00:15<00:00, 15.21s/it][A
  4% 6/164 [03:15<1:16:39, 29.11s/it]
  0% 0/1 [00:00<?, ?it/s][A
100% 1/1 [00:07<00:00,  7.16s/it][A
  4% 7/164 [03:22<57:25, 21.94s/it]  
  0% 0/1 [00:00<?, ?it/s][A
100% 1/1 [00:41<00:00, 41.20s/it][A
  5% 8/164 [04:03<1:13:00, 28.08s/it]
  0% 0/1 [00:00<?, ?it/s][A
100% 1/1 [00:08<00:00,  8.89s/it][A
  5% 9/164 [04:12<57:03, 22.09s/it]  
  0% 0/1 [00:00<?, ?it/s][A
100% 1/

Saving results to llama_7b_hf_samples.jsonl





In [None]:
# One sample per task at temperature 0.2 from the LLaMA 7B checkpoint.
generate_completion_samples_llama(
  model="decapoda-research/llama-7b-hf",
  temp=0.2,
  output_file="llama_7b_hf_samples.jsonl",
  loop=1,
)