In [7]:
!pip install datasets -q
!pip install -U bitsandbytes -q
!pip install sacrebleu -q
!pip install evaluate -q
!pip install rouge_score -q
!pip install bert_score -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.0/104.0 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig


quantization_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-Instruct-hf")
model = AutoModelForCausalLM.from_pretrained("codellama/CodeLlama-7b-Instruct-hf",
                                             quantization_config=quantization_config)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.59k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/646 [00:00<?, ?B/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.


model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [3]:
from datasets import load_dataset


dataset = load_dataset("ksych/code-completion-tiny")

README.md:   0%|          | 0.00/341 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/13.7k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/50 [00:00<?, ? examples/s]

In [4]:
def get_llama_code_completion_prompt(prefix: str, suffix: str, tokenizer):
    prefix_tokens = tokenizer(prefix)["input_ids"][1:]
    suffix_tokens = tokenizer(suffix)["input_ids"][1:]

    pre_suf = tokenizer("<PRE><SUF>")["input_ids"]
    mid = tokenizer("<MID>")["input_ids"][1:]
    return [pre_suf + suffix_tokens + mid + prefix_tokens]


def extract_single_line(predicted: str, prefix: str):
    mid_ind = predicted.find("<MID>")
    predicted = predicted[mid_ind:]
    processed = predicted.lstrip("<MID>").strip()
    return processed[len(prefix):].strip().splitlines()[0]


In [12]:
from evaluate import load


bleu = load("bleu")
rouge = load("rouge")
chrf = load("chrf")
exact_match = load("exact_match")
bert_score = load("bertscore")


def compute_bleu(prediction: str, reference: str):
    return bleu.compute(predictions=[prediction], references=[reference])["bleu"]


def compute_rouge(prediction: str, reference: str):
    return rouge.compute(predictions=[prediction], references=[reference])["rougeLsum"]


def compute_exact_match(prediction: str, reference: str):
    return exact_match.compute(predictions=[prediction], references=[reference])["exact_match"]


def compute_bertscore(prediction: str, reference: str):
    return bert_score.compute(predictions=[prediction], references=[reference], lang="en")["f1"][0]


def compute_chrf(prediction, reference):
    return chrf.compute(predictions=[prediction], references=[reference])["score"]


metrics_to_function = {
    "exact_match": compute_exact_match,
    "chrf": compute_chrf,
    "bleu": compute_bleu,
    "rouge": compute_rouge,
    "bertscore": compute_bertscore,
}


def compute_metrics(predictions: list[str], references: list[str]):
    assert len(predictions) == len(references)

    metrics: dict[str, list[float]] = {k: [] for k in list(metrics_to_function.keys())}
    for i in range(len(predictions)):
        for metric_name, metric in metrics_to_function.items():
            metrics[metric_name].append(metric(predictions[i].strip(), references[i].strip()))

    avg_metrics = {k: sum(v) / len(v) for k, v in metrics.items()}
    return avg_metrics


In [13]:
from pprint import pprint


def generate(model, tokenizer, entry: dict[str, str]):
    tokens = get_llama_code_completion_prompt(entry["prefix"], entry["suffix"], tokenizer)
    print("PROMPT:", tokenizer.decode(tokens[0]))
    print("====" * 5)

    tokens = torch.tensor(tokens).to(model.device)

    predicted = model.generate(tokens, do_sample=True, top_k=50, top_p=0.9, max_new_tokens=100).squeeze(0)
    decoded = tokenizer.decode(predicted)
    single_line = extract_single_line(decoded, entry["prefix"])

    print("REFERENCE:", entry["reference"])
    print("====" * 5)
    print("PREDICTED:", single_line)
    print("\n")

    return single_line


n_examples = 10

references: list[str] = []
predictions: list[str] = []

for row in dataset["train"].select(range(n_examples)):
    prediction = generate(model, tokenizer, row)
    predictions.append(prediction)
    references.append(row["reference"])

metrics = compute_metrics(predictions, references)
pprint(metrics)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


PROMPT: <s> <PRE><SUF> from torch.utils.data import DataLoader
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    default_data_collator,
 <MID> import argparse
import math
import os
import yaml
from tqdm import tqdm



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


REFERENCE: import torch

PREDICTED: import time


PROMPT: <s> <PRE><SUF> from accelerate import Accelerator, DistributedDataParallelKwargs, InitProcessGroupKwargs
from src.data import load_data
from src.tokenizer import AudioTokenizer, get_start_tokens
from src.utils import save_checkpoint, fix_checkpoint, get_exp_name
# Parse arguments
 <MID> from torch.utils.data import DataLoader
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    default_data_collator,



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


REFERENCE:     get_scheduler,

PREDICTED: TrainingArguments,


PROMPT: <s> <PRE><SUF> parser.add_argument(
    "--config", type=str, help="Path to the config.yaml file", required=True
args = parser.parse_args()
# Load config
with open(args.config, "r") as file:
 <MID> from accelerate import Accelerator, DistributedDataParallelKwargs, InitProcessGroupKwargs
from src.data import load_data
from src.tokenizer import AudioTokenizer, get_start_tokens
from src.utils import save_checkpoint, fix_checkpoint, get_exp_name
# Parse arguments



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


REFERENCE: parser = argparse.ArgumentParser(description="Train a model with configuration.")

PREDICTED: parser = argparse.ArgumentParser(


PROMPT: <s> <PRE><SUF> base_model = config["base_model"]
checkpoint_path = config.get("checkpoint_path")
save_dir = config["save_dir"]
data = config["data"]
start_audio_token = config["start_audio_token"]
 <MID> parser.add_argument(
    "--config", type=str, help="Path to the config.yaml file", required=True
args = parser.parse_args()
# Load config
with open(args.config, "r") as file:



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


REFERENCE:     config = yaml.safe_load(file)

PREDICTED: config = yaml.load(file, Loader=yaml.FullLoader)


PROMPT: <s> <PRE><SUF> path_to_cache = config["path_to_cache"]
checkpointing_steps = int(config['checkpointing_steps'])
max_grad_norm = float(config['max_grad_norm'])
torch.backends.cuda.matmul.allow_tf32 = config["allow_tf32"]
torch.backends.cudnn.allow_tf32 = config["allow_tf32"]
 <MID> base_model = config["base_model"]
checkpoint_path = config.get("checkpoint_path")
save_dir = config["save_dir"]
data = config["data"]
start_audio_token = config["start_audio_token"]



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


REFERENCE: end_audio_token = config["end_audio_token"]

PREDICTED: end_audio_token = config["end_audio_token"]


PROMPT: <s> <PRE><SUF>         model,
        dataloader,
        accelerator,
        optimizer,
        lr_scheduler,
 <MID> path_to_cache = config["path_to_cache"]
checkpointing_steps = int(config['checkpointing_steps'])
max_grad_norm = float(config['max_grad_norm'])
torch.backends.cuda.matmul.allow_tf32 = config["allow_tf32"]
torch.backends.cudnn.allow_tf32 = config["allow_tf32"]



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


REFERENCE: def train(

PREDICTED: # Load config.


PROMPT: <s> <PRE><SUF>         progress_bar,
        max_train_steps,
        save_dir
    model.train()
    total_loss = 0
 <MID>         model,
        dataloader,
        accelerator,
        optimizer,
        lr_scheduler,



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


REFERENCE:         completed_steps,

PREDICTED: train_sampler=None,


PROMPT: <s> <PRE><SUF>     for step, batch in enumerate(dataloader):
        with accelerator.accumulate(model):
            # Forward pass
            outputs = model(**batch)
            loss = outputs.loss
 <MID>         progress_bar,
        max_train_steps,
        save_dir
    model.train()
    total_loss = 0



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


REFERENCE:     acc_loss = 0

PREDICTED: l_accuracy = 0


PROMPT: <s> <PRE><SUF>             total_loss += last_loss
            acc_loss += last_loss
            accelerator.backward(loss)
            del batch, loss, outputs
            torch.cuda.empty_cache()
 <MID>     for step, batch in enumerate(dataloader):
        with accelerator.accumulate(model):
            # Forward pass
            outputs = model(**batch)
            loss = outputs.loss



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


REFERENCE:             last_loss = loss.detach().float()

PREDICTED: # Compute loss


PROMPT: <s> <PRE><SUF>             accelerator.clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            progress_bar.update(1)
 <MID>             total_loss += last_loss
            acc_loss += last_loss
            accelerator.backward(loss)
            del batch, loss, outputs
            torch.cuda.empty_cache()

REFERENCE:         if accelerator.sync_gradients:

PREDICTED: if int(batch_idx % 1000) == 0:




Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'bertscore': 0.8926930129528046,
 'bleu': 0.14257281037462816,
 'chrf': 32.43460184236882,
 'exact_match': 0.1,
 'rouge': 0.3485714285714286}
