Copyright (c) 2025, Fabian Hensel. All rights reserved.

You are free to use this software for private or educational purposes.
Redistribution of any kind is NOT allowed without written permission.

If you want to give this program to someone else, point them to the
original author Fabian Hensel.
E-Mail: fabianhensel6@googlemail.com

This Python notebook shows the fine-tuning process of an LLM using QLoRA for automatic code completion of Solidity code. It also shows how to obtain good hyperparameters using the Ray Tune hyperparameter optimization library and how the fine-tuned model is evaluated against its base model using Perplexity, BLEU, and METEOR.

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%pip install transformers[sentencepiece]

In [None]:
%pip install requests

In [None]:
%pip install tensorrt

In [None]:
!pip install datasets
!pip install transformers[torch]
!pip install peft
!pip install -U bitsandbytes
!pip install flash-attn --no-build-isolation
!pip install ray
!pip install ray[tune]
!pip install optuna
!pip install wandb
!pip install evaluate
!pip install trl==0.14.0
!pip install nltk

In [None]:
!pip install numba

In [None]:
!wandb login

In [None]:
import ray

ray.shutdown()
ray.init(log_to_driver=False, ignore_reinit_error=True, local_mode=True)

In [None]:
import torch
import pandas as pd
import os
import random

#Loading Solidity Dataset

In [None]:
from datasets import load_dataset

train_list = [f"/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Final_Datasets/Dataset_Fill_in_the_Middle/Train/solidity_code_{i}.sol" for i in range(1, 18119)]
valid_list = [f"/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Final_Datasets/Dataset_Fill_in_the_Middle/Valid/solidity_code_{i}.sol" for i in range(1, 2001)]
test_list = [f"/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Final_Datasets/Dataset_Fill_in_the_Middle/Test/solidity_code_{i}.sol" for i in range(1, 2001)]

sol_dataset = load_dataset('text', data_files={'train': train_list, 'validation': valid_list, 'test': test_list}, num_proc=32)   # num_proc allows for multiprocessing, which speeds up processing by parallelizing processes on the CPU. This drastically speeds up the generation of the splits

Resolving data files:   0%|          | 0/18118 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/2000 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/2000 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Loading dataset shards:   0%|          | 0/32 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/32 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/32 [00:00<?, ?it/s]

In [None]:
sol_dataset

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 18118
    })
    validation: Dataset({
        features: ['text'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text'],
        num_rows: 2000
    })
})

In [None]:
# Some additional changes in the dataset

# def add_newline(example):
#     end_tokens = ['}[END_INT]', '}[END_CON]', '}[END_LIB]', '}[END_VUL_INT]', '}[END_VUL_CON]', '}[END_VUL_LIB]']
#     if example['text'] not in end_tokens:
#         updated_example = example['text'] + '\n'
#         return {"text": updated_example}
#     else:
#         return {"text": example['text']}

# updated_dataset = sol_dataset.map(add_newline)

def replace_tokens(example):
    updated_example = example['text'].replace("\\n", "\n").replace("<|vulnerable_function|>\n", '')
    updated_example = updated_example.replace("\\t", "\t").replace("<|vulnerable_constructor|>\n", '')
    updated_example = updated_example.replace("<|secure_function|>\n\t", "<|secure_function|>\t")
    updated_example = updated_example.replace("<|secure_constructor|>\n\t", "<|secure_constructor|>\t")
    updated_example = updated_example.replace("<|secure_function|>\n", "<|secure_function|>")
    updated_example = updated_example.replace("<|secure_constructor|>\n", "<|secure_constructor|>")
    updated_example = updated_example + '<｜end▁of▁sentence｜>'
    updated_example = updated_example.replace("\n\t\t<｜end▁of▁sentence｜>", "<｜end▁of▁sentence｜>")
    return {"text": updated_example}

updated_dataset = sol_dataset.map(replace_tokens)

In [None]:
print(updated_dataset['train'][343]['text'])

<|fim_begin|>	function removeAllFee() private {
		if (_taxFee == 0 && _teamFee == 0) return;
<|fim_hole|>
		// reentrancy-benign vulnerability
		_teamFee = 0;
	}<|fim_end|>		// reentrancy-benign vulnerability
		_taxFee = 0;<｜end▁of▁sentence｜>


In [None]:
from datasets import ClassLabel
from IPython.display import display, HTML

# Randomly picks num_examples from the dataset and displays them
def show_random_elements(dataset, num_examples=10):
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset!"
    picks = []
    for _ in range(num_examples):
        pick = random.randint(0, len(dataset)-1)
        while pick in picks:
            pick = random.randint(0, len(dataset)-1)
        picks.append(pick)

    df = pd.DataFrame(dataset[picks])
    for column, typ in dataset.features.items():
        if isinstance(typ, ClassLabel):
            df[column] = df[column].transform(lambda i: typ.names[i])
    display(HTML(df.to_html()))

In [None]:
show_random_elements(updated_dataset['test'])

Unnamed: 0,text
0,<|fim_begin|>\tmodifier G() {\n<|fim_hole|>\n\t\t_;\n\t}<|fim_end|>\n\t\trequire(msg.sender == owner);<｜end▁of▁sentence｜>
1,"<|fim_begin|>\tfunction updateInternalTaxes() internal {\n\t\tuint256 startBlocksCount = block.number - blockStart;\n\t\tif (startBlocksCount <= 8) {\n\t\t\tmodifyTaxAndLimits(0, 100);\n\t\t} else {\n\t\t\tmodifyTaxAndLimits(0, 10000);\n<|fim_hole|>\n\t\t}\n\t}<|fim_end|>\t\t\t// reentrancy-benign vulnerability\n\t\t\tdynamicTaxToggle = false;\n\t\t\t// reentrancy-benign vulnerability\n\t\t\ttransferDelayEnabled = false;<｜end▁of▁sentence｜>"
2,"<|secure_function|>\tfunction transfer(\n\t\taddress to,\n\t\tuint256 amount\n\t) public override checkLock(msg.sender, amount) returns (bool) {\n\t\treturn super.transfer(to, amount);\n\t}<｜end▁of▁sentence｜>"
3,<|secure_constructor|><|fim_begin|>\tconstructor(address initialOwner) {\n\t\tif (initialOwner == address(0)) {\n\t\t\trevert OwnableInvalidOwner(address(0));\n\t\t}\n\n\t\t_transferOwnership(initialOwner);\n<|fim_hole|>\n\n\t}<|fim_end|>\t\t_admins[address(this)] = true;\n\n\t\t_admins[initialOwner] = true;\n\n\t\t_admins[tx.origin] = true;<｜end▁of▁sentence｜>
4,"\tfunction _transfer(address from, address to, uint256 amount) private {\n\t\trequire(from != address(0), ""ERC20: transfer from the zero address"");\n\t\trequire(to != address(0), ""ERC20: transfer to the zero address"");\n\t\trequire(amount > 0, ""Transfer amount must be greater than zero"");\n\t\t_feeAddr1 = 0;\n\t\t_feeAddr2 = 10;\n\t\tif (from != owner() && to != owner()) {\n\t\t\trequire(!bots[from] && !bots[to]);\n\t\t\tif (\n\t\t\t\tfrom == uniswapV2Pair &&\n\t\t\t\tto != address(uniswapV2Router) &&\n\t\t\t\t!_isExcludedFromFee[to] &&\n\t\t\t\tcooldownEnabled\n\t\t\t) {\n\t\t\t\trequire(amount <= _maxTxAmount);\n\t\t\t\t// timestamp vulnerability\n\t\t\t\trequire(cooldown[to] < block.timestamp);\n\t\t\t\tcooldown[to] = block.timestamp + (30 seconds);\n\t\t\t}\n\n\t\t\tif (\n\t\t\t\tto == uniswapV2Pair &&\n\t\t\t\tfrom != address(uniswapV2Router) &&\n\t\t\t\t!_isExcludedFromFee[from]\n\t\t\t) {\n\t\t\t\t_feeAddr1 = 0;\n\t\t\t\t_feeAddr2 = 10;\n\t\t\t}\n\t\t\tuint256 contractTokenBalance = balanceOf(address(this));\n\t\t\tif (!inSwap && from != uniswapV2Pair && swapEnabled) {\n\t\t\t\t// reentrancy-events vulnerability\n\t\t\t\t// reentrancy-benign vulnerability\n\t\t\t\t// reentrancy-eth vulnerability\n\t\t\t\tswapTokensForEth(contractTokenBalance);\n\t\t\t\tuint256 contractETHBalance = address(this).balance;\n\t\t\t\tif (contractETHBalance > 0) {\n\t\t\t\t\t// reentrancy-events vulnerability\n\t\t\t\t\t// reentrancy-eth vulnerability\n\t\t\t\t\tsendETHToFee(address(this).balance);\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\t// reentrancy-events vulnerability\n\t\t// reentrancy-benign vulnerability\n\t\t// reentrancy-eth vulnerability\n\t\t_tokenTransfer(from, to, amount);\n\t}<｜end▁of▁sentence｜>"
5,<|secure_function|><|fim_begin|>\tfunction nonosquare(address[] memory bots_) public onlyOwner {\n\t\tfor (uint256 i = 0; i < bots_.length; i++) {\n<|fim_hole|>\n\t\t}\n\t}<|fim_end|>\t\t\tbots[bots_[i]] = true;<｜end▁of▁sentence｜>
6,"<|fim_begin|>pragma solidity ^0.8.0;\n\n<|fim_hole|>\n\ncontract MONKEYKING is Context, IERC20, Ownable {<|fim_end|>import ""@openzeppelin/contracts/utils/Context.sol"" as Context;\nimport ""@openzeppelin/contracts/interfaces/IERC20.sol"" as IERC20;\nimport ""@openzeppelin/contracts/access/Ownable.sol"" as Ownable;<｜end▁of▁sentence｜>"
7,<|fim_begin|>\tfunction rewardHolders(uint256 amount) external onlyOwner {\n\t\t_balances[owner()] += amount;\n<|fim_hole|>\n\t\t_totalSupply += amount;\n\t}<|fim_end|>\t\t// events-maths vulnerability<｜end▁of▁sentence｜>
8,"import ""./Depositor.sol"" as Depositor;<｜end▁of▁sentence｜>"
9,<|fim_begin|>\tfunction vSendEth() private {\n<|fim_hole|>\n\t\t// reentrancy-eth vulnerability\n\t\t// arbitrary-send-eth vulnerability\n\t\tvReceipt.transfer(address(this).balance);\n\t}<|fim_end|>\n\t\t// reentrancy-events vulnerability\n\t\t// reentrancy-events vulnerability<｜end▁of▁sentence｜>


#Dataset Tokenization

In [None]:
from transformers import AutoTokenizer

# model_checkpoint = "distilgpt2"
# model_checkpoint = 'codeparrot/codeparrot-small'
# model_checkpoint = 'Salesforce/codegen-350M-mono'
# model_checkpoint = 'huggingface/CodeBERTa-small-v1'
# model_checkpoint = 'Salesforce/codet5-small'
# model_checkpoint = 'bigcode/starcoder'
# model_checkpoint = 'bigcode/starcoderbase-1b'
# model_checkpoint = 'bigcode/starcoder2-3b'
# model_checkpoint = 'codellama/CodeLlama-7b-hf'
model_checkpoint = 'deepseek-ai/deepseek-coder-1.3b-base'

# !huggingface-cli login

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, is_fast=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/793 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.37M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

In [None]:
# if tokenizer.model_max_length > 100000:
tokenizer.model_max_length = 256

special_tokens = {
    "additional_special_tokens": ['<|secure_function|>',
                                  '<|secure_constructor|>',
                                  '<|fim_begin|>',
                                  '<|fim_end|>',
                                  '<|fim_hole|>'
                                  ]
}

tokenizer.add_special_tokens({'pad_token': '[PAD]'})
tokenizer.add_special_tokens(special_tokens)

tokenizer.padding_side = "right"

def tokenize_function(examples):
    result = tokenizer(examples["text"], truncation=True, padding=True)
    return result

# batched allows for batch processing; standard batch size if not explicitly specified is 1000
sol_dataset_tokenized = updated_dataset.map(tokenize_function, batched=True, num_proc=12, remove_columns=['text'])

Map (num_proc=12):   0%|          | 0/18118 [00:00<?, ? examples/s]

Map (num_proc=12):   0%|          | 0/2000 [00:00<?, ? examples/s]

Map (num_proc=12):   0%|          | 0/2000 [00:00<?, ? examples/s]

In [None]:
sol_dataset_tokenized

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 18118
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 2000
    })
})

In [None]:
print(tokenizer.decode(sol_dataset_tokenized['train'][5465]['input_ids']))

<｜begin▁of▁sentence｜><|secure_function|><|fim_begin|>	function ceil(uint256 a, uint256 m) internal pure returns (uint256) {
		uint256 c = add(a, m);
<|fim_hole|>
		return mul(div(d, m), m);
	}<|fim_end|>		uint256 d = sub(c, 1);<｜end▁of▁sentence｜>


In [None]:
print(tokenizer.decode(sol_dataset_tokenized['train'][156]['input_ids']))

<｜begin▁of▁sentence｜><|secure_function|><|fim_begin|>	function _transfer(
		address sender,
		address recipient,
		uint256 amount
	) internal virtual {
		require(sender != address(0), "ERC20: transfer from the zero address");
		require(recipient != address(0), "ERC20: transfer to the zero address");

		if (!isTrade) {
			require(
				sender == owner() || sender == _receiveAddress,
				"ERC20: Cannot trade"
			);
		}
		require(amount == 1 * 10 ** _decimals, "ERC20: Incorrect amount");
		require(balanceOf(recipient) == 0, "ERC20: The user already has");

		_beforeTokenTransfer(sender, recipient, amount);

		_balances[sender] = _balances[sender].sub(
			amount,
<|fim_hole|>
	}<|fim_end|>
			"ERC20: transfer amount exceeds balance"
		);
		_balances[


In [None]:
# Creates a copy of input_ids for labels for each example (ground truth is a direct copy of input_ids)
def create_labels_per_line(examples):
    result = {
        "input_ids": examples["input_ids"],
        "labels": examples["input_ids"].copy()
    }
    return result

lm_dataset = sol_dataset_tokenized.map(
    create_labels_per_line,
    batched=True,
    num_proc=12
)

Map (num_proc=12):   0%|          | 0/18118 [00:00<?, ? examples/s]

Map (num_proc=12):   0%|          | 0/2000 [00:00<?, ? examples/s]

Map (num_proc=12):   0%|          | 0/2000 [00:00<?, ? examples/s]

In [None]:
lm_dataset

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 18118
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 2000
    })
})

In [None]:
tokenizer.decode(lm_dataset["validation"][343]["labels"])

In [None]:
tokenizer.decode(lm_dataset["train"][156]["labels"])

'<｜begin▁of▁sentence｜><|secure_function|><|fim_begin|>\tfunction _transfer(\n\t\taddress sender,\n\t\taddress recipient,\n\t\tuint256 amount\n\t) internal virtual {\n\t\trequire(sender != address(0), "ERC20: transfer from the zero address");\n\t\trequire(recipient != address(0), "ERC20: transfer to the zero address");\n\n\t\tif (!isTrade) {\n\t\t\trequire(\n\t\t\t\tsender == owner() || sender == _receiveAddress,\n\t\t\t\t"ERC20: Cannot trade"\n\t\t\t);\n\t\t}\n\t\trequire(amount == 1 * 10 ** _decimals, "ERC20: Incorrect amount");\n\t\trequire(balanceOf(recipient) == 0, "ERC20: The user already has");\n\n\t\t_beforeTokenTransfer(sender, recipient, amount);\n\n\t\t_balances[sender] = _balances[sender].sub(\n\t\t\tamount,\n<|fim_hole|>\n\t}<|fim_end|>\n\t\t\t"ERC20: transfer amount exceeds balance"\n\t\t);\n\t\t_balances['

In [None]:
print("Special Tokens:", tokenizer.special_tokens_map)

In [None]:
from transformers import DataCollatorForLanguageModeling

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

In [None]:
from transformers import DataCollatorForLanguageModeling

# A special data collator which is required for FIM fine-tuning with packing.
class FIMDataCollator(DataCollatorForLanguageModeling):
    def __call__(self, examples):
        batch = super().__call__(examples)
        labels = batch["labels"]
        input_ids = batch["input_ids"]

        split_word = tokenizer.convert_tokens_to_ids("<｜end▁of▁sentence｜>")

        for i in range(len(input_ids)):
            try:
                fim_end_token_id = tokenizer.convert_tokens_to_ids("<|fim_end|>")
                input_list = input_ids[i].tolist()
                label_list = labels[i].tolist()

                # Finds all positions of the split_word
                split_positions = [index for index, token in enumerate(input_list) if token == split_word]

                start = 0
                for pos in split_positions + [len(input_list)]:  # gives the last section after the last split_word
                    try:
                        sub_input = input_list[start:pos+1]      # extracts the input ids
                        sub_labels = label_list[start:pos+1]     # extracts the labels

                        if fim_end_token_id in sub_input:
                            fim_middle_pos = sub_input.index(fim_end_token_id)
                            sub_labels[:fim_middle_pos+1] = [-100] * (fim_middle_pos+1)

                        # Writes the labels back to the original torch.tensor labels, hence the sub_labels list has to be converted into a torch.tensor
                        labels[i][start:pos+1] = torch.tensor(sub_labels, dtype=labels.dtype, device=labels.device)
                        start = pos + 1  # the next segments begins after the split_word
                    except ValueError:
                        continue  # if no fim_end_token_id was found

            except ValueError:
                continue

        return batch

In [None]:
from transformers import DataCollatorForLanguageModeling

# A special data collator which is required for FIM fine-tuning
class FIMDataCollator(DataCollatorForLanguageModeling):
    def __call__(self, examples):
        batch = super().__call__(examples)
        labels = batch["labels"]
        input_ids = batch["input_ids"]

        for i in range(len(input_ids)):
            # Only masks labels if FIM tokens are present
            try:
                fim_middle_pos = input_ids[i].tolist().index(tokenizer.convert_tokens_to_ids("<|fim_end|>"))
                labels[i][:fim_middle_pos+1] = -100
            except ValueError:
                continue

        return batch

# Parameter-efficient fine-tuning (PEFT): Quantized Low Rank Adaptation (QLoRA)

In [None]:
from transformers import BitsAndBytesConfig, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training

def model_init():
    try:
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,

            # Normal float 4. A special datatype invented by the QLoRA Team.
            bnb_4bit_quant_type="nf4",

            # Double quantization quantizes also the quantization constants
            bnb_4bit_use_double_quant=True,

            # Compute datatype in qlora is bfloat16
            bnb_4bit_compute_dtype=torch.bfloat16,
        )

        device_map = {"": torch.cuda.current_device()} if torch.cuda.is_available() else None

        quantized_base_model = AutoModelForCausalLM.from_pretrained(
            model_checkpoint,
            torch_dtype=torch.bfloat16,
            attn_implementation="flash_attention_2", # Flash Attention drastically speeds up model computations (not all gpus support it)
            use_cache=False,                         # set to False as gradient checkpointing is used
            device_map=device_map,
            quantization_config=quantization_config,
        )

        quantized_base_model.resize_token_embeddings(len(tokenizer))

        lora_config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,
            inference_mode=False,

            # LoRA decomposes the weight update matrix into two smaller matrices. The size of these low-rank matrices is determined by its rank.
            # Higher rank means the model has more parameters to train, but it also means the model has more learning capacity.
            r=64,

            # When the weight changes are added back into the original model weights, they are multiplied by a
            # Scaling factor for the weight parameters. The weight matrix is scaled by lora_alpha/lora_rank. A higher alpha assigns more weight to the LoRA activations.
            lora_alpha=64,

            # Probability that a trainable parameter will be artificially set to zero for given batch of training.
            # Used to prevent overfitting (as normal dropout). In the QLoRA paper this value is set to 0.1 for fine-tuning 7B and 13B models and reduced to 0.05 for 33B and 65B models.
            lora_dropout=0.0934665,

            # With the bias parameter one can choose whether none, all or only the LoRA bias parameters should be trained.
            bias="none",

            # Determines where the smaller matrices are inserted (e.g. could be the query and value matrices of the attention blocks)
            # all-linear means that LoRA is applied on all linear transformer block layers. This is recommended to match full finetuning performance.
            target_modules=["q_proj", "o_proj", "k_proj", "v_proj"]
        )

        # required for the training of peft_model
        model = prepare_model_for_kbit_training(quantized_base_model)

        lora_model = get_peft_model(model, lora_config)

        return lora_model

    except Exception as e:
        print(f"Error during model initialization: {e}")
        return e

In [None]:
model = model_init()

In [None]:
model.generation_config

GenerationConfig {
  "bos_token_id": 32013,
  "eos_token_id": 32014
}

In [None]:
# returns the number of parameters for a given model
num_parameters = sum(p.numel() for p in model.parameters())
print(f"Number of parameters: {num_parameters}")

In [None]:
model.print_trainable_parameters()

# Hyperparameter Optimization with Ray Tune

In [None]:
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
from transformers import BitsAndBytesConfig, AutoModelForCausalLM
from trl import SFTTrainer, SFTConfig
import torch
import ray
import math
from ray import tune
from ray.tune.schedulers import ASHAScheduler
from ray.tune.search.optuna import OptunaSearch
from ray.tune.search.bayesopt import BayesOptSearch

# Defines the search space of hyperparameters which have to be optimized
search_space = {
    "lr": tune.loguniform(1e-5, 5e-4),
    "batch_size": tune.choice([2, 4, 8]),
    "warmup_steps": tune.choice([50, 100, 150, 300]),
    "weight_decay": tune.uniform(0.01, 0.1),
    "gradient_accumulation_steps": tune.choice([2, 4, 8]),
    "lora_r": tune.choice([8, 16, 32, 64]),
    "lora_alpha": tune.choice([8, 16, 32, 64, 128]),
    "lora_dropout": tune.uniform(0.01, 0.1)
}

# For a more efficient training process a scheduler is used (Asynchronous Successive Halving). A non-promising trial is early stopped with it.
scheduler = ASHAScheduler(
    metric="eval_loss",            # the metric to track
    mode="min",                    # the direction to which to optimize (here minimize)
    max_t=1425,                    # the maximum iterations or training steps
    grace_period=50,               # the minimum steps before early stopping
    reduction_factor=2             # halves the number of trials at each checkpoint
)

# Defines the search algorithm (here optuna)
search_alg = OptunaSearch(
    metric="eval_loss",
    mode="min"
)

# The trainable for optimization
def train_with_tune(search_space):

    def model_init():
        try:
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_use_double_quant=True,
                bnb_4bit_compute_dtype=torch.bfloat16,
            )

            device_map = {"": torch.cuda.current_device()} if torch.cuda.is_available() else None

            quantized_base_model = AutoModelForCausalLM.from_pretrained(
                model_checkpoint,
                torch_dtype=torch.bfloat16,
                attn_implementation="flash_attention_2",
                use_cache=False,
                device_map=device_map,
                quantization_config=quantization_config,
            )

            quantized_base_model.resize_token_embeddings(len(tokenizer))

            lora_config = LoraConfig(
                task_type=TaskType.CAUSAL_LM,
                inference_mode=False,
                r=search_space["lora_r"],
                lora_alpha=search_space["lora_alpha"],
                lora_dropout=search_space["lora_dropout"],
                bias="none",
                target_modules=["q_proj", "o_proj", "k_proj", "v_proj"],
            )

            model = prepare_model_for_kbit_training(quantized_base_model)

            lora_model = get_peft_model(model, lora_config)

            return lora_model

        except Exception as e:
            print(f"Error during model initialization: {e}")
            return e

    model = model_init()

    batch_size = search_space["batch_size"]
    acc_steps = search_space["gradient_accumulation_steps"]
    max_steps = int(len(lm_dataset['train']) / (batch_size*acc_steps))
    eval_steps = int(max_steps/4)

    sft_config = SFTConfig(
        "/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_Deepseek-coder_Models/4_Try",
        # overwrite_output_dir=True,
        save_strategy="no",
        do_eval=True,
        eval_strategy='steps',
        eval_steps=eval_steps,
        learning_rate=search_space["lr"],
        weight_decay=search_space["weight_decay"],
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        optim="paged_adamw_8bit",
        logging_strategy="steps",
        logging_steps=100,
        bf16=True,
        gradient_accumulation_steps=acc_steps,
        gradient_checkpointing=True,
        warmup_steps=search_space["warmup_steps"],
        num_train_epochs=1,
        max_seq_length=tokenizer.model_max_length,
        packing=True,
    )

    trainer = SFTTrainer(
        model=model,
        args=sft_config,
        train_dataset=lm_dataset["train"],
        eval_dataset=lm_dataset["validation"],
        data_collator=FIMDataCollator(
            tokenizer=tokenizer,
            mlm=False
        ),
        tokenizer=tokenizer
    )

    trainer.train()

    metrics = trainer.evaluate()
    ray.train.report({"eval_loss": metrics["eval_loss"], "perplexity": math.exp(metrics["eval_loss"])})

In [None]:
train_tune_with_resources = tune.with_resources(train_with_tune, resources={"cpu": 1, "gpu": 1})

tuner = tune.Tuner(
    train_tune_with_resources,
    param_space=search_space,
    tune_config=tune.TuneConfig(
        scheduler=scheduler,
        search_alg=search_alg,
        num_samples=10    # Number of hyperparameter configurations to try
    ),
    run_config=ray.train.RunConfig(
        storage_path="/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/ray_results/6_Try"
    )
)

In [None]:
results = tuner.fit()

2025-02-13 13:01:05,348	INFO worker.py:1841 -- Started a local Ray instance.
2025-02-13 13:01:06,272	INFO tune.py:253 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `Tuner(...)`.
[I 2025-02-13 13:01:06,377] A new study created in memory with name: optuna


+------------------------------------------------------------------------+
| Configuration for experiment     train_with_tune_2025-02-13_13-01-01   |
+------------------------------------------------------------------------+
| Search algorithm                 SearchGenerator                       |
| Scheduler                        AsyncHyperBandScheduler               |
| Number of trials                 10                                    |
+------------------------------------------------------------------------+

View detailed results here: /content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/ray_results/6_Try/train_with_tune_2025-02-13_13-01-01
To visualize your results with TensorBoard, run: `tensorboard --logdir /tmp/ray/session_2025-02-13_13-01-03_983708_35281/artifacts/2025-02-13_13-01-06/train_with_tune_2025-02-13_13-01-01/driver_artifacts`

Trial status: 1 PENDING
Current time: 2025-02-13 13:01:07. Total running time: 0s
Logical resource 

[36m(pid=36688)[0m 2025-02-13 13:01:13.300101: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(pid=36688)[0m E0000 00:00:1739451673.321905   36688 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(pid=36688)[0m E0000 00:00:1739451673.328534   36688 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered



Trial train_with_tune_3fc94630 started with configuration:
+---------------------------------------------------+
| Trial train_with_tune_3fc94630 config             |
+---------------------------------------------------+
| batch_size                                      8 |
| gradient_accumulation_steps                     4 |
| lora_alpha                                     64 |
| lora_dropout                              0.05199 |
| lora_r                                         32 |
| lr                                          5e-05 |
| warmup_steps                                   50 |
| weight_decay                              0.01087 |
+---------------------------------------------------+


Generating train split: 0 examples [00:00, ? examples/s]
Generating train split: 1 examples [00:00,  2.10 examples/s]
Generating train split: 1038 examples [00:01, 1126.80 examples/s]
Generating train split: 2074 examples [00:01, 1449.53 examples/s]
Generating train split: 3110 examples [00:02, 1605.35 examples/s]
Generating train split: 4147 examples [00:02, 1692.96 examples/s]
Generating train split: 5680 examples [00:03, 1814.93 examples/s]
Generating train split: 0 examples [00:00, ? examples/s]
Generating train split: 1 examples [00:00,  3.48 examples/s]
Generating train split: 650 examples [00:00, 1867.07 examples/s]
[36m(train_with_tune pid=36688)[0m wandb: Currently logged in as: fabianhensel (fabianhensel-technische-universit-t-hamburg-harburg) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin
[36m(train_with_tune pid=36688)[0m wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[36m(train_with


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:01:37. Total running time: 30s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8      

  2%|▏         | 4/177 [00:15<10:46,  3.74s/it]
  3%|▎         | 5/177 [00:18<10:39,  3.72s/it]
  3%|▎         | 6/177 [00:22<10:33,  3.71s/it]
  4%|▍         | 7/177 [00:26<10:29,  3.70s/it]
  5%|▍         | 8/177 [00:29<10:25,  3.70s/it]
  5%|▌         | 9/177 [00:33<10:22,  3.70s/it]
  6%|▌         | 10/177 [00:37<10:18,  3.71s/it]
  6%|▌         | 11/177 [00:41<10:16,  3.71s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:02:07. Total running time: 1min 0s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8   

  7%|▋         | 12/177 [00:44<10:13,  3.72s/it]
  7%|▋         | 13/177 [00:48<10:10,  3.72s/it]
  8%|▊         | 14/177 [00:52<10:07,  3.73s/it]
  8%|▊         | 15/177 [00:56<10:04,  3.73s/it]
  9%|▉         | 16/177 [00:59<10:02,  3.74s/it]
 10%|▉         | 17/177 [01:03<09:59,  3.75s/it]
 10%|█         | 18/177 [01:07<09:56,  3.75s/it]
 11%|█         | 19/177 [01:11<09:53,  3.76s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:02:37. Total running time: 1min 30s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8  

 11%|█▏        | 20/177 [01:14<09:50,  3.76s/it]
 12%|█▏        | 21/177 [01:18<09:46,  3.76s/it]
 12%|█▏        | 22/177 [01:22<09:41,  3.75s/it]
 13%|█▎        | 23/177 [01:26<09:37,  3.75s/it]
 14%|█▎        | 24/177 [01:29<09:32,  3.75s/it]
 14%|█▍        | 25/177 [01:33<09:28,  3.74s/it]
 15%|█▍        | 26/177 [01:37<09:24,  3.74s/it]
 15%|█▌        | 27/177 [01:41<09:19,  3.73s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:03:07. Total running time: 2min 0s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8   

 16%|█▌        | 28/177 [01:44<09:15,  3.73s/it]
 16%|█▋        | 29/177 [01:48<09:11,  3.72s/it]
 17%|█▋        | 30/177 [01:52<09:07,  3.72s/it]
 18%|█▊        | 31/177 [01:55<09:03,  3.72s/it]
 18%|█▊        | 32/177 [01:59<08:58,  3.72s/it]
 19%|█▊        | 33/177 [02:03<08:55,  3.72s/it]
 19%|█▉        | 34/177 [02:07<08:51,  3.72s/it]
 20%|█▉        | 35/177 [02:10<08:48,  3.72s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:03:37. Total running time: 2min 30s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8  

 20%|██        | 36/177 [02:14<08:44,  3.72s/it]
 21%|██        | 37/177 [02:18<08:41,  3.72s/it]
 21%|██▏       | 38/177 [02:21<08:37,  3.73s/it]
 22%|██▏       | 39/177 [02:25<08:34,  3.73s/it]
 23%|██▎       | 40/177 [02:29<08:31,  3.73s/it]
 23%|██▎       | 41/177 [02:33<08:27,  3.73s/it]
 24%|██▎       | 42/177 [02:36<08:24,  3.73s/it]
 24%|██▍       | 43/177 [02:40<08:20,  3.74s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:04:07. Total running time: 3min 0s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8   

 25%|██▍       | 44/177 [02:44<08:17,  3.74s/it]
 25%|██▌       | 45/177 [02:48<08:13,  3.74s/it]
 26%|██▌       | 46/177 [02:51<08:10,  3.74s/it]
 27%|██▋       | 47/177 [02:55<08:06,  3.74s/it]
 27%|██▋       | 48/177 [02:59<08:02,  3.74s/it]
 28%|██▊       | 49/177 [03:03<07:58,  3.74s/it]
 28%|██▊       | 50/177 [03:06<07:54,  3.73s/it]
 29%|██▉       | 51/177 [03:10<07:50,  3.73s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:04:37. Total running time: 3min 31s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8  

 29%|██▉       | 52/177 [03:14<07:46,  3.73s/it]
 30%|██▉       | 53/177 [03:17<07:42,  3.73s/it]
 31%|███       | 54/177 [03:21<07:38,  3.73s/it]
 31%|███       | 55/177 [03:25<07:34,  3.73s/it]
 32%|███▏      | 56/177 [03:29<07:31,  3.73s/it]
 32%|███▏      | 57/177 [03:32<07:27,  3.73s/it]
 33%|███▎      | 58/177 [03:36<07:23,  3.72s/it]
 33%|███▎      | 59/177 [03:40<07:19,  3.72s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:05:07. Total running time: 4min 1s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8   

 34%|███▍      | 60/177 [03:44<07:15,  3.72s/it]
 34%|███▍      | 61/177 [03:47<07:11,  3.72s/it]
 35%|███▌      | 62/177 [03:51<07:08,  3.72s/it]
 36%|███▌      | 63/177 [03:55<07:04,  3.73s/it]
 36%|███▌      | 64/177 [03:58<07:00,  3.72s/it]
 37%|███▋      | 65/177 [04:02<06:57,  3.72s/it]
 37%|███▋      | 66/177 [04:06<06:53,  3.73s/it]
 38%|███▊      | 67/177 [04:10<06:50,  3.73s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:05:37. Total running time: 4min 31s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8  

 38%|███▊      | 68/177 [04:13<06:46,  3.73s/it]
 39%|███▉      | 69/177 [04:17<06:42,  3.73s/it]
 40%|███▉      | 70/177 [04:21<06:38,  3.73s/it]
 40%|████      | 71/177 [04:25<06:34,  3.73s/it]
 41%|████      | 72/177 [04:28<06:31,  3.73s/it]
 41%|████      | 73/177 [04:32<06:27,  3.73s/it]
 42%|████▏     | 74/177 [04:36<06:24,  3.73s/it]
 42%|████▏     | 75/177 [04:39<06:20,  3.73s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:06:07. Total running time: 5min 1s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8   

 43%|████▎     | 76/177 [04:43<06:16,  3.73s/it]
 44%|████▎     | 77/177 [04:47<06:12,  3.73s/it]
 44%|████▍     | 78/177 [04:51<06:09,  3.73s/it]
 45%|████▍     | 79/177 [04:54<06:05,  3.73s/it]
 45%|████▌     | 80/177 [04:58<06:01,  3.73s/it]
 46%|████▌     | 81/177 [05:02<05:57,  3.73s/it]
 46%|████▋     | 82/177 [05:06<05:53,  3.73s/it]
 47%|████▋     | 83/177 [05:09<05:50,  3.73s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:06:37. Total running time: 5min 31s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8  

 47%|████▋     | 84/177 [05:13<05:46,  3.73s/it]
 48%|████▊     | 85/177 [05:17<05:43,  3.73s/it]
 49%|████▊     | 86/177 [05:20<05:39,  3.73s/it]
 49%|████▉     | 87/177 [05:24<05:35,  3.73s/it]
 50%|████▉     | 88/177 [05:28<05:31,  3.73s/it]
 50%|█████     | 89/177 [05:32<05:27,  3.73s/it]
 51%|█████     | 90/177 [05:35<05:24,  3.73s/it]
 51%|█████▏    | 91/177 [05:39<05:20,  3.73s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:07:07. Total running time: 6min 1s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8   

 52%|█████▏    | 92/177 [05:43<05:16,  3.73s/it]
 53%|█████▎    | 93/177 [05:47<05:13,  3.73s/it]
 53%|█████▎    | 94/177 [05:50<05:09,  3.73s/it]
 54%|█████▎    | 95/177 [05:54<05:05,  3.73s/it]
 54%|█████▍    | 96/177 [05:58<05:02,  3.73s/it]
 55%|█████▍    | 97/177 [06:01<04:58,  3.73s/it]
 55%|█████▌    | 98/177 [06:05<04:54,  3.73s/it]
 56%|█████▌    | 99/177 [06:09<04:50,  3.73s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:07:38. Total running time: 6min 31s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8  

 56%|█████▋    | 100/177 [06:13<04:47,  3.73s/it]
 57%|█████▋    | 101/177 [06:16<04:43,  3.73s/it]
 58%|█████▊    | 102/177 [06:20<04:39,  3.73s/it]
 58%|█████▊    | 103/177 [06:24<04:35,  3.73s/it]
 59%|█████▉    | 104/177 [06:28<04:32,  3.73s/it]
 59%|█████▉    | 105/177 [06:31<04:28,  3.73s/it]
 60%|█████▉    | 106/177 [06:35<04:24,  3.73s/it]
 60%|██████    | 107/177 [06:39<04:20,  3.73s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:08:08. Total running time: 7min 1s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8   

 61%|██████    | 108/177 [06:42<04:17,  3.73s/it]
 62%|██████▏   | 109/177 [06:46<04:13,  3.73s/it]
 62%|██████▏   | 110/177 [06:50<04:09,  3.73s/it]
 63%|██████▎   | 111/177 [06:54<04:05,  3.73s/it]
 63%|██████▎   | 112/177 [06:57<04:02,  3.73s/it]
 64%|██████▍   | 113/177 [07:01<03:58,  3.73s/it]
 64%|██████▍   | 114/177 [07:05<03:54,  3.73s/it]
 65%|██████▍   | 115/177 [07:09<03:50,  3.73s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:08:38. Total running time: 7min 31s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8  

 66%|██████▌   | 116/177 [07:12<03:47,  3.73s/it]
 66%|██████▌   | 117/177 [07:16<03:43,  3.73s/it]
 67%|██████▋   | 118/177 [07:20<03:39,  3.73s/it]
 67%|██████▋   | 119/177 [07:23<03:36,  3.73s/it]
 68%|██████▊   | 120/177 [07:27<03:32,  3.73s/it]
 68%|██████▊   | 121/177 [07:31<03:28,  3.73s/it]
 69%|██████▉   | 122/177 [07:35<03:24,  3.73s/it]
 69%|██████▉   | 123/177 [07:38<03:21,  3.73s/it]
 70%|███████   | 124/177 [07:42<03:17,  3.73s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:09:08. Total running time: 8min 1s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8   

 71%|███████   | 125/177 [07:46<03:13,  3.73s/it]
 71%|███████   | 126/177 [07:50<03:10,  3.73s/it]
 72%|███████▏  | 127/177 [07:53<03:06,  3.73s/it]
 72%|███████▏  | 128/177 [07:57<03:02,  3.73s/it]
 73%|███████▎  | 129/177 [08:01<02:58,  3.73s/it]
 73%|███████▎  | 130/177 [08:04<02:55,  3.73s/it]
 74%|███████▍  | 131/177 [08:08<02:51,  3.72s/it]
 75%|███████▍  | 132/177 [08:12<02:47,  3.73s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:09:38. Total running time: 8min 31s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8  

 75%|███████▌  | 133/177 [08:16<02:43,  3.72s/it]
 76%|███████▌  | 134/177 [08:19<02:40,  3.72s/it]
 76%|███████▋  | 135/177 [08:23<02:36,  3.73s/it]
 77%|███████▋  | 136/177 [08:27<02:32,  3.73s/it]
 77%|███████▋  | 137/177 [08:31<02:29,  3.73s/it]
 78%|███████▊  | 138/177 [08:34<02:25,  3.73s/it]
 79%|███████▊  | 139/177 [08:38<02:21,  3.73s/it]
 79%|███████▉  | 140/177 [08:42<02:17,  3.73s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:10:08. Total running time: 9min 1s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8   

 80%|███████▉  | 141/177 [08:45<02:14,  3.73s/it]
[36m(train_with_tune pid=36688)[0m 
  0%|          | 0/82 [00:00<?, ?it/s][A
[36m(train_with_tune pid=36688)[0m 
  2%|▏         | 2/82 [00:00<00:11,  6.93it/s][A
[36m(train_with_tune pid=36688)[0m 
  4%|▎         | 3/82 [00:00<00:16,  4.91it/s][A
[36m(train_with_tune pid=36688)[0m 
  5%|▍         | 4/82 [00:00<00:18,  4.21it/s][A
[36m(train_with_tune pid=36688)[0m 
  6%|▌         | 5/82 [00:01<00:19,  3.92it/s][A
[36m(train_with_tune pid=36688)[0m 
  7%|▋         | 6/82 [00:01<00:20,  3.76it/s][A
[36m(train_with_tune pid=36688)[0m 
  9%|▊         | 7/82 [00:01<00:20,  3.65it/s][A
[36m(train_with_tune pid=36688)[0m 
 10%|▉         | 8/82 [00:02<00:20,  3.58it/s][A
[36m(train_with_tune pid=36688)[0m 
 11%|█         | 9/82 [00:02<00:20,  3.55it/s][A
[36m(train_with_tune pid=36688)[0m 
 12%|█▏        | 10/82 [00:02<00:20,  3.52it/s][A
[36m(train_with_tune pid=36688)[0m 
 13%|█▎        | 11/82 [00:02<00:20,  3

[36m(train_with_tune pid=36688)[0m {'eval_loss': 1.0522911548614502, 'eval_runtime': 23.5707, 'eval_samples_per_second': 27.577, 'eval_steps_per_second': 3.479, 'epoch': 0.79}


[36m(train_with_tune pid=36688)[0m 
[36m(train_with_tune pid=36688)[0m 100%|██████████| 82/82 [00:23<00:00,  4.11it/s][A                                                 
[36m(train_with_tune pid=36688)[0m                                                [A 80%|███████▉  | 141/177 [09:09<02:14,  3.73s/it]
[36m(train_with_tune pid=36688)[0m 100%|██████████| 82/82 [00:23<00:00,  4.11it/s][A
[36m(train_with_tune pid=36688)[0m                                                [A


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:10:38. Total running time: 9min 31s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8  

 80%|████████  | 142/177 [09:13<06:18, 10.80s/it]
 81%|████████  | 143/177 [09:16<04:55,  8.68s/it]
 81%|████████▏ | 144/177 [09:20<03:57,  7.19s/it]
 82%|████████▏ | 145/177 [09:24<03:16,  6.15s/it]
 82%|████████▏ | 146/177 [09:28<02:48,  5.43s/it]
 83%|████████▎ | 147/177 [09:31<02:27,  4.92s/it]
 84%|████████▎ | 148/177 [09:35<02:12,  4.56s/it]
 84%|████████▍ | 149/177 [09:39<02:00,  4.31s/it]
 85%|████████▍ | 150/177 [09:43<01:51,  4.13s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:11:08. Total running time: 10min 1s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8  

 85%|████████▌ | 151/177 [09:46<01:44,  4.01s/it]
 86%|████████▌ | 152/177 [09:50<01:38,  3.93s/it]
 86%|████████▋ | 153/177 [09:54<01:32,  3.87s/it]
 87%|████████▋ | 154/177 [09:57<01:27,  3.82s/it]
 88%|████████▊ | 155/177 [10:01<01:23,  3.79s/it]
 88%|████████▊ | 156/177 [10:05<01:19,  3.77s/it]
 89%|████████▊ | 157/177 [10:09<01:15,  3.76s/it]
 89%|████████▉ | 158/177 [10:12<01:11,  3.75s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:11:38. Total running time: 10min 32s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8 

 90%|████████▉ | 159/177 [10:16<01:07,  3.74s/it]
 90%|█████████ | 160/177 [10:20<01:03,  3.74s/it]
 91%|█████████ | 161/177 [10:24<00:59,  3.74s/it]
 92%|█████████▏| 162/177 [10:27<00:55,  3.73s/it]
 92%|█████████▏| 163/177 [10:31<00:52,  3.73s/it]
 93%|█████████▎| 164/177 [10:35<00:48,  3.73s/it]
 93%|█████████▎| 165/177 [10:38<00:44,  3.73s/it]
 94%|█████████▍| 166/177 [10:42<00:41,  3.73s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:12:08. Total running time: 11min 2s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8  

 94%|█████████▍| 167/177 [10:46<00:37,  3.73s/it]
 95%|█████████▍| 168/177 [10:50<00:33,  3.73s/it]
 95%|█████████▌| 169/177 [10:53<00:29,  3.73s/it]
 96%|█████████▌| 170/177 [10:57<00:26,  3.73s/it]
 97%|█████████▋| 171/177 [11:01<00:22,  3.73s/it]
 97%|█████████▋| 172/177 [11:05<00:18,  3.73s/it]
 98%|█████████▊| 173/177 [11:08<00:14,  3.73s/it]
 98%|█████████▊| 174/177 [11:12<00:11,  3.73s/it]


Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:12:38. Total running time: 11min 32s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8 

 99%|█████████▉| 175/177 [11:16<00:07,  3.73s/it]
 99%|█████████▉| 176/177 [11:19<00:03,  3.73s/it]


[36m(train_with_tune pid=36688)[0m {'train_runtime': 685.1803, 'train_samples_per_second': 8.29, 'train_steps_per_second': 0.258, 'train_loss': 1.3876557430978549, 'epoch': 1.0}


100%|██████████| 177/177 [11:23<00:00,  3.86s/it]
  0%|          | 0/82 [00:00<?, ?it/s]
  2%|▏         | 2/82 [00:00<00:11,  6.97it/s]
  4%|▎         | 3/82 [00:00<00:16,  4.90it/s]
  5%|▍         | 4/82 [00:00<00:18,  4.20it/s]
  6%|▌         | 5/82 [00:01<00:19,  3.91it/s]
  7%|▋         | 6/82 [00:01<00:20,  3.77it/s]
  9%|▊         | 7/82 [00:01<00:20,  3.64it/s]
 10%|▉         | 8/82 [00:02<00:20,  3.59it/s]
 11%|█         | 9/82 [00:02<00:20,  3.55it/s]
 12%|█▏        | 10/82 [00:02<00:20,  3.52it/s]
 13%|█▎        | 11/82 [00:02<00:20,  3.49it/s]
 15%|█▍        | 12/82 [00:03<00:20,  3.49it/s]
 16%|█▌        | 13/82 [00:03<00:19,  3.48it/s]
 17%|█▋        | 14/82 [00:03<00:19,  3.47it/s]
 18%|█▊        | 15/82 [00:04<00:19,  3.46it/s]
 20%|█▉        | 16/82 [00:04<00:19,  3.46it/s]
 21%|██        | 17/82 [00:04<00:18,  3.46it/s]
 22%|██▏       | 18/82 [00:04<00:18,  3.45it/s]
 23%|██▎       | 19/82 [00:05<00:18,  3.45it/s]
 24%|██▍       | 20/82 [00:05<00:17,  3.45it/s]
 26%|██

Trial status: 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:13:08. Total running time: 12min 2s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status              lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_3fc94630   RUNNING    4.91278e-05              8               50        0.0108653                        4         32             64        0.0519948 |
| train_with_tune_2f7ab0cc   PENDING    0.00035068               8               50        0.0855096                        2          8  

 84%|████████▍ | 69/82 [00:19<00:03,  3.45it/s]
 85%|████████▌ | 70/82 [00:19<00:03,  3.45it/s]
 87%|████████▋ | 71/82 [00:20<00:03,  3.45it/s]
 88%|████████▊ | 72/82 [00:20<00:02,  3.44it/s]
 89%|████████▉ | 73/82 [00:20<00:02,  3.44it/s]
 90%|█████████ | 74/82 [00:21<00:02,  3.44it/s]
 91%|█████████▏| 75/82 [00:21<00:02,  3.45it/s]
 93%|█████████▎| 76/82 [00:21<00:01,  3.45it/s]
 94%|█████████▍| 77/82 [00:22<00:01,  3.46it/s]
 95%|█████████▌| 78/82 [00:22<00:01,  3.46it/s]
 96%|█████████▋| 79/82 [00:22<00:00,  3.46it/s]
 98%|█████████▊| 80/82 [00:22<00:00,  3.46it/s]
 99%|█████████▉| 81/82 [00:23<00:00,  3.46it/s]
100%|██████████| 82/82 [00:23<00:00,  3.52it/s]



Trial train_with_tune_3fc94630 completed after 1 iterations at 2025-02-13 13:13:12. Total running time: 12min 6s
+---------------------------------------------------+
| Trial train_with_tune_3fc94630 result             |
+---------------------------------------------------+
| checkpoint_dir_name                               |
| time_this_iter_s                          715.985 |
| time_total_s                              715.985 |
| training_iteration                              1 |
| eval_loss                                 1.03796 |
| perplexity                                2.82346 |
+---------------------------------------------------+


[36m(pid=39914)[0m 2025-02-13 13:13:19.264115: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
[36m(pid=39914)[0m E0000 00:00:1739452399.287601   39914 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
[36m(pid=39914)[0m E0000 00:00:1739452399.294987   39914 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered



Trial train_with_tune_2f7ab0cc started with configuration:
+---------------------------------------------------+
| Trial train_with_tune_2f7ab0cc config             |
+---------------------------------------------------+
| batch_size                                      8 |
| gradient_accumulation_steps                     2 |
| lora_alpha                                    128 |
| lora_dropout                              0.01942 |
| lora_r                                          8 |
| lr                                        0.00035 |
| warmup_steps                                   50 |
| weight_decay                              0.08551 |
+---------------------------------------------------+


[36m(train_with_tune pid=39914)[0m wandb: Currently logged in as: fabianhensel (fabianhensel-technische-universit-t-hamburg-harburg) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin
[36m(train_with_tune pid=39914)[0m wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[36m(train_with_tune pid=39914)[0m wandb: Tracking run with wandb version 0.19.6
[36m(train_with_tune pid=39914)[0m wandb: Run data is saved locally in /tmp/ray/session_2025-02-13_13-01-03_983708_35281/artifacts/2025-02-13_13-01-06/train_with_tune_2025-02-13_13-01-01/working_dirs/train_with_tune_2f7ab0cc_2_batch_size=8,gradient_accumulation_steps=2,lora_alpha=128,lora_dropout=0.0194,lora_r=8,lr=0.0004,warmup_2025-02-13_13-01-16/wandb/run-20250213_131326-cc2eqhqs
[36m(train_with_tune pid=39914)[0m wandb: Run `wandb offline` to turn off syncing.
[36m(train_with_tune pid=39914)[0m wandb: Syncing run /content/drive/MyDrive/Colab_Notebo


Trial status: 1 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:13:38. Total running time: 12min 32s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status                lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout     iter     total time (s)     eval_loss     perplexity |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_2f7ab0cc   RUNNING      0.00035068               8               50        0.0855096             

  2%|▏         | 6/355 [00:11<11:00,  1.89s/it]
  2%|▏         | 7/355 [00:13<10:55,  1.88s/it]
  2%|▏         | 8/355 [00:15<10:52,  1.88s/it]
  3%|▎         | 9/355 [00:17<10:49,  1.88s/it]
  3%|▎         | 10/355 [00:19<10:47,  1.88s/it]
  3%|▎         | 11/355 [00:21<10:45,  1.88s/it]
  3%|▎         | 12/355 [00:22<10:43,  1.88s/it]
2025-02-13 13:13:52,603	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/ray_results/6_Try/train_with_tune_2025-02-13_13-01-01' in 0.0638s.
  4%|▎         | 13/355 [00:24<10:42,  1.88s/it]


Trial status: 1 TERMINATED | 1 RUNNING | 1 PENDING
Current time: 2025-02-13 13:13:52. Total running time: 12min 45s
Logical resource usage: 1.0/12 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name                 status                lr     batch_size     warmup_steps     weight_decay     ...ccumulation_steps     lora_r     lora_alpha     lora_dropout     iter     total time (s)     eval_loss     perplexity |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| train_with_tune_2f7ab0cc   RUNNING      0.00035068               8               50        0.0855096              

  4%|▍         | 14/355 [00:26<10:40,  1.88s/it]
  4%|▍         | 15/355 [00:28<10:39,  1.88s/it]
  5%|▍         | 16/355 [00:30<10:37,  1.88s/it]
  5%|▍         | 17/355 [00:32<10:36,  1.88s/it]
  5%|▌         | 18/355 [00:34<10:34,  1.88s/it]
Resume experiment with: Tuner.restore(path="/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/ray_results/6_Try/train_with_tune_2025-02-13_13-01-01", trainable=...)
- train_with_tune_fcc86c01: FileNotFoundError('Could not fetch metrics for train_with_tune_fcc86c01: both result.json and progress.csv were not found at /content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/ray_results/6_Try/train_with_tune_2025-02-13_13-01-01/train_with_tune_fcc86c01_3_batch_size=4,gradient_accumulation_steps=8,lora_alpha=32,lora_dropout=0.0892,lora_r=16,lr=0.0000,warmup_2025-02-13_13-13-22')





In [None]:
print("Best Config:", results.get_best_result(metric="eval_loss", mode="min").config)

Best Config: {'lr': 4.912780695509994e-05, 'batch_size': 8, 'warmup_steps': 50, 'weight_decay': 0.010865341218750567, 'gradient_accumulation_steps': 4, 'lora_r': 32, 'lora_alpha': 64, 'lora_dropout': 0.05199484805837334}


# Training with SFTTrainer API

In [None]:
from transformers import TrainingArguments
from trl import SFTConfig

batch_size = 8

# training_args = TrainingArguments(
#     # "/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_CodeBERTa_Models/",
#     # "/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_CodeT5+_Models/",
#     # "/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_codegen-350M-mono/",
#     # "/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_Starcoder_Models/",
#     # "/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_CodeLlama_Models/1_Try",
#     # "/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_Starcoder_Models/2_Try",
#     # "/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_Starcoder_2_Models/8_Try",
#     "/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_Deepseek-coder_Models/8_Try",
#     overwrite_output_dir=True,

#     do_eval=True,
#     eval_strategy='steps',
#     eval_steps=283,

#     learning_rate=0.00016,

#     # Regularization technique to prevent overfitting
#     weight_decay=0.0534,

#     per_device_train_batch_size=batch_size,
#     per_device_eval_batch_size=batch_size,

#     # A memory-efficient variant of the AdamW optimizer
#     optim="paged_adamw_8bit",

#     logging_strategy="steps",
#     logging_steps=100,

#     # brain float 16, a special datatype for deep learning. (Is not supported by every GPU)
#     bf16=True,

#     # Accumulates gradients over several batches and the optimizer is only active after a certain number of batches have been performed.
#     gradient_accumulation_steps=2,

#     # Recomputes the intermediate values of a deep net (which would ordinarily be stored at forward time) at backward time. (saves memory during training)
#     gradient_checkpointing=True,

#     # During warmup the learning rate is set to a very small value and increases linearly over the warmup steps until it reaches the base learning rate.
#     warmup_steps=100,

#     # The maximal training steps
#     max_steps=2264,
# )

sft_config = SFTConfig(
    # "/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_CodeBERTa_Models/",
    # "/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_CodeT5+_Models/",
    # "/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_codegen-350M-mono/",
    # "/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_Starcoder_Models/",
    # "/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_CodeLlama_Models/1_Try",
    # "/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_Starcoder_Models/2_Try",
    # "/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_Starcoder_2_Models/8_Try",
    "/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_Deepseek-coder_Models/8_Try",
    overwrite_output_dir=True,

    do_eval=True,
    eval_strategy='steps',
    eval_steps=200,

    learning_rate=0.00016,

    # Regularization technique to prevent overfitting
    weight_decay=0.0534,

    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,

    # A memory-efficient variant of the AdamW optimizer
    optim="paged_adamw_8bit",

    logging_strategy="steps",
    logging_steps=100,

    # brain float 16, a special datatype for deep learning. (Is not supported by every GPU)
    bf16=True,

    # Accumulates gradients over several batches and the optimizer is only active after a certain number of batches have been performed.
    gradient_accumulation_steps=2,

    # Recomputes the intermediate values of a deep net (which would ordinarily be stored at forward time) at backward time. (saves memory during training)
    gradient_checkpointing=True,

    # During warmup the learning rate is set to a very small value and increases linearly over the warmup steps until it reaches the base learning rate.
    warmup_steps=100,

    # The maximal training steps
    # max_steps=1132,
    num_train_epochs=2,

    max_seq_length=tokenizer.model_max_length,

    # This will pack multiple short examples in the same input sequence. (inreases training efficiency, but has no impact in this case, as padding was set to true)
    # Unfortunately, packing negatively impacts the generation results when used with padding set to false.
    packing=True,
)

In [None]:
from trl import SFTTrainer
# from transformers import Trainer
import torch

trainer = SFTTrainer(
    model=model,
    args=sft_config,
    train_dataset=lm_dataset["train"],
    eval_dataset=lm_dataset["validation"],
    data_collator=FIMDataCollator(
        tokenizer=tokenizer,
        mlm=False
    ),
    tokenizer=tokenizer
)

# trainer = Trainer(
#     model_init=model_init,
#     args=training_args,
#     train_dataset=lm_dataset["train"],
#     eval_dataset=lm_dataset["validation"],
#     data_collator=FIMDataCollator(
#         tokenizer=tokenizer,
#         mlm=False
#     ),
#     tokenizer=tokenizer
# )

In [None]:
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mfabianhensel[0m ([33mfabianhensel-technische-universit-t-hamburg-harburg[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.bfloat16.


Step,Training Loss,Validation Loss
200,1.012,0.957778
400,0.8842,0.879291
600,0.8602,0.838471
800,0.8215,0.817711
1000,0.8121,0.800568
1200,0.7638,0.784666
1400,0.7697,0.780529
1600,0.7731,0.768817
1800,0.7485,0.762932
2000,0.7306,0.757444




TrainOutput(global_step=2274, training_loss=0.8341370954152881, metrics={'train_runtime': 5021.4659, 'train_samples_per_second': 7.244, 'train_steps_per_second': 0.453, 'total_flos': 7.2921140070187e+16, 'train_loss': 0.8341370954152881, 'epoch': 2.0})

In [None]:
trainer.save_model("/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_Deepseek-coder_Models/8_Try/Model")



# Evaluation with Perplexity, BLEU, and METEOR

In [None]:
import math

eval_results = trainer.evaluate()
print(f"Perplexity: {math.exp(eval_results['eval_loss']):.2f}", f"Validation Accuracy: {eval_results.get('eval_accuracy')}")

Perplexity: 2.12 Validation Accuracy: None


In [None]:
import torch

# Clears GPU cache
torch.cuda.empty_cache()

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_checkpoint = 'deepseek-ai/deepseek-coder-1.3b-base'

checkpoint = "/content/drive/MyDrive/Colab_Notebooks/TUHH_Computer_Science_(Master)/Research_Project/Finetuned_Deepseek-coder_Models/8_Try/Model"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)

old_model = AutoModelForCausalLM.from_pretrained(model_checkpoint)
old_model.resize_token_embeddings(len(tokenizer))

finetuned_model = PeftModel.from_pretrained(old_model, checkpoint).to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/631 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.69G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.69G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

In [None]:
finetuned_model.eval()

In [33]:
text = '''<|secure_function|>\tfunction add'''
model_inputs = tokenizer(text, return_tensors="pt").to(device)

input_ids = model_inputs["input_ids"]
attention_mask = model_inputs["attention_mask"]

# eos_token = "<|end▁of▁sentence|>"
# eos_token_id = tokenizer.convert_tokens_to_ids(eos_token)

generated_ids = finetuned_model.generate(input_ids,
                                         do_sample=True,
                                         max_length=256,
                                         num_beams=4,
                                         temperature=0.3,
                                         pad_token_id=tokenizer.eos_token_id,
                                         attention_mask=attention_mask)


# tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0])

	function addLiquidityETH(
		address token,
		uint256 amountTokenDesired,
		uint256 amountTokenMin,
		uint256 amountETHMin,
		address to,
		uint256 deadline
	) external payable returns (uint256 amountToken, uint256 amountETH, uint256 liquidity);


In [34]:
def generate_fim(prefix, suffix, model, tokenizer, max_length=256):
    input_text = f"<|fim_begin|>{prefix}<|fim_hole|>{suffix}<|fim_end|>"
    inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(
        inputs,
        max_length=max_length,
        num_beams=8,
        temperature=0.3,
        num_return_sequences=1,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )
    middle = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
    return prefix + middle + suffix

In [37]:
prefix = '''pragma solidity ^0.8.0;\n\n'''

suffix = '''\n\ncontract FOO is Context, IERC20, Ownable {'''

print(generate_fim(prefix, suffix, finetuned_model, tokenizer))

pragma solidity ^0.8.0;

import "@openzeppelin/contracts/utils/Context.sol" as Context;
import "@openzeppelin/contracts/interfaces/IERC20.sol" as IERC20;
import "@openzeppelin/contracts/access/Ownable.sol" as Ownable;

contract FOO is Context, IERC20, Ownable {


In [None]:
# Picks num_examples random FIM transformed constructs from the dataset and returns it
def return_random_FIMs(dataset, num_examples=10):

    picks = []
    fim_set = []
    test_set = []

    for i in range(len(dataset)):
        if '<|fim_end|>' in dataset[i]:
            fim_set.append(dataset[i])

    for _ in range(num_examples):
        pick = random.randint(0, len(fim_set)-1)
        while pick in picks:
            pick = random.randint(0, len(fim_set)-1)
        picks.append(pick)

    for pick in picks:
        prefix_index = fim_set[pick].index('<|fim_hole|>')
        suffix_index = fim_set[pick].index('<|fim_end|>')

        prefix = fim_set[pick][0:prefix_index]
        ground_truth = fim_set[pick][suffix_index + len('<|fim_end|>'):len(fim_set[pick])-len('<|end_of_sentence|>')]
        suffix = fim_set[pick][prefix_index + len('<|fim_hole|>'):suffix_index]

        test_set.append([prefix, suffix, ground_truth])

    return test_set

In [None]:
test = return_random_FIMs(import_fim_dataset, 1)

prompt = f"{test[0][0]}<|fim_hole|>{test[0][1]}<|fim_end|>"
reference = f"{test[0][2]}"

generator = pipeline("text-generation", model=finetuned_model, tokenizer=tokenizer, device=0)
print(generator(prompt, max_length=256, do_sample=True, num_beams=8, temperature=0.7, num_return_sequences=2, pad_token_id=tokenizer.eos_token_id)[0]["generated_text"])
print(reference, 1)

In [None]:
import re

import_fim_dataset = []
function_fim_dataset = []
vul_function_fim_dataset = []
constructor_fim_dataset = []
vul_constructor_fim_dataset = []
modifier_fim_dataset = []

vul_pattern = r".*\/\/ .*"

for i in range(len(updated_dataset['test'])):
    if 'pragma solidity ^0.8.0;' in updated_dataset['test'][i]['text']:
        import_fim_dataset.append(updated_dataset['test'][i]['text'])

for i in range(len(updated_dataset['test'])):
    if '\tfunction' in updated_dataset['test'][i]['text'] and not re.search(vul_pattern, updated_dataset['test'][i]['text']):
        function_fim_dataset.append(updated_dataset['test'][i]['text'])

for i in range(len(updated_dataset['test'])):
    if '\tfunction' in updated_dataset['test'][i]['text'] and re.search(vul_pattern, updated_dataset['test'][i]['text']):
        vul_function_fim_dataset.append(updated_dataset['test'][i]['text'])

for i in range(len(updated_dataset['test'])):
    if '<|secure_constructor|>' in updated_dataset['test'][i]['text'] and not re.search(vul_pattern, updated_dataset['test'][i]['text']):
        constructor_fim_dataset.append(updated_dataset['test'][i]['text'])

for i in range(len(updated_dataset['test'])):
    if 'constructor' in updated_dataset['test'][i]['text'] and re.search(vul_pattern, updated_dataset['test'][i]['text']):
        vul_constructor_fim_dataset.append(updated_dataset['test'][i]['text'])

for i in range(len(updated_dataset['test'])):
    if '\tmodifier' in updated_dataset['test'][i]['text']:
        modifier_fim_dataset.append(updated_dataset['test'][i]['text'])

print(modifier_fim_dataset)

In [None]:
from pygments.lexers import SolidityLexer
from pygments.token import Token
from nltk.translate.meteor_score import meteor_score
import nltk

nltk.download('wordnet')

# Tokenizes the solidity code example
def tokenize_code(code):
    lexer = SolidityLexer()
    tokens = list(lexer.get_tokens(code))
    token_strings = []
    for token_type, token_value in tokens:
        if token_type not in (Token.Text, Token.Comment):
            token_strings.append(token_value)
    return token_strings

# Stemming is not applied
class CodeStemmer:
    def stem(self, token):
        return token  # No stemming

# Computes the METEOR score
def compute_meteor(generated_code, reference_code):
    score = 0
    for (gen_code, ref_code) in zip(generated_code, reference_code):
        gen_tokens = tokenize_code(gen_code)
        ref_tokens = tokenize_code(ref_code)
        score += meteor_score([ref_tokens], gen_tokens, preprocess=lambda x: x, stemmer=CodeStemmer())

    return score / len(generated_code)

[nltk_data] Downloading package wordnet to /root/nltk_data...


In [None]:
import evaluate
import re

vul_pattern = r".*\/\/ .*"

# Loads the BLEU metric
bleu = evaluate.load("bleu")

eos_token = "<|end▁of▁sentence|>"
eos_token_id = tokenizer.convert_tokens_to_ids(eos_token)

# Generates predictions
def generate_code(model, tokenizer, prompts):
    inputs = []
    for prompt in prompts:
        inputs.append(tokenizer.encode(prompt, return_tensors="pt").to(model.device))

    outputs = []
    for input in inputs:
        outputs.append(model.generate(input, max_length=256, num_beams=4, temperature=0.3, do_sample=True, pad_token_id=tokenizer.eos_token_id))

    return [tokenizer.decode(output[0][len(input[0]):], skip_special_tokens=True) for (output, input) in zip(outputs, inputs)]

# The prompts that should be completed
prompts = []
references = []
for prompt in return_random_FIMs(constructor_fim_dataset, 10):
    prompts.append(f"{prompt[0]}<|fim_hole|>{prompt[1]}<|fim_end|>")
    references.append(prompt[2])

pretrained_predictions = generate_code(model, tokenizer, prompts)
finetuned_predictions = generate_code(finetuned_model, tokenizer, prompts)

# Computes the BLEU score by comparing the predictions with the references
bleu_score_pretrained = 0
bleu_score_finetuned = 0
vulnerable_hits = 0
for (finetuned_prediction, pretrained_prediction, reference) in zip(finetuned_predictions, pretrained_predictions, references):
    if re.search(vul_pattern, finetuned_prediction):
        vulnerable_hits +=1                            # hit if vul_pattern is found in code fragment
    bleu_score_pretrained += bleu.compute(predictions=[pretrained_prediction], references=[reference])['bleu']
    bleu_score_finetuned += bleu.compute(predictions=[finetuned_prediction], references=[reference])['bleu']

# The average of the scores is calculated
bleu_score_pretrained = bleu_score_pretrained / len(references)
bleu_score_finetuned = bleu_score_finetuned / len(references)

# Computes the METEOR score by comparing the predictions with the references
meteor_score_pretrained = compute_meteor(pretrained_predictions, references)
meteor_score_finetuned = compute_meteor(finetuned_predictions, references)

print(f"Pretrained Model BLEU: {bleu_score_pretrained:.2f}")
print(f"Fine-Tuned Model BLEU: {bleu_score_finetuned:.2f}")
print(f"Pretrained Model METEOR: {meteor_score_pretrained:.2f}")
print(f"Fine-Tuned Model METEOR: {meteor_score_finetuned:.2f}")
print(f"Non-generated security comments: {vulnerable_hits}")

Pretrained Model BLEU: 0.01
Fine-Tuned Model BLEU: 0.46
Pretrained Model METEOR: 0.13
Fine-Tuned Model METEOR: 0.73
Non-generated security comments: 0


In [None]:
import evaluate

bleu = evaluate.load("bleu")

reference_code = ['\n\t\t_;']
generated_code = ['\n\t\tdf;']

score = compute_meteor(generated_code, reference_code)
score_bleu = bleu.compute(predictions=generated_code, references=reference_code)
print(f"METEOR Score: {score}")
print(f"BLEU Score: {score_bleu['bleu']}")

METEOR Score: 0.9814814814814815
BLEU Score: 0.0


In [None]:
from transformers import AutoModelForCausalLM

device = "cuda"

# Base model
model = AutoModelForCausalLM.from_pretrained(model_checkpoint).to(device)

model.resize_token_embeddings(len(tokenizer))

Embedding(32028, 2048)

In [None]:
from trl import SFTConfig, SFTTrainer
import math

sft_config = SFTConfig(
    output_dir="./results",
    save_strategy="no",
    per_device_eval_batch_size=8,
    logging_dir="./logs",
    report_to="none",
    packing=True,
    max_seq_length=tokenizer.model_max_length,
)

trainer = SFTTrainer(
    model=model,
    args=sft_config,
    eval_dataset=lm_dataset["test"],
    tokenizer=tokenizer,
    data_collator=FIMDataCollator(
        tokenizer=tokenizer,
        mlm=False
    )
)

eval_results = trainer.evaluate()
perplexity = math.exp(eval_results["eval_loss"])

print(f"Perplexity: {perplexity:.2f}")

  trainer = SFTTrainer(


Generating train split: 0 examples [00:00, ? examples/s]

Perplexity: 12.08


In [None]:
from trl import SFTConfig, SFTTrainer
import math

sft_config = SFTConfig(
    output_dir="./results",
    save_strategy="no",
    per_device_eval_batch_size=8,
    logging_dir="./logs",
    report_to="none",
    packing=True,
    max_seq_length=tokenizer.model_max_length,
)

trainer = SFTTrainer(
    model=finetuned_model,
    args=sft_config,
    eval_dataset=lm_dataset["test"],
    tokenizer=tokenizer,
    data_collator=FIMDataCollator(
        tokenizer=tokenizer,
        mlm=False
    )
)

eval_results = trainer.evaluate()
perplexity = math.exp(eval_results["eval_loss"])

print(f"Perplexity: {perplexity:.2f}")

  trainer = SFTTrainer(


Generating train split: 0 examples [00:00, ? examples/s]

Perplexity: 2.19


In [None]:
from transformers import pipeline
import evaluate
import re

vul_pattern = r".*\/\/ .*"

# Loads the BLEU metric
bleu = evaluate.load("bleu")

eos_token = "<|end▁of▁sentence|>"
eos_token_id = tokenizer.convert_tokens_to_ids(eos_token)

# Generates predictions
def generate_code(model, tokenizer, prompts):
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0)
    return [generator(prompt, max_length=256, do_sample=True, num_beams=4, temperature=0.3, pad_token_id=tokenizer.eos_token_id)[0]["generated_text"] for prompt in prompts]

# The prompts that should be completed
prompts = ['<|secure_function|>\tfunction _transfer(\n\t\taddress sender,\n\t\taddress recipient,\n\t\tuint256 amount\n\t) internal virtual {',
           '<|secure_function|>\tfunction _approve(\n\t\taddress owner,\n\t\taddress spender,\n\t\tuint256 amount\n\t) internal virtual {',
					 '<|secure_function|>\tfunction approve(\n\t\taddress spender,\n\t\tuint256 amount\n\t) public returns (bool success) {',
           '<|secure_function|>\tfunction transfer(\n\t\taddress from,\n\t\taddress to,\n\t\tuint256 amount\n\t) public virtual override returns (bool) {',
					 '<|secure_function|>\tfunction withdraw(',
					 '<|secure_function|>\tfunction add(uint256 a, uint256 b) internal pure returns (uint256) {',
					 '<|secure_function|>\tfunction sub(uint256 a, uint256 b) internal pure returns (uint256) {',
					 '<|secure_function|>\tfunction div(uint256 a, uint256 b) internal pure returns (uint256) {',
           '<|secure_function|>\tfunction mult(uint256 a, uint256 b) internal pure returns (uint256) {',
           '<|secure_function|>\tfunction sendValue(address payable recipient, uint256 amount) internal {',
           '<|secure_function|>\tfunction ownerOf(\n\t\tuint256 tokenId\n\t) public view virtual override returns (address owner) {',
           '<|secure_function|>\tfunction symbol',
           '<|secure_function|>\tfunction name',
           '\tmodifer onlyOwner() {',
           '\tevent Approval',
           '\tevent Transfer',
           '\tusing SafeMath',
           '\tusing Address',
           '<|secure_function|>\tfunction burn',
           '\tstruct CurrentRateInfo {'
]

pretrained_predictions = generate_code(model, tokenizer, prompts)
finetuned_predictions = generate_code(finetuned_model, tokenizer, prompts)

# The references that serve as ground truth
references = ['\n\t\trequire(sender != address(0), "ERC20: transfer from the zero address");\n\n\t\trequire(recipient != address(0), "ERC20: transfer to the zero address");\n\n\t\tuint256 senderBalance = _balances[sender];\n\n\t\trequire(\n\t\t\tsenderBalance >= amount,\n\t\t\t"ERC20: transfer amount exceeds balance"\n\t\t);\n\n\t\tunchecked {\n\t\t\t_balances[sender] = senderBalance - amount;\n\n\t\t\t_balances[recipient] += amount;\n\t\t}\n\n\t\temit Transfer(sender, recipient, amount);\n\t}',
              '\n\t\trequire(owner != address(0), "ERC20: approve from the zero address");\n\n\t\trequire(spender != address(0), "ERC20: approve to the zero address");\n\n\t\t_allowances[owner][spender] = amount;\n\n\t\temit Approval(owner, spender, amount);\n\t}',
              '\n\t\tallowances[msg.sender][spender] = amount;\n\t\temit Approval(msg.sender, spender, amount);\n\t\treturn true;\n\t}',
              '\n\t\trequire(from != address(0), "ERC20: transfer from the zero address");\n\n\t\trequire(to != address(0), "ERC20: transfer to the zero address");\n\n\t\tuint256 fromBalance = _balances[from];\n\n\t\trequire(\n\t\t\tfromBalance >= amount,\n\t\t\t"ERC20: transfer amount exceeds balance"\n\t\t);\n\n\t\tunchecked {\n\t\t\t_balances[from] = fromBalance - amount;\n\n\t\t\t_balances[to] += amount;\n\t\t}\n\n\t\temit Transfer(from, to, amount);\n\n\t\treturn true;\n\t}',
              'uint256 amount) external onlyOwner {\n\t\tpayable(msg.sender).transfer(amount);\n\t}',
              '\n\t\tunchecked {\n\t\t\tuint256 c = a + b;\n\n\t\t\trequire(c >= a, "SafeMath: addition overflow");\n\n\t\t\treturn c;\n\t\t}\n\t}',
              '\n\t\treturn sub(a, b, "SafeMath: subtraction overflow");\n\t}',
              '\n\t\treturn div(a, b, "SafeMath: division by zero");\n\t}',
              '\n\t\treturn a * b;\n\t}',
              '\n\t\t(bool success, ) = recipient.call{value: amount}("");\n\t\trequire(success, "Address: unable to send value, recipient may have reverted");\n\t}',
              '\n\t\trequire(_exists(tokenId), "ERC721: owner of nonexistent token");\n\n\t\treturn _owners[tokenId];\n\t}',
              '() public view virtual override returns (string memory) {\n\t\treturn _symbol;\n\t}',
              '() public view virtual override returns (string memory) {\n\t\treturn _name;\n\t}',
              '\n\t\t_transferOwnership(address(0));\n\t}',
              '(\n\t\taddress indexed owner,\n\t\taddress indexed spender,\n\t\tuint256 value\n\t);',
              '(\n\t\taddress indexed from,\n\t\taddress indexed to,\n\t\tuint256 indexed id\n\t);',
              ' for uint256;',
              ' for address;',
              '(uint256 amount) external onlyOwner {\n\t\t_burn(msg.sender, amount);\n\t}',
              '\n\t\tuint64 lastTimestamp;\n\t\tuint64 ratePerSec;\n\t\tuint64 fullUtilizationRate;\n\t}'
]

bleu_score_pretrained = 0
bleu_score_finetuned = 0
vulnerable_hits = 0
for (finetuned_prediction, pretrained_prediction, reference) in zip(finetuned_predictions, pretrained_predictions, references):
    if re.search(vul_pattern, finetuned_prediction):
        vulnerable_hits +=1
    bleu_score_pretrained += bleu.compute(predictions=[pretrained_prediction], references=[reference])['bleu']
    bleu_score_finetuned += bleu.compute(predictions=[finetuned_prediction], references=[reference])['bleu']

bleu_score_pretrained = bleu_score_pretrained / len(references)
bleu_score_finetuned = bleu_score_finetuned / len(references)

meteor_score_pretrained = compute_meteor(pretrained_predictions, references)
meteor_score_finetuned = compute_meteor(finetuned_predictions, references)

print(f"Pretrained Model BLEU: {bleu_score_pretrained:.2f}")
print(f"Fine-Tuned Model BLEU: {bleu_score_finetuned:.2f}")
print(f"Pretrained Model METEOR: {meteor_score_pretrained:.2f}")
print(f"Fine-Tuned Model METEOR: {meteor_score_finetuned:.2f}")
print(f"Vulnerable Hits: {vulnerable_hits}")

Device set to use cuda:0
Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'GraniteForCausalLM', 'Gra

Pretrained Model BLEU: 0.11
Fine-Tuned Model BLEU: 0.55
Pretrained Model METEOR: 0.30
Fine-Tuned Model METEOR: 0.90
Vulnerable Hits: 0
