In [None]:
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q datasets

In [None]:
!pip install -q -U trl==0.4.7

In [None]:
import os
import random

import torch

from datasets import load_dataset, Dataset, DatasetDict
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)

from peft import LoraConfig, PeftModel
from trl import SFTTrainer

### Model Loading and Quantization: 
Load the model onto GPU in 4-bit precision using the "bitsandbytes" library.

In [None]:
# Hub identifier of the base checkpoint that gets fine-tuned.
model_name = "TinyPixel/Llama-2-7B-bf16-sharded"
# Not referenced by any other cell visible in this notebook.
use_flash_attention = False

# bitsandbytes settings: 4-bit NF4 weights with double quantization,
# computing in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the quantized model and let the library decide the device placement.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)

# NOTE(review): presumably switches off the tensor-parallel emulation used
# during pretraining — confirm against the Llama configuration docs.
model.config.pretraining_tp = 1

### Dataset preparation:
Create a suitable dataset and wrap it in a Hugging Face `Dataset` object.

In [None]:
from datasets import Dataset
import json

# Path to the JSON training data, relative to the notebook's working directory.
file_path = 'data.json'

# Read the JSON file into a dictionary. The encoding is passed explicitly:
# JSON text is UTF-8, while open() would otherwise fall back to the
# platform's locale encoding and could mis-decode non-ASCII characters.
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# The top-level JSON value must be an object; only its values (one record per
# entry) are used, the keys are ignored. Every record must provide the three
# fields read below, otherwise a KeyError is raised.
records = list(data.values())

# Split the records into one column per field.
prompt_list = [item["prompt"] for item in records]
code_list = [item["code"] for item in records]
mafia_explanation_list = [item["mafia_explanation"] for item in records]

# Column-oriented dictionary in the layout expected by Dataset.from_dict.
formatted_data = {
    "prompt": prompt_list,
    "code": code_list,
    "mafia_explanation": mafia_explanation_list
}

dataset = Dataset.from_dict(formatted_data)

### Define and test the formatting_function:

In [None]:
def format_instruction(sample):
    """Render one dataset record as a training prompt.

    The prompt consists of a fixed instruction, the record's ``code`` field
    under a ``### CODE:`` heading and its ``mafia_explanation`` field under a
    ``### RESPONSE:`` heading, terminated by a newline.

    Args:
        sample: Mapping that provides the keys ``'code'`` and
            ``'mafia_explanation'``.

    Returns:
        The assembled prompt string.
    """
    # The instruction text is reproduced verbatim (including its spelling),
    # because the inference prompts in this notebook use the same wording.
    instruction = "You are a sarcastic mafia-style assistant designed to provide funny yet accurate explanation of Python code. You can playfully tease the programmer for not knowing the answer. Use a language that directly portrays the chatbot as a mafioso, maintaining a comedic and intimidating tone inspired by mafia movies. You can only return your explaination and nothing else. Your answer cannot contain code. You explainaition is short, precise and answers in complete sentences. Explain the following python and code give me the explaination:"
    code = sample['code']
    response = sample['mafia_explanation']
    return (
        "### INSTRUCTION:\n"
        f"{instruction}\n"
        "\n"
        "### CODE:\n"
        f"{code}\n"
        "\n"
        "### RESPONSE:\n"
        f"{response}\n"
    )


from random import randrange

# Quick sanity check: report the dataset size, show one random raw record and
# one random record rendered through format_instruction.
sample_index = randrange(len(dataset))
print(f"dataset size: {len(dataset)}")
print(dataset[sample_index])

rendered_prompt = format_instruction(random.choice(dataset))
print(rendered_prompt)

### LoRA Configuration: 
Define the LoRA (Low-Rank Adaptation) configuration.

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Names of the modules that receive LoRA adapters.
lora_target_modules = [
    "q_proj",
    "up_proj",
    "o_proj",
    "k_proj",
    "down_proj",
    "gate_proj",
    "v_proj",
]

# Adapter hyperparameters: rank 64, alpha 16, 5% dropout, no bias training.
config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=lora_target_modules,
)

# Enable gradient checkpointing, prepare the quantized model for k-bit
# training and wrap it with the LoRA adapters.
# Note: this rebinds `model`, so running the cell twice wraps it twice.
model.gradient_checkpointing_enable()
model = get_peft_model(prepare_model_for_kbit_training(model), config)

### Training Hyperparameters: 
Ultimately, as with every model, the success of fine-tuning depends on selecting appropriate training parameters.

In [None]:
# Mixed-precision switches, defined once and passed to TrainingArguments below.
# The original cell first set bf16 = False and then wrote `bf16=True,` and
# `tf32=True,`; the trailing commas bound one-element tuples instead of
# booleans, and none of the variables were used. The values below are the ones
# the cell actually trained with (bf16 and tf32 on, fp16 off).
# Original hints: set bf16 to True with an A100; the bf16/tf32 pair was noted
# for an A10.
fp16 = False
bf16 = True
tf32 = True

# Set training parameters.
# Original author's note (translated): "somehow problematic".
training_arguments = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    optim="paged_adamw_32bit",
    save_strategy="epoch",
    logging_steps=5,
    learning_rate=2e-4,
    weight_decay=0.001,
    bf16=bf16,
    fp16=fp16,
    tf32=tf32,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    # NOTE(review): the trainer cell in this notebook passes no eval dataset,
    # so these two evaluation settings presumably have no effect — confirm.
    eval_steps=5,
    do_eval=True,
    disable_tqdm=False,
)


### SFTTrainer Integration: 
Finally, the defined training parameters are used with the SFTTrainer class to initiate the fine-tuning process. 

In [None]:
import transformers

# Tokenizer of the base checkpoint; the end-of-sequence token doubles as the
# padding token and padding is applied on the right.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Supervised fine-tuning trainer: records are rendered by format_instruction
# and packed into sequences of at most 2048 tokens.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=dataset,
    formatting_func=format_instruction,
    peft_config=config,
    tokenizer=tokenizer,
    max_seq_length=2048,
    packing=True,
)

# Turn the generation cache off for training; it is switched back on in the
# inference cell.
model.config.use_cache = False

In [None]:
# Start fine-tuning with the trainer built in the previous cell. The call is
# the last expression of the cell, so its return value is shown as cell output.
trainer.train()

### Save model

In [None]:
# Output location for the trained adapter. `model_name` contains a "/", so the
# resulting string does as well.
# NOTE(review): presumably this yields a nested directory on disk — confirm.
new_model_name = f"3_epoch_fine_tuned_{model_name}"

# Persist the trainer's model under that name; the merge cell further down
# loads it again from the same location.
trainer.model.save_pretrained(new_model_name)

### Inference Call:

In [None]:
# Silence everything below CRITICAL and re-enable the generation cache that
# was switched off for training.
logging.set_verbosity(logging.CRITICAL)
model.config.use_cache = True

# Prompt in the same layout as format_instruction, ending at the response
# marker so the model writes the explanation.
prompt = (
    "### INSTRUCTION:\nYou are a sarcastic mafia-style assistant designed to provide funny yet accurate explanation of Python code. You can playfully tease the programmer for not knowing the answer. Use a language that directly portrays the chatbot as a mafioso, maintaining a comedic and intimidating tone inspired by mafia movies. You can only return your explaination and nothing else. Your answer cannot contain code. You explainaition is short, precise and answers in complete sentences. Explain the following python and code give me the explaination:\n\n"
    "### CODE:\n"
    "def similar_elements(test_tup1, test_tup2):\n"
    "  res = tuple(set(test_tup1) & set(test_tup2))\n"
    "  return (res) \n\n"
    "### RESPONSE:"
)

# Run the text-generation pipeline with the freshly fine-tuned model.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
)
result = pipe(prompt)
print(result[0]['generated_text'])

In [None]:
# Free memory: the base model is reloaded next and merged with the LoRA
# weights to reduce inference time.
import gc
import torch

# Drop every notebook-level reference to the training objects.
# Note: re-running this cell raises NameError because the names are gone.
del model
del tokenizer
del pipe
del trainer

# Collect garbage first so that objects kept alive only by reference cycles
# are destroyed; empty_cache() releases only unoccupied cached memory, so it
# has to run after the collection (the original cell ran it before).
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Load the base checkpoint again, this time unquantized in float16, with
# automatic device placement.
device_map = "auto"
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map=device_map,
    low_cpu_mem_usage=True,
    return_dict=True,
)

In [None]:
# Recreate the tokenizer with the same settings as for training: right-side
# padding, with the end-of-sequence token reused as padding token.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Attach the saved adapter to the reloaded base model and merge it into the
# model in a single step; `model` ends up bound to the merged result.
model = PeftModel.from_pretrained(base_model, new_model_name).merge_and_unload()

In [None]:
# Run the text-generation pipeline with the merged (fine-tuned) model.
# The original cell called this the "baseline model" and passed `base_model`,
# but the previous cell loaded the adapter onto `base_model` and merged it, so
# that object no longer holds the untouched base weights. `model` is used here
# to make explicit what is being evaluated. A true baseline would have to be
# generated before the adapter is attached.
#
# Two fixes to the prompt itself:
# - the body of the `for` loop in the sample code is now indented, so the
#   snippet is valid Python;
# - the prompt ends with the "### RESPONSE:" marker, matching the training
#   template and the first inference prompt.
prompt = (
    "### INSTRUCTION:\nYou are a sarcastic mafia-style assistant designed to provide funny yet accurate explanation of Python code. You can playfully tease the programmer for not knowing the answer. Use a language that directly portrays the chatbot as a mafioso, maintaining a comedic and intimidating tone inspired by mafia movies. You can only return your explaination and nothing else. Your answer cannot contain code. You explainaition is short, precise and answers in complete sentences. Explain the following python and code give me the explaination:\n\n"
    "### CODE:\n"
    "def say_lukas_n_times(n):\n"
    "  for i in range(n):\n"
    "    print('Lukas is the boss')\n\n"
    "### RESPONSE:"
)
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=512)
result = pipe(prompt)
print(result[0]['generated_text'])