# Installing dependencies

In [1]:
# we use the latest version of transformers, peft, and accelerate
!pip install -q accelerate peft transformers

# install bitsandbytes for quantization
!pip install -q bitsandbytes

# install trl for the SFT library
!pip install -q trl

# we need sentencepiece for the llama2 slow tokenizer
!pip install sentencepiece

# we need einops, used by falcon-7b, llama-2 etc
# einops (einsteinops) is used to simplify tensorops by making them readable
!pip install -q -U einops

# we need to install datasets for our training dataset
!pip install -q datasets


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m

# Downloading Mistral 7B Instruct Model

In [1]:
# The model that we want to train from the Hugging Face hub
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

# The instruction dataset to use found on HuggingFace
dataset_name = "KonradSzafer/stackoverflow_python_preprocessed"

# Fine-tuned model name
new_model = "Mistral-7B-Stackoverflow"
output_dir = "./results"

In [2]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline,
    logging,
)

# load the quantized settings, we're doing 4 bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    # use the gpu
    device_map={"": 0}
)

# don't use the cache
model.config.use_cache = False

# Load the tokenizer from the model (llama2)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, use_fast=False)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

2024-03-06 09:15:18.824152: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-06 09:15:18.874059: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

# Testing it on some prompts 

In [3]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)


prompt = "I have this Python application that gets stuck from time to time and I can't find out where. Is there any way to signal Python interpreter to show you the exact code that's running? Some kind of on-the-fly stacktrace? Related questions: Print current call stack from a method in Python code Check what a running process is doing: print stack trace of an uninstrumented Python program"

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

[INST] I have this Python application that gets stuck from time to time and I can't find out where. Is there any way to signal Python interpreter to show you the exact code that's running? Some kind of on-the-fly stacktrace? Related questions: Print current call stack from a method in Python code Check what a running process is doing: print stack trace of an uninstrumented Python program [/INST] In Python, there isn't a built-in mechanism to print a stack trace at a specific point in your code like there is in some other languages (such as Java or C++). However, you can use various methods to help identify where your code is getting stuck.

1. **Manually add stack traces:** The simplest way to get a stack trace when your code is getting stuck is to add a stack trace manually at the point where you suspect the issue is occurring. You can use the `traceback` module


# Fine tuning the model 

In [4]:
from datasets import load_dataset
from datasets import Dataset
# Load the dataset
dataset = load_dataset(dataset_name, split="train[:200]")

df = dataset.to_pandas()

# Create the new 'text' column by concatenating the formatted text
df['text'] = '<s>[INST] ' + df['question'] + ' [/INST] ' + df['answer'] + ' </s>'

# Keep only the 'text' column in the new dataset
new_df = df[['text']]
# Convert DataFrame to dataset
new_df = Dataset.from_pandas(new_df)

We used the whole dataset for training since it is very small. Also fine tuning a LLM on a programming language is a unique training technique and it does require the model to be trained on every code it can be trained on,  unlike other training techniques where we can split our dataset into training and validating sets. Finally to validate the model & measure its performance we use Unit testing of the programming language, unfotunately we don't have that for OPL PSION

In [5]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)
num_train_epochs = 10
# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,      
    per_device_train_batch_size=1,          
    gradient_accumulation_steps=2,          
    optim="paged_adamw_32bit",              
    save_steps=0,                           
    logging_steps=10,                       
    learning_rate=2e-3,                     
    weight_decay=0.001,                     
    fp16=False,                            
    bf16=False,                             
    max_grad_norm=0.3,                     
    max_steps=-1,                           
    warmup_ratio=0.03,                      
    group_by_length=True,                   
    lr_scheduler_type="cosine",           
    report_to="tensorboard"
)

# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=new_df,
    peft_config=peft_config,                
    dataset_text_field="text",
    max_seq_length=100,                    
    tokenizer=tokenizer,                   
    args=training_arguments,                
    packing=False,                          
)

# Train model
trainer.train()

# Save trained model
trainer.model.save_pretrained(new_model)




Map:   0%|          | 0/200 [00:00<?, ? examples/s]

{'loss': 3.5166, 'grad_norm': 1.2386070489883423, 'learning_rate': 0.0006666666666666666, 'epoch': 0.1}
{'loss': 2.3965, 'grad_norm': 1.7245467901229858, 'learning_rate': 0.0013333333333333333, 'epoch': 0.2}
{'loss': 1.8916, 'grad_norm': 2.782966136932373, 'learning_rate': 0.002, 'epoch': 0.3}
{'loss': 2.0906, 'grad_norm': 1.9421788454055786, 'learning_rate': 0.0019994755690455153, 'epoch': 0.4}
{'loss': 2.214, 'grad_norm': 5.066443920135498, 'learning_rate': 0.0019979028262377117, 'epoch': 0.5}
{'loss': 2.1276, 'grad_norm': 8.79931354522705, 'learning_rate': 0.0019952834211666138, 'epoch': 0.6}
{'loss': 2.3057, 'grad_norm': 3.6244630813598633, 'learning_rate': 0.001991620101226425, 'epoch': 0.7}
{'loss': 1.918, 'grad_norm': 5.809851169586182, 'learning_rate': 0.0019869167087338906, 'epoch': 0.8}
{'loss': 1.556, 'grad_norm': 2.537376642227173, 'learning_rate': 0.0019811781768982392, 'epoch': 0.9}
{'loss': 1.7934, 'grad_norm': 4.265195369720459, 'learning_rate': 0.001974410524646926, 'e

# Testing the fine tuned model 

In [26]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "I have a multi-line string literal that I want to do an operation on each line, like so: inputString = '''Line 1 Line 2 Line 3''' I want to do something like the following: for line in inputString: doStuff()"

pipe = pipeline(task="text-generation", model=trainer.model, tokenizer=tokenizer, max_length=200)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

 [INST] I have a multi-line string literal that I want to do an operation on each line, like so: inputString = '''Line 1 Line 2 Line 3''' I want to do something like the following: for line in inputString: doStuff() [/INST] You can simply use str.splitlines. Besides the advantage mentioned by @efotinis of optionally including the newline character in the split result when called with a True argument, splitlines() handles newlines properly, unlike split("\n"). \n, in Python, represents a Unix line-break (ASCII decimal code 10), independently from the platform where you run it. However, the linebreak representation is platform-dependent. On Windows, \n is two characters, CR and LF (ASCII decimal codes 13 and 10, AKA \r and \n), while on any modern Unix (including OS X), it's the single character LF. print, for example, works correctly even if you have a string with line endings that don't match your platform: >>> print " a \n b \r\n c " a b c However, explicitly splitting on "\n", will y