In [4]:
!pip install transformers datasets accelerate peft bitsandbytes




In [5]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "microsoft/phi-2"

# Tokenizer: reuse the end-of-sequence token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# Quantization recipe: 4-bit NF4 weights with double quantization,
# computing in float16.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Base model, loaded with the quantization recipe above and automatic
# device placement.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=bnb_config,
)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# LoRA adapter settings: rank 8, alpha 16, 5% dropout, applied to the
# q_proj / k_proj / v_proj modules, bias="none", causal-LM task type.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj"],
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# Prepare the quantized model for k-bit training (gradient checkpointing on),
# then wrap it with the LoRA configuration.
model = get_peft_model(
    prepare_model_for_kbit_training(model, use_gradient_checkpointing=True),
    lora_config,
)


In [7]:
from datasets import load_dataset

# Fetch only the "train" split of CodeAlpaca-20k.
dataset = load_dataset(
    path="sahil2801/CodeAlpaca-20k",
    split="train",
)


def safe_concat(field):
    """Return ``field`` as a single string that is safe to concatenate.

    Args:
        field: A string, a list/tuple of fragments, or ``None``.

    Returns:
        str: ``""`` for ``None``; list/tuple items joined with single spaces
        (``None`` items are skipped, other items go through ``str``); any
        other value converted with ``str`` (a no-op for strings).
    """
    # A missing value must not reach the caller's `+ "\n"` as None.
    if field is None:
        return ""
    if isinstance(field, (list, tuple)):
        # str() keeps the join from failing on non-string fragments.
        return " ".join(str(item) for item in field if item is not None)
    return str(field)


def tokenize_function(examples):
    """Tokenize a batch of instruction/response rows.

    Each prompt is the instruction, then the row's ``input`` context when the
    batch has an ``input`` column and the value is non-empty, then the output,
    joined by newlines. Without an ``input`` value the prompt is exactly
    ``instruction + "\\n" + output``.

    Args:
        examples: Mapping of column name to list of values for one batch.
            Must contain ``instruction`` and ``output``; ``input`` is optional.

    Returns:
        The tokenizer's encoding of the prompts, truncated and padded to
        512 tokens.
    """
    instructions = examples["instruction"]
    responses = examples["output"]
    # Batches without an "input" column fall back to the two-part prompt.
    contexts = examples["input"] if "input" in examples else [None] * len(instructions)

    prompts = []
    for inst, ctx, out in zip(instructions, contexts, responses):
        parts = [safe_concat(inst)]
        if ctx:  # skips None, "" and [] so no blank line is inserted
            parts.append(safe_concat(ctx))
        parts.append(safe_concat(out))
        prompts.append("\n".join(parts))
    return tokenizer(prompts, truncation=True, padding="max_length", max_length=512)

# Run the tokenizer over the dataset in batches; every original column is
# dropped so only the tokenizer's outputs remain.
tokenized_dataset = dataset.map(
    tokenize_function,
    remove_columns=dataset.column_names,
    batched=True,
)


Map:   0%|          | 0/20022 [00:00<?, ? examples/s]

In [9]:
# Persist the model and the tokenizer side by side in ./phi2-finetuned.
# NOTE(review): `model` was wrapped by get_peft_model in an earlier cell, so this
# presumably writes only the LoRA adapter files rather than full weights — confirm.
# NOTE(review): execution counts jump from In [7] to In [9]; the training step is
# not visible here, so verify `model` has actually been trained before saving.
model.save_pretrained("./phi2-finetuned")
# Kept as the cell's last expression so the tuple of written tokenizer files is displayed.
tokenizer.save_pretrained("./phi2-finetuned")



('./phi2-finetuned/tokenizer_config.json',
 './phi2-finetuned/special_tokens_map.json',
 './phi2-finetuned/vocab.json',
 './phi2-finetuned/merges.txt',
 './phi2-finetuned/added_tokens.json',
 './phi2-finetuned/tokenizer.json')

Load the saved model and check that it responds sensibly to a sample prompt.


In [10]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


# Reload the saved artifacts and run one sampled generation as a smoke test.
model_dir = "./phi2-finetuned"
model = AutoModelForCausalLM.from_pretrained(model_dir, trust_remote_code=True, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token


prompt = "Write a Python function to add two numbers."

# Tokenize without padding: a single prompt needs none, and padding it to 128
# tokens would count against the generation length budget.
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=128)
# Follow the placement chosen by device_map="auto" instead of guessing from
# torch.cuda.is_available().
device = model.device
inputs = {k: v.to(device) for k, v in inputs.items()}

outputs = model.generate(
    **inputs,
    max_new_tokens=200,  # budget for generated tokens only, independent of prompt length
    do_sample=True,
    temperature=0.7,
    pad_token_id=tokenizer.eos_token_id,  # explicit, avoids the "Setting pad_token_id" warning
)
# The decoded text contains the prompt followed by the model's continuation.
result = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("Prompt:")
print(prompt)
print("\nGenerated Response:")
print(result)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Prompt:
Write a Python function to add two numbers.

Generated Response:
Write a Python function to add two numbers.
def add_numbers(num1, num2):
  return num1 + num2

# Call the function
result = add_numbers(5, 10)
print(result)

