<a href="https://colab.research.google.com/github/Abishek0070/Fine_Tuned_LLMs/blob/main/LLaMA-3-8B_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%capture
# Install the fine-tuning stack into the notebook runtime; the %%capture magic on
# the first line of this cell hides pip's output.
# NOTE(review): only xformers is pinned below. Every other package resolves to
# whatever version is current at install time, so a later re-run may pick up
# different (possibly incompatible) releases.
!pip install bitsandbytes accelerate peft trl triton
!pip install --upgrade xformers==0.0.29.post3 # reinstall xformers with upgrade to ensure dependencies
!pip install cut_cross_entropy unsloth_zoo
!pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
!pip install unsloth

In [None]:
import os
# Exported before torch (and, in later cells, unsloth) is imported, so the value
# is already present in the process environment when those libraries load.
# NOTE(review): presumably intended to keep bitsandbytes away from bf16 code
# paths on GPUs without bf16 support -- confirm that the installed
# bitsandbytes release actually reads BNB_CUDA_BF16.
os.environ["BNB_CUDA_BF16"] = "0"

import torch

In [None]:
from unsloth import FastLanguageModel
import torch

# Load the 4-bit LLaMA-3 8B checkpoint together with its tokenizer.
# float16 is requested explicitly rather than left to auto-detection, and the
# 2048-token context length is the same value later handed to the trainer.
model, tokenizer = FastLanguageModel.from_pretrained(
    load_in_4bit=True,
    dtype=torch.float16,
    max_seq_length=2048,
    model_name="unsloth/llama-3-8b-bnb-4bit",
)

In [None]:
# Attach LoRA adapters (rank 4, alpha 8, dropout 0.1) to the four attention
# projection modules listed below.
# NOTE(review): this cell rebinds `model`, so running it a second time passes
# the already-adapted model back in; re-run the loading cell first.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.1,
    lora_alpha=8,
    r=4,
)


In [None]:
from datasets import load_dataset, Dataset

# Read the instruction dataset from the JSON file in the runtime's /content
# directory, requesting its "train" split.
dataset = load_dataset(path="json", split="train", data_files="/content/linux_programming_dataset_500.json")

# Sanity check: show which dataset class the loader returned.
print(type(dataset))

def format_sample(example):
    """Render one dataset record as an instruction/response training text.

    Args:
        example: Mapping with an "instruction" entry, an "output" entry and an
            optional "input" entry carrying extra context for the instruction.

    Returns:
        A dict with a single "text" key. The text is the instruction under a
        "### Instruction:" header followed by the output under a
        "### Response:" header. When "input" holds non-blank text it is
        appended to the instruction under an "Input:" label; a missing, null
        or whitespace-only "input" is ignored.

    NOTE(review): nothing in the visible notebook calls this helper; the
    dataset is mapped with `format_prompt` instead.
    """
    instruction = example["instruction"]
    # A JSON null arrives as None, which has no .strip(); treat it as empty.
    input_text = example.get("input") or ""

    if input_text.strip():
        instruction = f"{instruction}\n\nInput:\n{input_text}"

    return {
        "text": f"### Instruction:\n{instruction}\n\n### Response:\n{example['output']}"
    }






In [None]:
def format_prompt(example):
    """Build the training text for one dataset record.

    Args:
        example: Mapping with an "instruction" entry, an "output" entry and an
            optional "input" entry carrying extra context for the instruction.

    Returns:
        A dict with a single "text" key holding the instruction under a
        "### Instruction:" header and the output under a "### Response:"
        header. Records without usable "input" text produce exactly the same
        string as before; when "input" is a non-blank string it is appended
        to the instruction under an "Input:" label, so that context is no
        longer silently dropped from the training text.
    """
    instruction = example["instruction"]
    response = example["output"]

    # "input" is optional: absent keys, nulls, non-string values and
    # whitespace-only strings all leave the instruction untouched.
    extra = example.get("input")
    if isinstance(extra, str) and extra.strip():
        instruction = f"{instruction}\n\nInput:\n{extra}"

    prompt = f"### Instruction:\n{instruction}\n\n### Response:\n{response}"
    return {"text": prompt}

# Apply the prompt template to every record and terminate each training text
# with the tokenizer's end-of-sequence token. Unsloth's fine-tuning notebooks
# append EOS explicitly so the model is shown where a response ends; without it
# generation may run on until max_new_tokens.
EOS_TOKEN = tokenizer.eos_token


def _with_eos(example):
    """Format one record and append EOS exactly once."""
    text = format_prompt(example)["text"]
    # The guard keeps the token from being duplicated if the formatter (or a
    # previous run) has already added it.
    if not text.endswith(EOS_TOKEN):
        text = text + EOS_TOKEN
    return {"text": text}


dataset = dataset.map(_with_eos)


In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
import torch

# Hyper-parameters for a single-epoch run. One sample per device step with four
# accumulation steps gives an effective batch of four sequences per update.
training_args = TrainingArguments(
    output_dir="outputs",
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=1e-4,
    fp16=True,   # half precision chosen for T4 GPUs
    bf16=False,
    logging_steps=5,
    save_strategy="no",   # no intermediate checkpoints are written
    report_to="none",     # no external experiment tracker
)

# Supervised fine-tuning on the "text" column of the formatted dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    args=training_args,
)

trainer.train()

In [None]:
# Put the fine-tuned model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# Same "### Instruction / ### Response" template as the training texts, with
# the response left open for the model to complete.
prompt = (
    "### Instruction:\n"
    "How would you debug a high CPU usage issue on a Linux server?\n\n"
    "### Response:"
)
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# Sampled decoding capped at 180 new tokens, with a repetition penalty.
outputs = model.generate(
    do_sample=True,
    top_p=0.9,
    temperature=0.7,
    repetition_penalty=1.3,
    max_new_tokens=180,
    **inputs,
)

# The decoded sequence contains the prompt followed by the generated answer.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


In [None]:
# Write the fine-tuned model and its tokenizer to the local "lora_adapter"
# directory, which the upload cell later pushes to the Hub.
# NOTE(review): presumably only the LoRA adapter weights are written, since
# `model` is the PEFT-wrapped model -- confirm by inspecting the folder.
model.save_pretrained("lora_adapter")
tokenizer.save_pretrained("lora_adapter")


In [None]:
# Interactive Hugging Face login; the token is entered in the notebook prompt
# rather than being hard-coded in this file.
from huggingface_hub import notebook_login
notebook_login()


In [None]:
from huggingface_hub import HfApi

# Hub repository that receives the adapter; bound once and used by both calls.
repo_id = "Master-Abi/llama3-8b-linux-assistant-lora"

api = HfApi()

# Create the public repository; exist_ok=True is passed so an already existing
# repository is accepted.
api.create_repo(repo_id=repo_id, exist_ok=True, private=False)

# Push the contents of the local "lora_adapter" folder to that repository.
api.upload_folder(repo_id=repo_id, folder_path="lora_adapter")

