In [1]:
# Modules for fine-tuning
from unsloth import FastLanguageModel
import torch # Import PyTorch
from trl import SFTTrainer # Trainer for supervised fine-tuning (SFT)
from unsloth import is_bfloat16_supported # Checks if the hardware supports bfloat16 precision
from transformers import TrainingArguments # Defines training hyperparameters
from datasets import load_dataset # Lets you load fine-tuning datasets

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
from transformers.utils import logging
from huggingface_hub import HfApi, HfFolder, hf_hub_download

import os
os.environ["TRANSFORMERS_OFFLINE"] = "1"  # <- ini penting


In [3]:
model_path = r"C:\Users\Kentdry\Documents\VSCODE\TA1(Deepseek)\models--unsloth--llama-3.2-1b-instruct-unsloth-bnb-4bit\snapshots\0a4436e20494a6504464ce35274b7e53fb7883d0"  # lengkapin path-nya
max_seq_length = 2048  # Maximum number of tokens processed at once
dtype = None  # Default data type (adjusts automatically)
load_in_4bit = True  # Enable 4-bit quantization to save memory
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_path,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

  GPU_BUFFERS = tuple([torch.empty(2*256*2048, dtype = dtype, device = f"cuda:{i}") for i in range(n_gpus)])


==((====))==  Unsloth 2025.5.7: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    NVIDIA GeForce RTX 3050 6GB Laptop GPU. Num GPUs = 1. Max memory: 6.0 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [4]:
# Define a system prompt under prompt_style 
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
{}

### Question:
{}

### Response:
{}"""

In [5]:
# Creating a test question for inference
instruction = "tell me who is this person"
question = "Gibran Rakabuming Raka"

# Enable optimized inference mode for Unsloth models (improves speed and efficiency)
FastLanguageModel.for_inference(model)  # Unsloth has 2x faster inference!

# Format the question using the structured prompt (`prompt_style`) and tokenize it
inputs = tokenizer([prompt_style.format(instruction,question, "")], return_tensors="pt").to("cuda")  # Convert input to PyTorch tensor & move to GPU

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
outputs = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
tell me who is this person

### Question:
Gibran Rakabuming Raka

### Response:
Gibran Rakabuming Raka, also known as Gibran, is a Lebanese poet and writer. He was born in 1884 in Haifa, a city in what is now Israel. Raka is best known for his poetic works, particularly his collection of poems, "The Broken Wings" (also translated as "The Broken Wings of the Soul"), which explores themes of love, loss, and the human condition. He is considered one of the most important figures in the Arab literary movement, alongside other notable writers such as Tawfiq al-Hakim and Omar al-Mukhtar.<|eot_id|>


In [7]:
from datasets import Dataset
from unsloth import to_sharegpt
import pandas as pd
import random

dataset_ds= pd.read_csv("dataset_ds_train.csv", na_values=["#N/A", "#n/a", "n/a", "NA", "na"])
dataset_ds = Dataset.from_pandas(dataset_ds)
all_list=[]
for item in dataset_ds:
    single_row = Dataset.from_list([item]) 
    dataset_1 = to_sharegpt(
        single_row,
        merged_prompt = (
            "The following are the student High School grade."
            "[[The student scored {ENG} in English, and {MATH} in Math.]]"
            "[[ They also scored {BIO} in Biology,{CHEM} in Chemistry, and {PHY} in Physics.]]"
            "[[ They also scored {ECON} in Economics,{GEO} in Geography, and {SOC} in Social.]]"
            "[[ They scored {FINAL} on their final year of high school exam.]]"
            "[[ They applied {major_name_opcs} as a major.]]"
        ),
        conversation_extension = 1,
        output_column_name = "sem_03_CGPA",
    )
    list_1 = dataset_1.to_list()
    all_list.extend(list_1)
    dataset_2 = to_sharegpt(
        single_row,
        merged_prompt = (
            "The following are the student data."
            "[[ They applied {major_name_opcs} as a major.]]"
            "[[ Their father's occupation is {father_occupation} and mother's occupation is {mother_occupation}.]]"
            "[[ The student is {gender} and studied at {school_name} in {school_state}.]]"
            "[[ The student takes the {curriculum_name} curriculum and is classified as {school_prop} school.]]"
        ),
        conversation_extension = 1,
        output_column_name = "sem_03_CGPA",
    )

    list_2 = dataset_2.to_list()
    all_list.extend(list_2)


dataset = Dataset.from_list(all_list)
print(f"Jumlah baris conversation: {len(dataset )}")
print(dataset[:4])
print(dataset[2500])

Merging columns: 100%|██████████| 1/1 [00:00<00:00, 100.00 examples/s]
Converting to ShareGPT: 100%|██████████| 1/1 [00:00<00:00, 199.91 examples/s]
Merging columns: 100%|██████████| 1/1 [00:00<00:00, 71.42 examples/s]
Converting to ShareGPT: 100%|██████████| 1/1 [00:00<00:00, 200.01 examples/s]
Merging columns: 100%|██████████| 1/1 [00:00<00:00, 71.44 examples/s]
Converting to ShareGPT: 100%|██████████| 1/1 [00:00<00:00, 121.67 examples/s]
Merging columns: 100%|██████████| 1/1 [00:00<00:00, 100.03 examples/s]
Converting to ShareGPT: 100%|██████████| 1/1 [00:00<00:00, 120.24 examples/s]
Merging columns: 100%|██████████| 1/1 [00:00<00:00, 90.90 examples/s]
Converting to ShareGPT: 100%|██████████| 1/1 [00:00<00:00, 142.86 examples/s]
Merging columns: 100%|██████████| 1/1 [00:00<00:00, 90.87 examples/s]
Converting to ShareGPT: 100%|██████████| 1/1 [00:00<00:00, 125.01 examples/s]
Merging columns: 100%|██████████| 1/1 [00:00<00:00, 104.78 examples/s]
Converting to ShareGPT: 100%|██████████

KeyboardInterrupt: 

In [None]:
# # Menentukan berapa banyak baris yang ingin diprint
jumlah_baris = 1
# # Loop menggunakan indeks biasa
# for i in range(jumlah_baris):
#     print(f"Baris ke-{i+1}:")      # Menampilkan nomor baris
#     print(dataset[i])              # Menampilkan isi dari baris ke-i
#     print("\n" + "="*50 + "\n")    # Pemisah antar baris

#--------------------------------------
conversations_list = dataset['conversations']

# print(conversations_list[i][4]['from'] ) # i=data ke brp, 4 conversation ke berapa, 'from'= siapa yang ngomong

for i in range(jumlah_baris):
    if len(conversations_list[i]) >= 2:  # pastikan ada minimal 2 turn
        if conversations_list[i][0]['from'] == 'human' and conversations_list[i][1]['from'] == 'gpt':
            print(f"Baris ke-{i+1}:")
            print(conversations_list[i][0]['value']) # hanya tampilkan human prompt
            print(conversations_list[i][1]['value']) # hanya tampilkan GPT output
            print("\n" + "="*50 + "\n")


Baris ke-1:
The following are the student High School grade.The student scored 75.75 in English, and 77.25 in Math. They also scored 78.0 in Biology,67.75 in Chemistry, and 78.25 in Physics. They scored 21.4 on their final year of high school exam. They applied Psikologi as a major.
2.06




In [None]:
def convert_all_to_alpaca_format(dataset):
    alpaca_data = []
    conversations = dataset['conversations']
    instruction = "Based on the data given, predict their Cumulative GPA for the third semester"

    for i in range(len(conversations)):
        if conversations[i][0]['from'] == 'human' and conversations[i][1]['from'] == 'gpt':
            alpaca_data.append({
                "instruction": instruction,
                "input": conversations[i][0]['value'],
                "output": "their third semester cumulative GPA is " + conversations[i][1]['value']
            })

    return alpaca_data

alpaca_dataset = convert_all_to_alpaca_format(dataset)
for n in range(6):
    print(alpaca_dataset[n])  # untuk lihat entri pertama



{'instruction': 'Based on the data given, predict their Cumulative GPA for the third semester', 'input': 'The following are the student High School grade.The student scored 75.75 in English, and 77.25 in Math. They also scored 78.0 in Biology,67.75 in Chemistry, and 78.25 in Physics. They scored 21.4 on their final year of high school exam. They applied Psikologi as a major.', 'output': 'their third semester cumulative GPA is 2.06'}
{'instruction': 'Based on the data given, predict their Cumulative GPA for the third semester', 'input': "The following are the student data. They applied Psikologi as a major. Their father's occupation is 0 and mother's occupation is House Wife. The student is Female and studied at SMAN 2 Purwodadi Grobogan in JATENG. The student takes the Science curriculum and is classified as SMA school.", 'output': 'their third semester cumulative GPA is 2.06'}
{'instruction': 'Based on the data given, predict their Cumulative GPA for the third semester', 'input': 'The

In [None]:
# We need to format the dataset to fit our prompt training style 
EOS_TOKEN = tokenizer.eos_token  # Define EOS_TOKEN which the model when to stop generating text during training
EOS_TOKEN

'<|eot_id|>'

In [None]:
chat_templates="""Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question 
    ### Instruction:
    {}
    ### Input:
    {}
    ### Response:
    {}"""

def formatting_prompts_func(example):
    instructions = example["instruction"]
    inputs       = example["input"]
    outputs      = example["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = chat_templates.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass

from datasets import Dataset
# Konversi list of dict ke HuggingFace Dataset
alpaca_dataset = Dataset.from_list(alpaca_dataset)
alpaca_dataset = alpaca_dataset.map(formatting_prompts_func, batched = True)

Map: 100%|██████████| 4474/4474 [00:00<00:00, 83596.84 examples/s]


In [None]:
# Apply LoRA (Low-Rank Adaptation) fine-tuning to the model 
model = FastLanguageModel.get_peft_model(
    model,
    r=32,  # LoRA rank: Determines the size of the trainable adapters (higher = more parameters, lower = more efficiency)
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", 
                    "gate_proj", "up_proj", "down_proj"],
    lora_alpha=64,  # Scaling factor for LoRA updates (higher values allow more influence from LoRA layers)
    lora_dropout=0,  # Dropout rate for LoRA layers (0 means no dropout, full retention of information)
    bias="none",  # Specifies whether LoRA layers should learn bias terms (setting to "none" saves memory)
    use_gradient_checkpointing="unsloth",  # Saves memory by recomputing activations instead of storing them (recommended for long-context fine-tuning)
    random_state=3407,  # Sets a seed for reproducibility, ensuring the same fine-tuning behavior across runs
    use_rslora=False,  # Whether to use Rank-Stabilized LoRA (disabled here, meaning fixed-rank LoRA is used)
    loftq_config=None,  # Low-bit Fine-Tuning Quantization (LoFTQ) is disabled in this configuration
)
model.print_trainable_parameters()

Unsloth 2025.5.7 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


trainable params: 22,544,384 || all params: 1,258,358,784 || trainable%: 1.7916


Now, we initialize `SFTTrainer`, a supervised fine-tuning trainer from `trl` (Transformer Reinforcement Learning), to fine-tune our model efficiently on a dataset.

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = alpaca_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 1,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 10,
        # max_steps = 60,
        num_train_epochs=3,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 10,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "lora_model4",
        report_to = "none",
    ),
)

Unsloth: Tokenizing ["text"]: 100%|██████████| 4474/4474 [00:00<00:00, 6149.46 examples/s]


## Step 4 — Model training! 

This should take around 30 to 40 minutes — we can then check out our training results on Weights and Biases

In [None]:
# Start the fine-tuning process
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 4,474 | Num Epochs = 3 | Total steps = 1,677
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 22,544,384/1,000,000,000 (2.25% trained)


Step,Training Loss
10,2.6817
20,0.4973
30,0.3024
40,0.2899
50,0.2499
60,0.2646
70,0.2444
80,0.2333
90,0.2393
100,0.2243


In [None]:
# model.save_pretrained("lora_model")  # Local saving
trainer.model.save_pretrained("lora_model4")
tokenizer.save_pretrained("lora_model4")
trainer.save_model("lora_model4")

In [2]:
from unsloth import FastLanguageModel
if True:   
    # Set parameters
    model_path = r"C:\Users\Kentdry\Documents\VSCODE\TA1(Deepseek)\models--unsloth--llama-3.2-1b-instruct-unsloth-bnb-4bit\snapshots\0a4436e20494a6504464ce35274b7e53fb7883d0"  # lengkapin path-nya
    max_seq_length = 2048  # Maximum number of tokens processed at once
    dtype = None  # Default data type (adjusts automatically)
    load_in_4bit = True  # Enable 4-bit quantization to save memory
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = model_path,
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
    )
model.load_adapter("lora_model4", adapter_name="default")
model.set_adapter("default")

FastLanguageModel.for_inference(model)


==((====))==  Unsloth 2025.5.7: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    NVIDIA GeForce RTX 3050 6GB Laptop GPU. Num GPUs = 1. Max memory: 6.0 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 2048, padding_idx=128004)
    (layers): ModuleList(
      (0): LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=2048, out_features=2048, bias=False)
            (lora_dropout): ModuleDict(
              (default): Identity()
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=2048, out_features=32, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=32, out_features=2048, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (k_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=2048, out_features=512, bias=False)
            (lora_dropout): ModuleDict(
              (

In [3]:
chat_templates="""Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question 
    ### Instruction:
    {}
    ### Input:
    {}
    ### Response:
    {}"""

In [4]:

# FastLanguageModel.for_inference(model)
instruction = "describe him as a bad person"
question = "Angelo"

inputs = tokenizer(
    chat_templates.format(instruction,question,"")# output - leave this blank for generation!
    , return_tensors = "pt").to("cuda")
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
outputs = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question 
    ### Instruction:
    describe him as a bad person
    ### Input:
    Angelo
    ### Response:
     he is a bad person who is very manipulative and uses his charm to get what he wants<|eot_id|>


In [5]:
import json
import re
import numpy as np

with open("dataset_ds_test.json", "r") as f:
    dataset = [json.loads(line) for line in f]  # kalau format JSON Lines

for i in range (10):
    cgpa_predictions = []
    cgpa_real = []
    percentage=[]
    MSE=[]
    ME=[]   
    for i in range(len(dataset)):
        input_text = dataset[i]["input"]

        inputs = tokenizer(
        [
            chat_templates.format(
                "Based on the student's data, predict their Cumulative GPA for the third semester based on their previous scores. Directly give the prediction, no explanation needed",        
                input_text,
                "", # output - leave this blank for generation!
            )
        ], return_tensors = "pt").to("cuda")

    # Generate a response using LoRA fine-tuned model with specific parameters
        outputs = model.generate(
            input_ids=inputs.input_ids,          # Tokenized input IDs
            attention_mask=inputs.attention_mask, # Attention mask for padding handling
            max_new_tokens=1200,                  # Maximum length for generated response
            use_cache=True,                        # Enable cache for efficient generation
        )
        response = tokenizer.batch_decode(outputs)

        response = response[0].split("### Response:")[1]
        match_predictions= re.search(r"(\d+\.\d+)", response)
        if match_predictions:
            cgpa = float(match_predictions.group(1))
            cgpa_predictions.append(cgpa)
        else:
            cgpa_predictions.append(0) 

        output_text = dataset[i]["output"]
        match_real = re.search(r"(\d+\.\d+)", output_text)
        if match_real:
            cgpa = float(match_real.group(1))
            cgpa_real.append(cgpa)
        else:
            cgpa_real.append(0) 

    # print(cgpa_real)
    # print(cgpa_predictions)

    correct = 0
    for i in range(len(dataset)):
        if cgpa_real[i] <= 1 and cgpa_predictions[i] <= 1:
            correct += 1
        elif cgpa_real[i] <= 2 and cgpa_predictions[i] <= 2:
            correct += 1
        elif cgpa_real[i] <= 3 and cgpa_predictions[i] <= 3:
            correct += 1
        elif cgpa_real[i] >= 3 and cgpa_predictions[i] >= 3:
            correct += 1

    percentage.append((correct / len(dataset)) * 100)
    print("percentage:", percentage[-1], "%")

    p = np.array(cgpa_predictions)
    r = np.array(cgpa_real)

    mask = (p != 0) & (r != 0)
    MSE.append(np.mean((p[mask] - r[mask]) ** 2))
    ME.append(np.mean(abs(p[mask] - r[mask])))
    print("MSE:", MSE[-1])
    print("ME:", ME[-1])




percentage: 74.57627118644068 %
MSE: 0.30990093457943924
ME: 0.3811214953271028
percentage: 78.8135593220339 %
MSE: 0.2948388888888889
ME: 0.37
percentage: 81.35593220338984 %
MSE: 0.2672490740740741
ME: 0.34712962962962957
percentage: 78.8135593220339 %
MSE: 0.2585351851851852
ME: 0.34055555555555556
percentage: 79.66101694915254 %
MSE: 0.2719130841121495
ME: 0.3427102803738317
percentage: 81.35593220338984 %
MSE: 0.26316666666666655
ME: 0.3446296296296296
percentage: 83.89830508474576 %
MSE: 0.25015555555555546
ME: 0.3187037037037037
percentage: 80.50847457627118 %
MSE: 0.23648518518518513
ME: 0.3244444444444444
percentage: 79.66101694915254 %
MSE: 0.2576574074074074
ME: 0.33185185185185184
percentage: 79.66101694915254 %
MSE: 0.2755888888888889
ME: 0.34592592592592586


In [6]:
print("percentage_mean",np.mean(percentage))
print("MSE_mean",np.mean(MSE))
print("ME_mean",np.mean(ME))

percentage_mean 79.66101694915254
MSE_mean 0.2755888888888889
ME_mean 0.34592592592592586


In [7]:
inputs = tokenizer(
[
    chat_templates.format(
        "find the y",        
        "15,30,45,60,y",
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)



<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question 
    ### Instruction:
    find the y
    ### Input:
    15,30,45,60,y
    ### Response:
     30.5<|eot_id|>
