In [1]:
# Modules for fine-tuning
from unsloth import FastLanguageModel
import torch # Import PyTorch
from trl import SFTTrainer # Trainer for supervised fine-tuning (SFT)
from unsloth import is_bfloat16_supported # Checks if the hardware supports bfloat16 precision
from transformers import TrainingArguments # Defines training hyperparameters
from datasets import load_dataset # Lets you load fine-tuning datasets

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
from transformers.utils import logging
from huggingface_hub import HfApi, HfFolder, hf_hub_download

import os
os.environ["TRANSFORMERS_OFFLINE"] = "1"  # <- ini penting


In [3]:
model_path = r"C:\Users\Kentdry\Documents\VSCODE\TA1(Deepseek)\models--unsloth--llama-3.2-1b-instruct-unsloth-bnb-4bit\snapshots\0a4436e20494a6504464ce35274b7e53fb7883d0"  # lengkapin path-nya
max_seq_length = 2048  # Maximum number of tokens processed at once
dtype = None  # Default data type (adjusts automatically)
load_in_4bit = True  # Enable 4-bit quantization to save memory
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_path,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

  GPU_BUFFERS = tuple([torch.empty(2*256*2048, dtype = dtype, device = f"cuda:{i}") for i in range(n_gpus)])


==((====))==  Unsloth 2025.5.7: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    NVIDIA GeForce RTX 3050 6GB Laptop GPU. Num GPUs = 1. Max memory: 6.0 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [4]:
# Define a system prompt under prompt_style 
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
{}

### Question:
{}

### Response:
{}"""

In [5]:
# Creating a test question for inference
instruction = "tell me who is this person"
question = "Gibran Rakabuming Raka"

# Enable optimized inference mode for Unsloth models (improves speed and efficiency)
FastLanguageModel.for_inference(model)  # Unsloth has 2x faster inference!

# Format the question using the structured prompt (`prompt_style`) and tokenize it
inputs = tokenizer([prompt_style.format(instruction,question, "")], return_tensors="pt").to("cuda")  # Convert input to PyTorch tensor & move to GPU

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
outputs = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
tell me who is this person

### Question:
Gibran Rakabuming Raka

### Response:
Gibran Rakabuming Raka is a Colombian poet, writer, and artist. He was born in 1971 in Medellín, Colombia. Raka is known for his introspective and philosophical works, often exploring themes of identity, culture, and spirituality. His writing style is characterized by simplicity, clarity, and a deep sense of introspection. Raka's work has been widely praised for its thought-provoking and emotionally resonant nature, and he has gained recognition as one of the most important Colombian writers of his generation.<|eot_id|>


In [None]:
from datasets import Dataset
from unsloth import to_sharegpt
import pandas as pd
import random

dataset_ds= pd.read_csv("dataset_ds_train.csv", na_values=["#N/A", "#n/a", "n/a", "NA", "na"])
dataset_ds = Dataset.from_pandas(dataset_ds)

# Step 3: Convert ke format ShareGPT
dataset_1 = to_sharegpt(
    dataset_ds,
    merged_prompt = (
        "The following are the student High School grade."
        "[[The student scored {ENG} in English, and {MATH} in Math.]]"
        "[[ They also scored {BIO} in Biology,{CHEM} in Chemistry, and {PHY} in Physics.]]"
        "[[ They also scored {ECON} in Economics,{GEO} in Geography, and {SOC} in Social.]]"
        "[[ They scored {FINAL} on their final year of high school exam.]]"
        "[[ They applied {major_name_opcs} as a major.]]"
    ),
    conversation_extension = 1,
    output_column_name = "sem_03_CGPA",
)

dataset_2 = to_sharegpt(
    dataset_ds,
    merged_prompt = (
        "The following are the student data."
        "[[ They applied {major_name_opcs} as a major.]]"
        "[[ Their father's occupation is {father_occupation} and mother's occupation is {mother_occupation}.]]"
        "[[ The student is {gender} and studied at {school_name} in {school_state}.]]"
        "[[ The student takes the {curriculum_name} curriculum and is classified as {school_prop} school.]]"
    ),
    conversation_extension = 1,
    output_column_name = "sem_03_CGPA",
)

data_list = dataset_ds.to_list()
# Ambil nama-nama kolom dan tipe
column_names = dataset_ds.column_names
column_names_2 = [col for col in dataset_ds.column_names if col != 'sem_03_CGPA']
# Tambahkan 2000 row baru
for i in range(2355):
    previous_row = data_list[i]
    n_empty = random.randint(2, 7)
    random_columns = random.sample(column_names_2, n_empty)
    new_row = {}

    for col in column_names:
        if col in random_columns:
            new_row[col] = None
        else:
            new_row[col] = previous_row[col]

    data_list.append(new_row)
dataset_ds = Dataset.from_list(data_list)

dataset_3 = to_sharegpt(
    dataset_ds,
    merged_prompt = (
        "The following are the student High School grade and student data."
        "[[The student scored {ENG} in English, and {MATH} in Math.]]"
        "[[ They also scored {BIO} in Biology,{CHEM} in Chemistry, and {PHY} in Physics.]]"
        "[[ They also scored {ECON} in Economics,{GEO} in Geography, and {SOC} in Social.]]"
        "[[ They scored {FINAL} on their final year of high school exam.]]"
        "[[ They applied {major_name_opcs} as a major.]]"
        "[[ Their father's occupation is {father_occupation} and mother's occupation is {mother_occupation}.]]"
        "[[ The student is {gender} and studied at {school_name} in {school_state}.]]"
        "[[ The student takes the {curriculum_name} curriculum and is classified as {school_prop} school.]]"
    ),
    conversation_extension = 1,
    output_column_name = "sem_03_CGPA",
)

from datasets import concatenate_datasets
dataset = concatenate_datasets([dataset_1, dataset_2,dataset_3])

print(f"Jumlah baris conversation: {len(dataset )}")
print(dataset[1])
print(dataset[2500])

Merging columns: 100%|██████████| 2355/2355 [00:00<00:00, 89049.03 examples/s]
Converting to ShareGPT: 100%|██████████| 2355/2355 [00:00<00:00, 177676.79 examples/s]
Merging columns: 100%|██████████| 2355/2355 [00:00<00:00, 85425.55 examples/s]
Converting to ShareGPT: 100%|██████████| 2355/2355 [00:00<00:00, 224088.25 examples/s]
Merging columns: 100%|██████████| 4710/4710 [00:00<00:00, 58906.13 examples/s]
Converting to ShareGPT: 100%|██████████| 4710/4710 [00:00<00:00, 213019.03 examples/s]

Jumlah baris conversation: 9420
{'conversations': [{'from': 'human', 'value': 'The following are the student High School grade.The student scored 77.75 in English, and 64.75 in Math. They also scored 79.25 in Economics,80.0 in Geography, and 76.25 in Social. They scored 25.95 on their final year of high school exam. They applied Manajemen as a major.'}, {'from': 'gpt', 'value': '1.06'}]}
{'conversations': [{'from': 'human', 'value': "The following are the student data. They applied Manajemen as a major. Their father's occupation is 0 and mother's occupation is House Wife. The student is Male and studied at SMA Tunas Mulia Kab. Tangerang in BANTEN. The student takes the Social curriculum and is classified as SMA school."}, {'from': 'gpt', 'value': '3.12'}]}





In [7]:
# # Menentukan berapa banyak baris yang ingin diprint
jumlah_baris = 1
# # Loop menggunakan indeks biasa
# for i in range(jumlah_baris):
#     print(f"Baris ke-{i+1}:")      # Menampilkan nomor baris
#     print(dataset[i])              # Menampilkan isi dari baris ke-i
#     print("\n" + "="*50 + "\n")    # Pemisah antar baris

#--------------------------------------
conversations_list = dataset['conversations']

# print(conversations_list[i][4]['from'] ) # i=data ke brp, 4 conversation ke berapa, 'from'= siapa yang ngomong

for i in range(jumlah_baris):
    if len(conversations_list[i]) >= 2:  # pastikan ada minimal 2 turn
        if conversations_list[i][0]['from'] == 'human' and conversations_list[i][1]['from'] == 'gpt':
            print(f"Baris ke-{i+1}:")
            print(conversations_list[i][0]['value']) # hanya tampilkan human prompt
            print(conversations_list[i][1]['value']) # hanya tampilkan GPT output
            print("\n" + "="*50 + "\n")


Baris ke-1:
The following are the student High School grade.The student scored 73.25 in English, and 70.75 in Math. They also scored 86.5 in Economics,73.75 in Geography, and 79.25 in Social. They scored 30.8 on their final year of high school exam. They applied Manajemen as a major.
3.25




In [8]:
def convert_all_to_alpaca_format(dataset):
    alpaca_data = []
    conversations = dataset['conversations']
    instruction = "Based on the data given, predict their Cumulative GPA for the third semester"

    for i in range(len(conversations)):
        if conversations[i][0]['from'] == 'human' and conversations[i][1]['from'] == 'gpt':
            alpaca_data.append({
                "instruction": instruction,
                "input": conversations[i][0]['value'],
                "output": "their third semester cumulative GPA is " + conversations[i][1]['value']
            })

    return alpaca_data

alpaca_dataset = convert_all_to_alpaca_format(dataset)
for n in range(6):
    print(alpaca_dataset[n])  # untuk lihat entri pertama



{'instruction': 'Based on the data given, predict their Cumulative GPA for the third semester', 'input': 'The following are the student High School grade.The student scored 73.25 in English, and 70.75 in Math. They also scored 86.5 in Economics,73.75 in Geography, and 79.25 in Social. They scored 30.8 on their final year of high school exam. They applied Manajemen as a major.', 'output': 'their third semester cumulative GPA is 3.25'}
{'instruction': 'Based on the data given, predict their Cumulative GPA for the third semester', 'input': 'The following are the student High School grade.The student scored 77.75 in English, and 64.75 in Math. They also scored 79.25 in Economics,80.0 in Geography, and 76.25 in Social. They scored 25.95 on their final year of high school exam. They applied Manajemen as a major.', 'output': 'their third semester cumulative GPA is 1.06'}
{'instruction': 'Based on the data given, predict their Cumulative GPA for the third semester', 'input': 'The following are

In [9]:
# We need to format the dataset to fit our prompt training style 
EOS_TOKEN = tokenizer.eos_token  # Define EOS_TOKEN which the model when to stop generating text during training
EOS_TOKEN

'<|eot_id|>'

In [None]:
chat_templates="""Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question 
    ### Instruction:
    {}
    ### Input:
    {}
    ### Response:
    {}"""

def formatting_prompts_func(example):
    instructions = example["instruction"]
    inputs       = example["input"]
    outputs      = example["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = chat_templates.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass

from datasets import Dataset
# Konversi list of dict ke HuggingFace Dataset
alpaca_dataset = Dataset.from_list(alpaca_dataset)
alpaca_dataset = alpaca_dataset.map(formatting_prompts_func, batched = True)


Map: 100%|██████████| 9420/9420 [00:00<00:00, 193764.59 examples/s]
Creating json from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 309.31ba/s]

Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question 
    ### Instruction:
    Based on the data given, predict their Cumulative GPA for the third semester
    ### Input:
    The following are the student data. They applied Kedokteran as a major. Their father's occupation is Entrepreneur and mother's occupation is House Wife. The student is Female and studied at SMA Negeri 1 Labuhan Ratu in LAMPUN. The student takes the Science curriculum and is classified as SMA school.
    ### Response:
    their third semester cumulative GPA is 0.0<|eot_id|>
Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question 
    ### Instruction:
    Based on the data given, predict their Cumulative 




In [11]:
# Apply LoRA (Low-Rank Adaptation) fine-tuning to the model 
model = FastLanguageModel.get_peft_model(
    model,
    r=32,  # LoRA rank: Determines the size of the trainable adapters (higher = more parameters, lower = more efficiency)
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", 
                    "gate_proj", "up_proj", "down_proj"],
    lora_alpha=64,  # Scaling factor for LoRA updates (higher values allow more influence from LoRA layers)
    lora_dropout=0,  # Dropout rate for LoRA layers (0 means no dropout, full retention of information)
    bias="none",  # Specifies whether LoRA layers should learn bias terms (setting to "none" saves memory)
    use_gradient_checkpointing="unsloth",  # Saves memory by recomputing activations instead of storing them (recommended for long-context fine-tuning)
    random_state=3407,  # Sets a seed for reproducibility, ensuring the same fine-tuning behavior across runs
    use_rslora=False,  # Whether to use Rank-Stabilized LoRA (disabled here, meaning fixed-rank LoRA is used)
    loftq_config=None,  # Low-bit Fine-Tuning Quantization (LoFTQ) is disabled in this configuration
)
model.print_trainable_parameters()

Unsloth 2025.5.7 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


trainable params: 22,544,384 || all params: 1,258,358,784 || trainable%: 1.7916


Now, we initialize `SFTTrainer`, a supervised fine-tuning trainer from `trl` (Transformer Reinforcement Learning), to fine-tune our model efficiently on a dataset.

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = alpaca_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 1,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 10,
        # max_steps = 60,
        num_train_epochs=3,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 10,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "lora_model3",
        report_to = "none",
    ),
)

Unsloth: Tokenizing ["text"]: 100%|██████████| 9325/9325 [00:00<00:00, 9767.00 examples/s] 


## Step 4 — Model training! 

This should take around 30 to 40 minutes — we can then check out our training results on Weights and Biases

In [13]:
# Start the fine-tuning process
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 9,325 | Num Epochs = 3 | Total steps = 3,495
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 22,544,384/1,000,000,000 (2.25% trained)


Step,Training Loss
10,2.7511
20,0.5419
30,0.3515
40,0.3238
50,0.3001
60,0.2784
70,0.2843
80,0.2771
90,0.2799
100,0.2744


In [14]:
# model.save_pretrained("lora_model")  # Local saving
trainer.model.save_pretrained("lora_model3")
tokenizer.save_pretrained("lora_model3")
trainer.save_model("lora_model3")

In [1]:
from unsloth import FastLanguageModel
if True:   
    # Set parameters
    model_path = r"C:\Users\Kentdry\Documents\VSCODE\TA1(Deepseek)\models--unsloth--llama-3.2-1b-instruct-unsloth-bnb-4bit\snapshots\0a4436e20494a6504464ce35274b7e53fb7883d0"  # lengkapin path-nya
    max_seq_length = 2048  # Maximum number of tokens processed at once
    dtype = None  # Default data type (adjusts automatically)
    load_in_4bit = True  # Enable 4-bit quantization to save memory
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = model_path,
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
    )
model.load_adapter("lora_model3", adapter_name="default")
model.set_adapter("default")

FastLanguageModel.for_inference(model)


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth Zoo will now patch everything to make training faster!


  GPU_BUFFERS = tuple([torch.empty(2*256*2048, dtype = dtype, device = f"cuda:{i}") for i in range(n_gpus)])


==((====))==  Unsloth 2025.5.7: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    NVIDIA GeForce RTX 3050 6GB Laptop GPU. Num GPUs = 1. Max memory: 6.0 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 2048, padding_idx=128004)
    (layers): ModuleList(
      (0): LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=2048, out_features=2048, bias=False)
            (lora_dropout): ModuleDict(
              (default): Identity()
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=2048, out_features=32, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=32, out_features=2048, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (k_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=2048, out_features=512, bias=False)
            (lora_dropout): ModuleDict(
              (

In [2]:
chat_templates="""Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question 
    ### Instruction:
    {}
    ### Input:
    {}
    ### Response:
    {}"""

In [8]:

# FastLanguageModel.for_inference(model)
instruction = "tell me who is this person"
question = "prabowo subianto"

inputs = tokenizer(
    chat_templates.format(instruction,question,"")# output - leave this blank for generation!
    , return_tensors = "pt").to("cuda")
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
outputs = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question 
    ### Instruction:
    tell me who is this person
    ### Input:
    prabowo subianto
    ### Response:
     their father's occupation is 0 and mother's occupation is House Wife. The person is Male and studied at SMA Negeri 1 Sumedang in JABAR. The person takes the Social curriculum and is classified as SMA school. The person takes the Social curriculum and is classified as SMA school. The person takes the Social curriculum and is classified as SMA school. The person takes the Social curriculum and is classified as SMA school. The person takes the Social curriculum and is classified as SMA school. The person takes the Social curriculum and is classified as SMA school. The person takes the Social curriculum and is classified as SMA school. The


In [4]:
import json
import re
import numpy as np

with open("dataset_ds_test.json", "r") as f:
    dataset = [json.loads(line) for line in f]  # kalau format JSON Lines

MSE=[]
ME=[] 
percentage=[]
for i in range (10):
    cgpa_predictions = []
    cgpa_real = []

  
    for j in range(len(dataset)):
        input_text = dataset[j]["input"]

        inputs = tokenizer(
        [
            chat_templates.format(
                "Based on the student's data, predict their Cumulative GPA for the third semester based on their previous scores. Directly give the prediction, no explanation needed",        
                input_text,
                "", # output - leave this blank for generation!
            )
        ], return_tensors = "pt").to("cuda")

    # Generate a response using LoRA fine-tuned model with specific parameters
        outputs = model.generate(
            input_ids=inputs.input_ids,          # Tokenized input IDs
            attention_mask=inputs.attention_mask, # Attention mask for padding handling
            max_new_tokens=1200,                  # Maximum length for generated response
            use_cache=True,                        # Enable cache for efficient generation
        )
        response = tokenizer.batch_decode(outputs)

        response = response[0].split("### Response:")[1]
        match_predictions= re.search(r"(\d+\.\d+)", response)
        if match_predictions:
            cgpa = float(match_predictions.group(1))
            cgpa_predictions.append(cgpa)
        else:
            cgpa_predictions.append(0) 

        output_text = dataset[j]["output"]
        match_real = re.search(r"(\d+\.\d+)", output_text)
        if match_real:
            cgpa = float(match_real.group(1))
            cgpa_real.append(cgpa)
        else:
            cgpa_real.append(0) 

    # print(cgpa_real)
    # print(cgpa_predictions)

    correct = 0
    for j in range(len(dataset)):
        if cgpa_real[j] <= 1 and cgpa_predictions[j] <= 1:
            correct += 1
        elif cgpa_real[j] <= 2 and cgpa_predictions[j] <= 2:
            correct += 1
        elif cgpa_real[j] <= 3 and cgpa_predictions[j] <= 3:
            correct += 1
        elif cgpa_real[j] >= 3 and cgpa_predictions[j] >= 3:
            correct += 1

    percentage.append((correct / len(dataset)) * 100)
    print("percentage:", percentage[-1], "%")

    p = np.array(cgpa_predictions)
    r = np.array(cgpa_real)

    mask = (p != 0) & (r != 0)
    MSE.append(np.mean((p[mask] - r[mask]) ** 2))
    ME.append(np.mean(abs(p[mask] - r[mask])))
    print("Step:",i)
    print("MSE:", MSE[i])
    print("ME:", ME[i])




percentage: 77.11864406779661 %
Step: 0
MSE: 0.2888878504672897
ME: 0.36074766355140186
percentage: 79.66101694915254 %
Step: 1
MSE: 0.19924351851851846
ME: 0.32361111111111107
percentage: 72.03389830508475 %
Step: 2
MSE: 0.315781308411215
ME: 0.40990654205607474
percentage: 78.8135593220339 %
Step: 3
MSE: 0.28612336448598125
ME: 0.3755140186915888
percentage: 77.11864406779661 %
Step: 4
MSE: 0.30620471698113205
ME: 0.3833018867924528
percentage: 77.11864406779661 %
Step: 5
MSE: 0.282112037037037
ME: 0.36416666666666664
percentage: 76.27118644067797 %
Step: 6
MSE: 0.24716759259259258
ME: 0.36750000000000005
percentage: 77.96610169491525 %
Step: 7
MSE: 0.28965849056603776
ME: 0.3703773584905661
percentage: 78.8135593220339 %
Step: 8
MSE: 0.2719878504672897
ME: 0.35897196261682246
percentage: 77.96610169491525 %
Step: 9
MSE: 0.2596509259259259
ME: 0.34157407407407403


In [5]:
print("percentage_mean",np.mean(percentage))
print("MSE_mean",np.mean(MSE))
print("ME_mean",np.mean(ME))

percentage_mean 77.28813559322035
MSE_mean 0.2746817655453019
ME_mean 0.3655671284050759


In [6]:
inputs = tokenizer(
[
    chat_templates.format(
        "what school are the student from?",        
        "",
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)



<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question 
    ### Instruction:
    what school are the student from?
    ### Input:
    
    ### Response:
     The student is from 0 to 0.<|eot_id|>
