## Install Dependencies

In [None]:
%%capture
# Dependency setup for this notebook. The %%capture magic above silences all
# output of this cell, including pip errors, so remove it temporarily when an
# install problem needs to be diagnosed.
# 1. Force un-install potentially conflicting libraries first
!pip uninstall -y unsloth unsloth-zoo peft trl transformers

# 2. Install Unsloth and compatible dependencies
# We use the specific 'colab-new' tag which is stable for T4 environments like Kaggle/Colab
# The package is pulled from the repository URL without a pinned revision, so
# the installed version can differ between runs.
!pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# 3. Install other requirements without deps to prevent version overwrites
# NOTE(review): flash-attn is part of this list; confirm it installs and is
# actually usable on the target GPU (e.g. T4), otherwise drop it from the list.
!pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes unsloth-zoo

## Import Libraries and Setup

In [None]:
import torch
from unsloth import FastLanguageModel
import json
import pandas as pd
from datasets import Dataset, concatenate_datasets
import os
import glob

# Shared model-loading settings, reused by the cells below.
max_seq_length, dtype, load_in_4bit = (
    2048,  # Maximum sequence length (RoPE scaling is supported automatically)
    None,  # None = auto detection; Float16 for Tesla T4/V100, Bfloat16 for Ampere+
    True,  # Use 4-bit quantization to reduce memory usage
)

# Report which GPU (device index 0) this session is running on.
gpu_name = torch.cuda.get_device_name(0)
print(f"GPU Model: {gpu_name}")

## Load, Merge, and Count Data

In [None]:
# Corpus files to merge into a single training set.
file_paths = [
    "/kaggle/input/nusantara-law-corpus/Adagium/all.json",
    "/kaggle/input/nusantara-law-corpus/Glosarium-MA/all.json",
    "/kaggle/input/nusantara-law-corpus/KHPTSultra/all.json",
    "/kaggle/input/nusantara-law-corpus/LawDictionary/all.json",
    "/kaggle/input/nusantara-law-corpus/UUD/uud.json"
]

combined_data = []

# Read each file in turn; missing or unreadable files are reported and skipped
# so that one bad file does not stop the remaining ones from loading.
for file_path in file_paths:
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        continue

    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            records = json.load(handle)

        # Only a top-level JSON list of records can be merged.
        if not isinstance(records, list):
            print(f"Warning: {file_path} format is not a list of records.")
            continue

        combined_data += records
        print(f"Successfully loaded {len(records)} records from: {os.path.basename(file_path)}")
    except Exception as err:
        print(f"Error reading {file_path}: {err}")

# Go through a DataFrame first: it is convenient for inspection and counting.
df = pd.DataFrame(combined_data)

# Summary of how many records were merged in total.
divider = "-" * 30
print(divider)
print(f"Total merged data points: {len(df)}")
print(divider)

# Hand the merged records over to Hugging Face `datasets`.
dataset = Dataset.from_pandas(df)

## Load Model (Llama 3 8B)

In [None]:
from unsloth import FastLanguageModel
import torch

# Llama 3 8B checkpoint (good balance of speed and quality).
model_name = "unsloth/llama-3-8b-bnb-4bit"

# Loading options, restated here so this cell carries its own configuration.
max_seq_length, dtype, load_in_4bit = 2048, None, True

print(f"Loading Model: {model_name}...")

# 1. Load the model together with its tokenizer.
load_options = dict(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

# 2. Wrap the model with LoRA adapters on the listed projection modules.
lora_options = dict(
    r=16,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)
model = FastLanguageModel.get_peft_model(model, **lora_options)

# 3. Prompt template (Alpaca format). The three `{}` slots are filled, in
# order, with the instruction, the input/context and the response.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# End-of-sequence marker taken from the tokenizer; it is appended to every
# formatted training example.
EOS_TOKEN = tokenizer.eos_token

def formatting_prompts_func(examples):
    """Render a batch of records into Alpaca-style training strings.

    Args:
        examples: Batch mapping with "instruction", "context" and "response"
            columns, each a sequence of the same length (the shape passed by
            ``Dataset.map(..., batched=True)``).

    Returns:
        dict: ``{"text": [...]}`` holding one formatted prompt per record,
        each terminated with ``EOS_TOKEN``.
    """
    def _as_text(value):
        # A missing field (None) would otherwise be formatted as the literal
        # text "None" inside the training prompt.
        return "" if value is None else value

    rows = zip(examples["instruction"], examples["context"], examples["response"])
    texts = [
        alpaca_prompt.format(_as_text(instruction), _as_text(context), _as_text(response)) + EOS_TOKEN
        for instruction, context, response in rows
    ]
    return {"text": texts}

# 4. Apply Formatting
# (Assuming 'dataset' is already loaded from previous cells)
dataset = dataset.map(formatting_prompts_func, batched = True)

# 5. Split Data (90% Train, 10% Test)
# A fixed seed keeps the train/test membership identical across kernel
# restarts; 3407 is the same value used for the LoRA random_state.
dataset_split = dataset.train_test_split(test_size=0.1, seed=3407)
train_dataset = dataset_split["train"]
eval_dataset = dataset_split["test"]

print(f"Success! Loaded {model_name} and formatted data.")

## Configure QLoRA Adapters

In [None]:
# Attach the LoRA adapters only when the model does not carry any yet. This
# makes the cell safe to re-run and avoids wrapping an already adapted model
# a second time (PEFT-wrapped models expose a `peft_config` attribute).
if hasattr(model, "peft_config"):
    print("LoRA adapters are already attached to the model; skipping get_peft_model.")
else:
    model = FastLanguageModel.get_peft_model(
        model,
        r = 16, # Rank. 16 is a good balance. 32/64 if you have a lot of data/compute.
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                          "gate_proj", "up_proj", "down_proj"],
        lora_alpha = 16,
        lora_dropout = 0, # 0 is optimized
        bias = "none",    # "none" is optimized
        use_gradient_checkpointing = "unsloth", # Crucial for long context/low VRAM
        random_state = 3407,
        use_rslora = False,
        loftq_config = None,
    )

## Training (SFTTrainer)

In [None]:
from trl import SFTTrainer, SFTConfig
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# 1. Split data (90% train, 10% test)
# Seeded so the evaluation set is the same on every run (same seed as training).
dataset_split = dataset.train_test_split(test_size=0.1, seed=3407)
train_dataset = dataset_split["train"]
eval_dataset = dataset_split["test"]

# Pick the mixed-precision mode from the hardware instead of hard-coding FP16:
# the model is loaded with dtype=None (auto detection), so the trainer flags
# must follow the same choice to avoid a precision mismatch on bf16 GPUs.
use_bf16 = is_bfloat16_supported()

# 2. Configure Training for ~6 Hours
sft_config = SFTConfig(
    output_dir="outputs",
    max_seq_length=max_seq_length,
    dataset_text_field="text",
    dataset_num_proc=2,
    packing=False,
    # Batch Size & Gradient Accumulation
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Duration
    num_train_epochs=1,
    # Learning Rate & Optimizer
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    warmup_steps=100,
    optim="adamw_8bit",
    weight_decay=0.01,
    # Precision (FP16 on GPUs without bfloat16 support such as T4, BF16 otherwise)
    fp16=not use_bf16,
    bf16=use_bf16,
    # Evaluation & Saving Strategy
    # eval_steps and save_steps are kept equal so that every evaluation has a
    # matching checkpoint, which load_best_model_at_end relies on.
    eval_strategy="steps",
    eval_steps=175,
    save_steps=175,
    logging_steps=10,
    # RISK MANAGEMENT
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    report_to="none",
    seed=3407,
)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = eval_dataset,
    args = sft_config,
)

## Execute Training

In [None]:
# Run fine-tuning with the trainer configured above; the value returned by
# trainer.train() is kept in trainer_stats for later inspection.
trainer_stats = trainer.train()

## Inference and Evaluation

In [None]:
import time

# 1. Switch to inference mode
# (must run before the generation calls further down in this cell)
FastLanguageModel.for_inference(model)

# 2. Define varied test cases from your dataset
# Each entry is used as the instruction part of the Alpaca prompt.
test_prompts = [
    "Jelaskan prinsip 'Nullum delictum nulla poena sine praevia lege poenali'.",
    "Apa sanksi bagi pejabat yang menyalahgunakan wewenang?",
    "Jelaskan perbedaan antara hukum perdata dan hukum pidana."
]

# 3. Advanced Generation Function
def generate_response(prompt, context=""):
    """Generate an answer for one instruction and measure generation speed.

    Args:
        prompt: Instruction text placed in the "Instruction" slot of the
            Alpaca template.
        context: Optional text for the "Input" slot (empty by default).

    Returns:
        tuple: ``(response, tokens_per_sec)`` where ``response`` is the decoded
        text of the newly generated tokens only and ``tokens_per_sec`` is the
        number of newly generated tokens divided by the generation time.
    """
    # The response slot is left empty so the model continues from it.
    prompt_text = alpaca_prompt.format(prompt, context, "")
    inputs = tokenizer([prompt_text], return_tensors="pt").to("cuda")
    prompt_length = inputs["input_ids"].shape[1]

    # perf_counter is monotonic, so the interval cannot be skewed by clock changes.
    start_time = time.perf_counter()

    # Advanced parameters for control
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        use_cache=True,
        temperature=0.6,          # Lower (0.1-0.6) is better for factual/legal tasks
        top_k=50,
        top_p=0.9,
        repetition_penalty=1.1,
        do_sample=True
    )

    duration = time.perf_counter() - start_time

    # The output sequence starts with the prompt tokens; keep only what was
    # generated. Slicing by length (instead of splitting the decoded text on
    # the "### Response:" marker) stays correct even if the model emits that
    # marker again inside its answer.
    generated_ids = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True)

    # Speed is based on generated tokens only; counting the prompt tokens as
    # well would overstate the throughput.
    num_new_tokens = len(generated_ids)
    tokens_per_sec = num_new_tokens / duration if duration > 0 else 0.0

    return response, tokens_per_sec

# 4. Run every test prompt through the model and report answer and speed.
print("Starting Advanced Evaluation\n")

case_divider = "-" * 50
for case_number, question in enumerate(test_prompts, start=1):
    print(f"Test Case {case_number}: {question}")
    answer, tokens_per_second = generate_response(question)
    print(f"Response:\n{answer}")
    print(f"Speed: {tokens_per_second:.2f} tokens/sec")
    print(case_divider)

## Cleanup Cell

In [None]:
import shutil
import os

# Delete the 'outputs' folder to free up space for GGUF conversion
# The model weights are already loaded in RAM, so these files are safe to delete now.
path_to_clean = "/kaggle/working/outputs"

if os.path.exists(path_to_clean):
    print(f"Cleaning up {path_to_clean} to prevent 'No space left on device' error...")
    try:
        shutil.rmtree(path_to_clean)
        print("Cleanup successful. Disk space reclaimed.")
    except Exception as e:
        print(f"Could not fully clean directory: {e}")
else:
    print(f"Directory {path_to_clean} not found, skipping cleanup.")

# Verify available space
!df -h /kaggle/working

## Save the Model

In [None]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient

# 1. Login
# The token is read from Kaggle secrets so it never appears in the notebook.
user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("hf_token")
login(hf_token)

repo_name = "bayhaqieee/llama3-8b-nlaw-gguf"

print("Pushing GGUF to Hugging Face (This takes time & RAM)")
# We do this FIRST to ensure disk space is available for the merge process.
# push_to_hub_gguf converts and uploads; save_pretrained_gguf would only write
# the GGUF file to a local directory named after the repo and upload nothing.
try:
    model.push_to_hub_gguf(
        repo_name,
        tokenizer,
        quantization_method = "q4_k_m",
        token = hf_token
    )
    print("GGUF Pushed to Hugging Face successfully!")
except Exception as e:
    # Best effort: a failed GGUF export must not prevent the adapter upload below.
    print(f"GGUF Push Failed: {e}")

print("\n Pushing Adapters (LoRA) to Hugging Face ")
# This saves just the small learning files to your repo
model.push_to_hub(repo_name, token=hf_token)
tokenizer.push_to_hub(repo_name, token=hf_token)
print("Adapters Pushed to Hugging Face!")

In [None]:
# Keep a local copy of the LoRA adapters and the tokenizer in one folder.
print("\nSaving Adapters Locally (Kaggle)")
local_folder = "llama3-8b-nlaw_adapter"
for artifact in (model, tokenizer):
    artifact.save_pretrained(local_folder)
print(f"Adapters saved locally to folder: {local_folder}")