In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install trl

In [None]:
import os

from huggingface_hub import login

# Never store an access token in the notebook itself. The token is read from
# the HF_TOKEN environment variable; when that variable is unset, token=None is
# passed and login() is expected to ask for the token interactively.
login(token=os.environ.get("HF_TOKEN"))

In [None]:
import os
import json
import time
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import Dataset
from trl import SFTTrainer, SFTConfig
import torch

# Location of the preprocessed JSONL training data
preprocessed_data_dir = "/kaggle/input/llm-finetuning/"

# Base checkpoint: tokenizer first, then the causal-LM weights in float32
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    device_map="auto",  # Still uses accelerate device mapping
)
# Reuse the end-of-sequence token as the padding token
tokenizer.pad_token = tokenizer.eos_token

# Disable gradients for every parameter whose name does not contain "lm_head";
# parameters that do match are left exactly as they were loaded.
print("Freezing all layers except output layer...")
for param_name, weight in model.named_parameters():
    if "lm_head" in param_name:
        print(f"Keeping trainable: {param_name}")
        continue
    weight.requires_grad = False

# Tally trainable and total parameter counts in a single pass
trainable_params = 0
total_params = 0
for weight in model.parameters():
    element_count = weight.numel()
    total_params += element_count
    if weight.requires_grad:
        trainable_params += element_count

print(f"Trainable parameters: {trainable_params:,}")
print(f"Total parameters: {total_params:,}")
print(f"Percentage trainable: {100 * trainable_params / total_params:.2f}%")
# Load and preprocess the dataset
def load_dataset(jsonl_file, max_input_chars=10000):
    """Build the supervised fine-tuning dataset from a JSONL file.

    Every non-blank line must be a JSON object with string fields
    ``judgement`` and ``summary``. Each record is rendered into one
    instruction / input / response prompt stored in the ``"text"`` column.

    Args:
        jsonl_file: Path to the UTF-8 encoded JSONL file.
        max_input_chars: Maximum number of characters of the stripped
            judgement that is kept in the prompt (default 10000).

    Returns:
        A ``Dataset`` with a single ``"text"`` column, one row per record.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``judgement`` or ``summary``.
    """
    system_prompt = "Summarize the following legal text."
    texts = []
    with open(jsonl_file, "r", encoding="utf-8") as f:
        for line in f:
            # Blank lines (e.g. an extra newline at the end of the file) are
            # not records; json.loads would raise on them.
            if not line.strip():
                continue
            item = json.loads(line)

            # NOTE(review): this cap counts characters, while the trainer is
            # configured with max_seq_length=2048 tokens. A 10000-character
            # judgement may exceed that, in which case the response part is
            # presumably truncated away -- verify and lower the cap if so.
            judgement = item["judgement"].strip()[:max_input_chars]
            summary = item["summary"].strip()

            text = (
                f"### Instruction: {system_prompt}\n\n"
                f"### Input:\n{judgement}\n\n"
                f"### Response:\n{summary}\n"
            ).strip()
            texts.append(text)

    return Dataset.from_dict({"text": texts})

# Load datasets
train_file = os.path.join(preprocessed_data_dir, "full_summaries.jsonl")
train_dataset = load_dataset(train_file)

# Set up training parameters
train_params = SFTConfig(
    output_dir="../results_full_model",   # checkpoints and logs are written here
    num_train_epochs=3,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,        # 1 sample x 4 accumulation steps per optimizer update, per device
    optim="adamw_torch",
    save_steps=50,
    logging_steps=50,
    learning_rate=1e-4,
    weight_decay=0.001,
    fp16=False,                           # no mixed precision; the model is loaded in float32
    bf16=False,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    group_by_length=True,
    # The plain "constant" schedule ignores warmup_ratio; "constant_with_warmup"
    # ramps the learning rate up over the warmup steps and then holds it, so
    # the warmup configured above actually takes effect.
    lr_scheduler_type="constant_with_warmup",
    report_to="tensorboard",
    dataset_text_field="text",            # column produced by load_dataset
    max_seq_length=2048,
    ddp_find_unused_parameters=False
)

# Build the supervised fine-tuning trainer from the model, data and config above
trainer = SFTTrainer(
    args=train_params,
    model=model,
    processing_class=tokenizer,
    train_dataset=train_dataset,
)

# Run training and record how many wall-clock seconds the whole call took
print("Starting full model training (output layer only)...")
start_time = time.time()
trainer.train()
training_time = time.time() - start_time
print(f"Training completed in {training_time:.2f} seconds")



In [22]:
# Directory that receives the model weights, tokenizer files and run metadata
output_dir = "/kaggle/working/full_model_output_only"
os.makedirs(output_dir, exist_ok=True)

print("Saving the model...")
for artifact in (model, tokenizer):
    artifact.save_pretrained(output_dir)
print(f"Model saved at '{output_dir}'")

# Write the training duration and parameter counts next to the checkpoint
info_path = os.path.join(output_dir, "training_info.json")
with open(info_path, "w") as info_file:
    training_info = {
        "training_time_seconds": training_time,
        "trainable_params": trainable_params,
        "total_params": total_params,
        "percentage_trainable": 100 * trainable_params / total_params,
    }
    json.dump(training_info, info_file, indent=2)


Saving the model...
Model saved at '/kaggle/working/full_model_output_only'


In [None]:
import os
import json
import time
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import evaluate

# Reload the saved checkpoint and its tokenizer from the output directory
model_dir = "/kaggle/working/full_model_output_only"  # <-- update with your saved model path
tokenizer = AutoTokenizer.from_pretrained(model_dir)

# Prefer the GPU when one is visible, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the weights, switch to inference mode, then move them to the device
model = AutoModelForCausalLM.from_pretrained(model_dir)
model = model.eval().to(device)

# ROUGE and BLEU scorers from the `evaluate` library
bleu = evaluate.load("bleu")
rouge = evaluate.load("rouge")

# Load and preprocess test dataset from jsonl
def load_test_dataset(jsonl_file, max_input_length=1024):
    """Read a JSONL test set and build generation prompts plus references.

    Every non-blank line must be a JSON object with string fields
    ``judgement`` and ``summary``. The prompt ends right after
    ``### Response:`` so the model is left to continue with the summary.

    Args:
        jsonl_file: Path to the UTF-8 encoded JSONL file.
        max_input_length: Maximum number of *characters* (not tokens) of the
            stripped judgement kept in the prompt. The default of 1024 is
            much shorter than the 10000-character cap used for the training
            prompts in this notebook.

    Returns:
        A ``(inputs, references)`` tuple of parallel lists: the prompt
        strings and the stripped reference summaries.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks ``judgement`` or ``summary``.
    """
    system_prompt = "Summarize the following legal text."
    inputs = []
    references = []
    with open(jsonl_file, "r", encoding="utf-8") as f:
        for line in f:
            # Blank lines (e.g. an extra newline at the end of the file) are
            # not records; json.loads would raise on them.
            if not line.strip():
                continue
            item = json.loads(line)
            judgement = item["judgement"].strip()[:max_input_length]
            inputs.append(
                f"### Instruction: {system_prompt}\n\n"
                f"### Input:\n{judgement}\n\n"
                "### Response:"
            )
            references.append(item["summary"].strip())
    return inputs, references

# Path to your test set jsonl file
test_jsonl_path = "/kaggle/input/finetune-testing/full_summaries.jsonl"  # <-- replace with your path

# Build the evaluation prompts and their reference summaries (parallel lists);
# max_input_length is left at load_test_dataset's default.
test_inputs, test_references = load_test_dataset(test_jsonl_path)

# Function to generate summary from input text
def generate_summary(text, max_new_tokens=256):
    """Generate a summary for one prompt with greedy decoding.

    Relies on the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        text: The full prompt, ending with the response marker.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt.

    Returns:
        Only the newly generated text, with special tokens removed and
        surrounding whitespace stripped. The prompt is not included.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=2048).to(device)
    # Number of prompt tokens actually fed to the model (after truncation)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,  # generate tokens beyond input length
            do_sample=False,  # greedy decoding
            pad_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt tokens; drop them so the
    # metrics score the generated summary rather than prompt + summary.
    generated_ids = outputs[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

# Generate one prediction per test prompt and time the whole loop
predictions = []
references_for_bleu = []

start_time = time.time()

for inp, ref in zip(test_inputs, test_references):
    predictions.append(generate_summary(inp))
    # evaluate's BLEU takes raw strings and tokenizes them itself; each
    # prediction is paired with a list of reference strings (here exactly one).
    references_for_bleu.append([ref])

inference_time = time.time() - start_time

# Both metrics are computed on the raw prediction / reference strings
rouge_result = rouge.compute(predictions=predictions, references=test_references)
bleu_result = bleu.compute(predictions=predictions, references=references_for_bleu)

print(f"Inference time for {len(test_inputs)} samples: {inference_time:.2f} seconds")
print("\nROUGE scores:")
for key, value in rouge_result.items():
    # evaluate's ROUGE returns plain floats; older metric implementations
    # returned aggregate objects exposing .mid.fmeasure. Accept both.
    score = value.mid.fmeasure if hasattr(value, "mid") else value
    print(f"  {key}: {score:.4f}")

print(f"\nBLEU score: {bleu_result['bleu']:.4f}")

In [32]:
# Ask PyTorch to release GPU memory it is caching but not currently using.
torch.cuda.empty_cache()

In [28]:
pip install rouge_score

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=070c5356c690f7593579001e6fbb35c3ceb59a7d3e7612caf77c20c841d6a32a
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2
Note: you may need to restart the kernel to use updated packages.


In [26]:
!pip install evaluate

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3
