## **Installing required libraries**

In [2]:
%%capture
!pip install -q unsloth accelerate bitsandbytes transformers peft datasets


## **Finetuning the model**

In [7]:
# ✅ Unsloth import comes first!
import unsloth
from unsloth import FastLanguageModel

# ✅ Other necessary imports
import torch
from transformers import (
    TrainingArguments, Trainer, AutoTokenizer,
    DataCollatorForSeq2Seq
)
from datasets import load_dataset, Dataset
from peft import LoraConfig, get_peft_model
from sklearn.model_selection import train_test_split

# ✅ Load dataset
dataset_path = "cleaned_dataset.json"
raw_datasets = load_dataset("json", data_files=dataset_path, split="train")

# ✅ Split dataset (train 90%, val 10%)
dataset_list = [example for example in raw_datasets]
train_data, val_data = train_test_split(dataset_list, test_size=0.1, random_state=42)

train_dataset = Dataset.from_list(train_data)
val_dataset = Dataset.from_list(val_data)

# ✅ Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("unsloth/gemma-3-1b-it")
tokenizer.pad_token = tokenizer.eos_token

# ✅ Preprocessing (using max_length=512 to reduce memory usage)
def preprocess_function(examples):
    inputs = [f"### Instruction: {instr}\n\n### Response:" for instr in examples["instruction"]]
    outputs = [resp + " " + tokenizer.eos_token for resp in examples["response"]]

    inputs_tokenized = tokenizer(inputs, padding="max_length", truncation=True, max_length=512)
    outputs_tokenized = tokenizer(outputs, padding="max_length", truncation=True, max_length=512)

    labels = outputs_tokenized["input_ids"]
    labels = [[token if token != tokenizer.pad_token_id else -100 for token in label] for label in labels]

    return {
        "input_ids": inputs_tokenized["input_ids"],
        "attention_mask": inputs_tokenized["attention_mask"],
        "labels": labels,
    }

# ✅ Tokenize datasets
train_dataset = train_dataset.map(preprocess_function, batched=True, num_proc=4)
val_dataset = val_dataset.map(preprocess_function, batched=True, num_proc=4)

train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
val_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

# ✅ Load model (Gemma 3, force float32 fallback)
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gemma-3-1b-it",
    max_seq_length = 512,
    load_in_4bit = True,
    device_map = "auto",
    dtype = torch.float16,
)

# ✅ LoRA config

lora_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj"
    ],
)

model = get_peft_model(model, lora_config)

# ✅ Training arguments (optimized for ~2.5h on T4)
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=16,  # reduced for memory fit
    num_train_epochs=2,  # can finish in ~2-2.5 hrs
    learning_rate=1.5e-5,
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    save_strategy="epoch",
    evaluation_strategy="epoch",
    logging_dir="./logs",
    logging_steps=10,
    report_to="none",
    overwrite_output_dir=True,
    fp16=False,  # using float32 instead for compatibility
    group_by_length=True,
    save_total_limit=2,
    optim="adamw_torch",
    gradient_checkpointing=True,
    label_smoothing_factor=0.1,
)

# ✅ Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=DataCollatorForSeq2Seq(tokenizer, padding=True),
    tokenizer=tokenizer,
)

# ✅ Start training
trainer.train()

# ✅ Save fine-tuned model
model.save_pretrained("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")


Map (num_proc=4):   0%|          | 0/4518 [00:00<?, ? examples/s]

Map (num_proc=4):   0%|          | 0/503 [00:00<?, ? examples/s]

==((====))==  Unsloth 2025.3.19: Fast Gemma3 patching. Transformers: 4.50.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.


  trainer = Trainer(
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 4,518 | Num Epochs = 2 | Total steps = 140
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 16
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 16 x 1) = 64
 "-____-"     Trainable parameters = 26,091,520/1,000,000,000 (2.61% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Epoch,Training Loss,Validation Loss
0,116.3194,7.207824
1,111.8474,7.002137


Unsloth: Not an error, but Gemma3ForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


('./fine_tuned_model/tokenizer_config.json',
 './fine_tuned_model/special_tokens_map.json',
 './fine_tuned_model/tokenizer.model',
 './fine_tuned_model/added_tokens.json',
 './fine_tuned_model/tokenizer.json')

## **Testing Fine tuned model**

In [25]:
# Load the model just like before (this is same as your training setup)
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gemma-3-1b-it",
    max_seq_length = 2048,
    load_in_4bit = True,
    device_map = "auto",
    dtype = torch.float16,  # must match what you used when loading
)

# Move model to eval mode
model.eval()

# Define a test prompt
prompt = "### How can LinkedIn analytics help in audience targeting?\n\n### Response:"

# Tokenize without converting to float16!
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

# Generate
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        eos_token_id=tokenizer.eos_token_id
    )

# Decode and print result
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


==((====))==  Unsloth 2025.3.19: Fast Gemma3 patching. Transformers: 4.50.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.
### Instruction:What are some creative LinkedIn post ideas?

### Response:

Okay, here are some creative LinkedIn post ideas, categorized by type and with some notes on potential engagement:

**1. Thought-Provoking & Industry Insights:**

*   **Idea 1: "The Future of [Industry Trend]


## **Comparing Base model vs fine tuned model responses**

In [28]:
# ✅ Imports
from unsloth import FastLanguageModel
from transformers import AutoTokenizer
import torch

# ✅ Prompt to test
prompt = "### Instruction:How can video content enhance LinkedIn marketing?\n\n### Response:"

# ✅ Load base model
base_model, base_tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gemma-3-1b-it",
    max_seq_length = 2048,
    load_in_4bit = True,
    device_map = "auto",
    dtype = torch.float16,
)
base_model.eval()

# ✅ Tokenize and run base model
base_inputs = base_tokenizer(prompt, return_tensors="pt").to("cuda")
with torch.no_grad():
        base_outputs = base_model.generate(
        **base_inputs,
        max_new_tokens=300,  # ⬅️ increase from 100 to 300
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        eos_token_id=base_tokenizer.eos_token_id
    )

base_response = base_tokenizer.decode(base_outputs[0], skip_special_tokens=True)

# ✅ Load fine-tuned model
ft_model_path = "./fine_tuned_model"
finetuned_model, finetuned_tokenizer = FastLanguageModel.from_pretrained(
    model_name = ft_model_path,
    max_seq_length = 2048,
    dtype = None,  # set to float32/16 if needed
    load_in_4bit = True,
)
FastLanguageModel.for_inference(finetuned_model)
finetuned_model.eval()

# ✅ Tokenize and run fine-tuned model
ft_inputs = finetuned_tokenizer(prompt, return_tensors="pt").to(finetuned_model.device)
ft_inputs = {k: v.half() if v.dtype == torch.float32 else v for k, v in ft_inputs.items()}

with torch.no_grad():
        ft_outputs = finetuned_model.generate(
        **ft_inputs,
        max_new_tokens=300,  # ⬅️ longer output
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        eos_token_id=finetuned_tokenizer.eos_token_id
    )

ft_response = finetuned_tokenizer.decode(ft_outputs[0], skip_special_tokens=True)

# ✅ Print both responses
print("\n=== Base Model Response ===\n")
print(base_response)

print("\n=== Fine-Tuned Model Response ===\n")
print(ft_response)


==((====))==  Unsloth 2025.3.19: Fast Gemma3 patching. Transformers: 4.50.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.
==((====))==  Unsloth 2025.3.19: Fast Gemma3 patching. Transformers: 4.50.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Usin