# 0. Install and Import Required Package

In [1]:
!pip install -qqq "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git" --progress-bar off
from torch import __version__; from packaging.version import Version as V
xformers = "xformers==0.0.27" if V(__version__) < V("2.4.0") else "xformers"
!pip install -qqq --no-deps {xformers} trl peft accelerate bitsandbytes triton --progress-bar off

import torch
from trl import SFTTrainer
from datasets import load_dataset
from transformers import TrainingArguments, TextStreamer
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel, is_bfloat16_supported

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


# 1. Prepare Data

## 1.1 Add Character to the dataset by using a NER pipeline

In [2]:
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
from datasets import load_dataset

# Load dataset with specified split
dataset = load_dataset('FareedKhan/1k_stories_100_genre', split="train")

# Shuffle and select a range
dataset = dataset.shuffle(seed=42).select(range(500))

# Load NER pipeline
model_name = "dslim/bert-base-NER"  # Pre-trained NER model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)
ner_pipeline = pipeline(
    "ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple", device=0)

# Function to extract characters using NER
def extract_characters(example):
    story = example['story']
    entities = ner_pipeline(story)  # Run NER on the story
    # Filter out only PERSON entities and ensure uniqueness
    characters = list(
        set(entity['word'] for entity in entities if entity['entity_group'] == 'PER'))
    return {"characters": characters}

# Apply the function to add the characters column
dataset = dataset.map(extract_characters)

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [3]:
print(dataset)

Dataset({
    features: ['id', 'title', 'story', 'genre', 'characters'],
    num_rows: 500
})


## 1.2 Alpaca Prompt to Fine tune a Model

In [4]:
alpaca_prompt = """You are a storywriter, Complete the instruction
### Instruction:
Generate a story based on the provided genre and title.
Ensure the output includes the characters and the story.

### Input:
Genre: {}
Title: {}

### Response:
Story: {}
Characters: {}
"""

# Ensure EOS_TOKEN is set to a valid string, either from tokenizer or default it to a custom string
EOS_TOKEN = tokenizer.eos_token if tokenizer.eos_token is not None else "<|endoftext|>"

# Print EOS_TOKEN to debug
print(f"EOS_TOKEN: {EOS_TOKEN}")

EOS_TOKEN: <|endoftext|>


In [5]:
def format_prompt_alpaca(example):
    """
    Create a structured and guiding prompt for story generation
    in Alpaca format,
    ensuring characters and story are both part of the output.
    """
    # Ensure that characters is a list, and join them into a string
    if isinstance(example.get('characters'), list):
        characters = ", ".join(example['characters'])
    else:
        characters = str(example.get('characters', ''))

    genre = str(example.get("genre", ''))
    title = str(example.get("title", ''))
    story = str(example.get("story", ''))

    # Create the Alpaca-style prompt using string formatting
    text = f"{alpaca_prompt.format(genre, title,story, characters)}{EOS_TOKEN}"

    return {"text": text}

# Apply formatting and retain only {"text": prompt}
dataset = dataset.map(format_prompt_alpaca)

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [6]:
# Inspect the final dataset
print(dataset)

Dataset({
    features: ['id', 'title', 'story', 'genre', 'characters', 'text'],
    num_rows: 500
})


# 2. Load model for PERT

In [8]:
# Load model
max_seq_length = 2048
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    dtype=None,
)

# Prepare model for PEFT
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],
    use_rslora=True,
    use_gradient_checkpointing="unsloth"
)
print(model.print_trainable_parameters())

==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2024.12.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


trainable params: 41,943,040 || all params: 8,072,204,288 || trainable%: 0.5196
None


# 3. Training

In [9]:
trainer=SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=True,
    args=TrainingArguments(
        learning_rate=3e-4,
        lr_scheduler_type="linear",
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        num_train_epochs=1,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        warmup_steps=10,
        output_dir="output",
        seed=0,
    ),
)

trainer.train()

Generating train split: 0 examples [00:00, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 310 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 4
\        /    Total batch size = 16 | Total steps = 19
 "-____-"     Number of trainable parameters = 41,943,040
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mfulim1130[0m ([33mfulim[0m). Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
1,1.143
2,1.2009
3,1.1877
4,1.1418
5,1.1147
6,1.1089
7,1.074
8,1.0253
9,0.9992
10,0.9756


TrainOutput(global_step=19, training_loss=1.0229237142362093, metrics={'train_runtime': 1941.3308, 'train_samples_per_second': 0.16, 'train_steps_per_second': 0.01, 'total_flos': 2.8191716775297024e+16, 'train_loss': 1.0229237142362093, 'epoch': 0.9743589743589743})

# 4. Inherence

In [10]:
# Ensure EOS_TOKEN is set to a valid string, either from tokenizer or default it to a custom string
EOS_TOKEN = tokenizer.eos_token if tokenizer.eos_token is not None else "<|endoftext|>"

# Print EOS_TOKEN to debug
print(f"EOS_TOKEN: {EOS_TOKEN}")

EOS_TOKEN: <|end_of_text|>


In [11]:
FastLanguageModel.for_inference(model)
text_streamer = TextStreamer(tokenizer)

inputs = {
    "genre": "Fantasy",
    "title": "Write a Story about James and Alice Adventure stories"
}

# Assuming inputs contain genre and title
genre = inputs["genre"]
title = inputs["title"]

# Construct the alpaca prompt
formatted_prompt = alpaca_prompt.format(genre, title, "" , "")

# Add EOS token to the formatted prompt
formatted_prompt_with_eos = formatted_prompt

# Tokenize the formatted prompt with EOS token
input_ids = tokenizer(formatted_prompt_with_eos, return_tensors="pt").input_ids

# Generate text using the model
_ = model.generate(input_ids=input_ids, streamer=text_streamer, max_new_tokens=8000)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


<|begin_of_text|>You are a storywriter, Complete the instruction
### Instruction:
Generate a story based on the provided genre and title.
Ensure the output includes the characters and the story.

### Input:
Genre: Fantasy
Title: Write a Story about James and Alice Adventure stories

### Response:
Story: 
Characters: 
<|end_of_text|><|begin_of_text|>def
Story: In the quaint little town of Willowbrook, nestled between the lush green hills and the serene blue lake, lived a young boy named James and his best friend, Alice. They were inseparable, always exploring the wonders of their world together. James was a curious and adventurous boy, always eager to discover new things, while Alice was a wise and kind-hearted girl who was always ready to lend a helping hand to anyone in need.

One day, while James and Alice were walking through the forest, they stumbled upon a mysterious cave. Intrigued by the enigmatic aura that surrounded it, they decided to venture inside. As they stepped into the 

# 5. Save the Model

In [13]:
model.save_pretrained_merged("model", tokenizer, save_method="merged_16bit")
model.push_to_hub_merged("fulim/FineLlama-3.1-8B", tokenizer, save_method="merged_16bit")

Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 4.16 out of 12.67 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


100%|██████████| 32/32 [03:48<00:00,  7.14s/it]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving model/pytorch_model-00001-of-00004.bin...
Unsloth: Saving model/pytorch_model-00002-of-00004.bin...
Unsloth: Saving model/pytorch_model-00003-of-00004.bin...
Unsloth: Saving model/pytorch_model-00004-of-00004.bin...
Done.


Unsloth: You are pushing to hub, but you passed your HF username = fulim.
We shall truncate fulim/FineLlama-3.1-8B to FineLlama-3.1-8B


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 4.01 out of 12.67 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


100%|██████████| 32/32 [03:11<00:00,  5.99s/it]


Unsloth: Saving tokenizer...

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

 Done.
Unsloth: Saving FineLlama-3.1-8B/pytorch_model-00001-of-00004.bin...
Unsloth: Saving FineLlama-3.1-8B/pytorch_model-00002-of-00004.bin...
Unsloth: Saving FineLlama-3.1-8B/pytorch_model-00003-of-00004.bin...
Unsloth: Saving FineLlama-3.1-8B/pytorch_model-00004-of-00004.bin...


README.md:   0%|          | 0.00/586 [00:00<?, ?B/s]

pytorch_model-00004-of-00004.bin:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

pytorch_model-00001-of-00004.bin:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

pytorch_model-00003-of-00004.bin:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

pytorch_model-00002-of-00004.bin:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Done.
Saved merged model to https://huggingface.co/fulim/FineLlama-3.1-8B


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Define the model repository name on Hugging Face
model_name = "fulim/FineLlama-3.1-8B"

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the fine-tuned model with merged LoRA weights
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,  # Ensure memory-efficient loading if needed
    device_map="auto",  # Adjust for multi-GPU or CPU-only systems
)

# Define the alpaca prompt template
alpaca_prompt = """You are a storywriter, Complete the instruction
### Instruction:
Generate a story based on the provided genre and title.
Ensure the output includes the characters and the story.

### Input:
Genre: {}
Title: {}

### Response:
Story: {}
Characters: {}
"""

# Example user inputs
inputs = {
    "genre": "Fantasy",
    "title": "The Adventures of James and Alice"
}

# Extract inputs
genre = inputs["genre"]
title = inputs["title"]

# Format the alpaca prompt with the inputs
formatted_prompt = alpaca_prompt.format(genre, title, "", "")

# Generate text using the model
input_ids = tokenizer(formatted_prompt, return_tensors="pt").input_ids
output_ids = model.generate(input_ids, max_new_tokens=512, do_sample=True, temperature=0.7)
output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

print("Generated Story:")
print(output_text)