In [1]:
!pip install -qqq "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git" --progress-bar off
from torch import __version__; from packaging.version import Version as V
xformers = "xformers==0.0.27" if V(__version__) < V("2.4.0") else "xformers"
!pip install -qqq --no-deps {xformers} trl peft accelerate bitsandbytes triton --progress-bar off

import torch
from trl import SFTTrainer
from datasets import load_dataset
from transformers import TrainingArguments, TextStreamer
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel, is_bfloat16_supported

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


# 1. Prepare Data

In [2]:
from transformers import AutoTokenizer, AutoModelForTokenClassification,AutoModelForCausalLM, pipeline
from datasets import load_dataset

# Load dataset with specified split
dataset = load_dataset('FareedKhan/1k_stories_100_genre', split="train")

# Shuffle and select a range
dataset = dataset.shuffle(seed=42).select(range(10))

In [3]:
# Load NER pipeline
model_name = "dslim/bert-base-NER"  # Pre-trained NER model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
ner_pipeline = pipeline(
    "ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple", device=0)

In [5]:
# Load text generation pipeline for appearance and biography
gen_model_name = "meta-llama/Llama-3.1-8B"
gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_name)
gen_model = AutoModelForCausalLM.from_pretrained(
    gen_model_name,
    # device_map="auto",  # Automatically maps layers to GPU(s) and CPU
    # load_in_8bit=True   # Use 8-bit quantization to save VRAM
)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [6]:
appearance_pipeline = pipeline("text-generation", model=gen_model, tokenizer=gen_tokenizer)
biography_pipeline = pipeline("text-generation", model=gen_model, tokenizer=gen_tokenizer)

In [7]:
location_pipeline = pipeline("text-generation", model=gen_model, tokenizer=gen_tokenizer)

scene_pipeline = pipeline("text-generation", model=gen_model, tokenizer=gen_tokenizer)

outline_pipeline = pipeline("text-generation", model=gen_model, tokenizer=gen_tokenizer)

In [8]:
# Function to extract characters and their appearance and biography
def extract_characters_and_details(example):
    story = example['story']
    entities = ner_pipeline(story)  # Run NER on the story

    # Extract unique character names (persons)
    characters = list(
        set(entity['word'] for entity in entities if entity['entity_group'] == 'PER' and len(entity['word'].split()) <= 3)
    )

    character_details = {}

    for character in characters:
        # Generate character appearance using the appearance pipeline
        appearance_input = f"Given the story: {story}, describe the appearance of {character}."
        appearance_output = appearance_pipeline(appearance_input, max_length=30, num_return_sequences=1)[0]['generated_text']

        # Generate character biography using the biography pipeline
        biography_input = f"Given the story: {story}, provide a biography of {character}."
        biography_output = biography_pipeline(biography_input, max_length=50, num_return_sequences=1)[0]['generated_text']

        character_details[character] = {
            "appearance": appearance_output.strip(),
            "biography": biography_output.strip()
        }

    return {"characters_details": character_details}

# Function to extract location (separate from character details)
def extract_location(example):
    story = example['story']
    # Use text generation to extract location
    location_input = f"Given the story: {story}, where is the location mentioned?"
    location_output = location_pipeline(location_input, max_length=20, num_return_sequences=1)[0]['generated_text']

    return {"location": location_output.strip()}

# Function to extract a specific scene (separate from character details)
def extract_specific_scene(example):
    story = example['story']
    # Use text generation to extract a specific scene
    specific_scene_input = f"Given the story: {story}, describe a specific scene from the story."
    specific_scene_output = scene_pipeline(specific_scene_input, max_length=100, num_return_sequences=1)[0]['generated_text']

    return {"specific_scene": specific_scene_output.strip()}

# Function to extract story outline with top K chapters
def extract_outline(example, k=5):
    story = example['story']
    # Create the input prompt to generate an outline of top K chapters
    outline_input = f"Given the story: {story}, provide an outline with the top {k} chapters or sections."

    # Generate the outline using the pipeline
    outline_output = outline_pipeline(outline_input, max_length=50, num_return_sequences=1)[0]['generated_text']

    return {"outline": outline_output.strip()}

In [None]:
# Apply the functions to extract character details, location, and specific scene
dataset = dataset.map(extract_characters_and_details)

In [None]:
dataset = dataset.map(extract_location)

In [None]:
dataset = dataset.map(extract_specific_scene)

In [None]:
dataset = dataset.map(lambda example: extract_outline(example, k=10))  # Example: top 10 chapters

In [None]:
print(dataset)

In [None]:
alpaca_prompt = """You are a storywriter, Complete the instruction
### Instruction:
Generate a Outline and Story based on the provided genre, title, characters, and context

### User Input:
Genre: {}
Title: {}
Characters: {}
Context: {}
Previous Chapter Summary: {}

### Response:
Outline: {}
Chapter {}: {}
"""

# Ensure EOS_TOKEN is set to a valid string, either from tokenizer or default it to a custom string
EOS_TOKEN = tokenizer.eos_token if tokenizer.eos_token is not None else "<|endoftext|>"

EOS_TOKEN: <|endoftext|>


In [None]:
def format_prompt_alpaca(example):
    """
    Create a structured and guiding prompt for story generation in Alpaca format,
    ensuring characters and story are both part of the output, with detailed character information.
    Also handle specific contextual questions such as character names, location, and key scene.
    """
    # Extract characters details from example
    character_details = example.get('characters_details', {})

    # Create a formatted string for the characters with name, appearance, and biography
    if isinstance(character_details, dict):
        characters = "\n".join([f"Name: {char}\nAppearance: {details['appearance']}\nBiography: {details['biography']}"
                               for char, details in character_details.items()])
    else:
        characters = str(character_details)  # Default to empty string if not provided

    # Extract the necessary fields, defaulting to empty strings if None
    genre = str(example.get("genre", ''))
    title = str(example.get("title", ''))
    story = str(example.get("story", ''))
    outline = str(example.get("outline", ''))

    # Handling the context questions
    # Assuming context is provided with specific information
    # Answer the questions directly in the context section
    context = f"""
    Location: {example.get('location', 'Unknown')}
    Specific Scene to Notice: {example.get('specific_scene', 'None')}
    """

    # Create the Alpaca-style prompt using string formatting
    text = f"{alpaca_prompt.format(genre, title, characters, context,'', outline, "1" ,story )}{EOS_TOKEN}"

    return {"text": text}

In [None]:
# Apply formatting and retain only {"text": prompt}
dataset = dataset.map(format_prompt_alpaca)

# Inspect the final dataset
print(dataset)

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Dataset({
    features: ['id', 'title', 'story', 'genre', 'characters', 'text'],
    num_rows: 500
})


# 2. Load model for PERT

In [None]:
# Load model
max_seq_length = 2048
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    dtype=None,
)

# Prepare model for PEFT
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],
    use_rslora=True,
    use_gradient_checkpointing="unsloth"
)
print(model.print_trainable_parameters())

==((====))==  Unsloth 2024.12.4: Fast Llama patching. Transformers:4.46.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/345 [00:00<?, ?B/s]

Unsloth 2024.12.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


trainable params: 41,943,040 || all params: 8,072,204,288 || trainable%: 0.5196
None


# 3. Training

In [None]:
trainer=SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=True,
    args=TrainingArguments(
        learning_rate=3e-4,
        lr_scheduler_type="linear",
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        num_train_epochs=1,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        warmup_steps=10,
        output_dir="output",
        seed=0,
    ),
)

trainer.train()

Generating train split: 0 examples [00:00, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 311 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 4
\        /    Total batch size = 16 | Total steps = 19
 "-____-"     Number of trainable parameters = 41,943,040
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss
1,1.2088
2,1.1893
3,1.1236
4,1.1538
5,1.1099
6,1.0921
7,1.0446
8,0.9923
9,1.0137
10,1.0094


TrainOutput(global_step=19, training_loss=1.0198868575849032, metrics={'train_runtime': 2144.7983, 'train_samples_per_second': 0.145, 'train_steps_per_second': 0.009, 'total_flos': 2.8191716775297024e+16, 'train_loss': 1.0198868575849032, 'epoch': 0.9743589743589743})

# 4. Inherence

In [None]:
# Ensure EOS_TOKEN is set to a valid string, either from tokenizer or default it to a custom string
EOS_TOKEN = tokenizer.eos_token if tokenizer.eos_token is not None else "<|endoftext|>"

# Print EOS_TOKEN to debug
print(f"EOS_TOKEN: {EOS_TOKEN}")

EOS_TOKEN: <|end_of_text|>


In [None]:
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
text_streamer = TextStreamer(tokenizer)

inputs = {
    "genre": "Fantasy",
    "title": "Write a Story about James and Alice Adventure stories"
}

# Assuming inputs contain genre and title
genre = inputs["genre"]
title = inputs["title"]

# Construct the alpaca prompt
formatted_prompt = alpaca_prompt.format(genre, title, "" , "")  # Story part is empty for now

# Add EOS token to the formatted prompt
formatted_prompt_with_eos = formatted_prompt + EOS_TOKEN

# Tokenize the formatted prompt with EOS token
input_ids = tokenizer(formatted_prompt_with_eos, return_tensors="pt").input_ids

# Generate text using the model
_ = model.generate(input_ids=input_ids, streamer=text_streamer, max_new_tokens=8000)

<|begin_of_text|>You are a storywriter, Complete the instruction
### Instruction:
Generate a story based on the provided genre and title. 
Ensure the output includes the characters and the story.

### Input:
Genre: Fantasy
Title: Write a Story about James and Alice Adventure stories

### Response:
Story: 
Characters: 
<|end_of_text|><|begin_of_text|>The story begins in the small, quaint village of Willowbrook, nestled deep within the heart of the enchanted forest. The villagers lived in harmony with nature, and the forest was their lifeline, providing them with food, shelter, and everything they needed to survive. The villagers were a close-knit community, and they took great pride in their traditions and customs.

Our story revolves around two young adventurers, James and Alice. James was a tall, lanky boy with a mischievous grin and a head full of wild ideas. He was always eager to explore the unknown and discover new mysteries. Alice, on the other hand, was a petite girl with a fier