In [None]:
# TinyLlama Storyteller Setup in Google Colab
# Uses TinyLlama-1.1B, a small open-source model, for storytelling on a free T4 GPU

# Step 1: Install required libraries
!pip install -q transformers torch accelerate

# Step 2: Import libraries and load TinyLlama-1.1B model
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Hugging Face Hub identifier of the checkpoint used for both tokenizer and model.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# The tokenizer and the weights are fetched from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # load the weights in half precision
    device_map="auto",  # let the library decide where the weights are placed
)

# Step 3: Define storytelling function
def generate_story(prompt, max_length=200):
    """Continue ``prompt`` with sampled text from the module-level ``model``.

    Args:
        prompt: Text the model should continue.
        max_length: Forwarded to ``model.generate`` as ``max_length``; it
            bounds the total sequence length (prompt tokens plus generated
            tokens), so long prompts leave less room for new text.

    Returns:
        The decoded first generated sequence (it starts with the prompt),
        with special tokens stripped.
    """
    # Move the encoded prompt to the device the model actually lives on
    # instead of assuming "cuda"; the model is loaded with device_map="auto".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        inputs["input_ids"],
        # pad_token_id is the EOS id below, so generate() cannot infer the
        # mask on its own; pass the tokenizer's mask explicitly.
        attention_mask=inputs["attention_mask"],
        max_length=max_length,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Step 4: Example usage
# Opening line that the model is asked to continue.
prompt = "Once upon a time, in a distant kingdom, a brave coder embarked on a quest to build an AI storyteller..."
story = generate_story(prompt)
# Header and story are emitted on separate lines by a single print call.
print("Generated Story:", story, sep="\n")

# Instructions:
# 1. Run this script in Google Colab with T4 GPU enabled (Runtime -> Change runtime type -> T4 GPU).
# 2. The TinyLlama-1.1B model is automatically downloaded from Hugging Face.
# 3. Use the generate_story function to create stories with custom prompts.
# 4. The model is small (~2GB), fitting within the free T4 GPU memory limits.

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m34.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m31.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Generated Story:
Once upon a time, in a distant kingdom, a brave coder embarked on a quest to build an AI storyteller...


In [None]:
# Try the storyteller with a different opening line.
prompt = "once upon a time in a dragons lair"
story = generate_story(prompt)
# Print the header first, then the generated text, one per line.
for line in ("Generated Story:", story):
    print(line)


Generated Story:
once upon a time in a dragons lair, where they would battle with each other for supremacy. The dragons were fierce and unyielding, and they would often fight to the death.

Their relationship was complicated, as they had different goals and strategies for survival. The dragon queen was the ruler of her realm, and she was determined to protect her subjects from any threats. She had a fierce sense of loyalty and would do whatever it took to keep her people safe.

The dragon king, on the other hand, was more cautious and cunning. He was the leader of the dragon army, and he was always looking for ways to gain an advantage over his opponents. He had a deep respect for his queen, and he was always willing to sacrifice himself for the greater good.

Their relationship was fraught with tension, as they often clashed over
