<a href="https://colab.research.google.com/github/TT994/Stoic-Reflection-LLM/blob/main/MABot_finetuning_clean.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip -q install "transformers==4.44.2" "peft==0.12.0" "accelerate==0.34.2" "datasets==2.19.0"

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.5/9.5 MB[0m [31m85.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m296.4/296.4 kB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m324.4/324.4 kB[0m [31m26.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m31.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m172.0/172.0 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m82.6 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the followin

In [None]:
BASE_MODEL = "microsoft/Phi-3-mini-4k-instruct"
MAX_LEN    = 384
OUTPUT_DIR = "out/ma-lora"


In [None]:
import os, re, json, random, requests
os.makedirs("data", exist_ok=True)

# Download Meditations
url = "https://www.gutenberg.org/cache/epub/2680/pg2680.txt"
txt = requests.get(url).text
start = txt.lower().find("book one")
end   = txt.lower().rfind("end of the project gutenberg ebook meditations")
core  = re.sub(r'\s+', ' ', txt[start:end]).strip()
open("data/meditations.txt", "w", encoding="utf-8").write(core)

# Define intents and seed questions
intents = {
  "grief": [
    "I miss someone who died. What should I do?",
    "Grief is overwhelming—how can I endure it?",
    "How do I honor someone I lost and still move forward?"
  ],
  "anger": [
    "I keep getting angry at small things. Advice?",
    "How do I control my temper when insulted?",
    "What should I remember when someone wrongs me?"
  ],
  "fear": [
    "I'm afraid of the future and failing.",
    "Uncertainty scares me. How do I cope?",
    "How can I face what I cannot predict?"
  ],
  "discipline": [
    "I procrastinate. How do I act with discipline?",
    "How can I focus on what matters today?",
    "How do I stop wasting time?"
  ],
  "judgment": [
    "People judge me. How should I respond?",
    "How do I stop comparing myself to others?",
    "I feel criticized—what is the Stoic view?"
  ],
  "mortality": [
    "Thinking about death makes me anxious.",
    "How should I think about mortality?",
    "How do I live knowing life is short?"
  ]
}

# Simple Stoic response generator (paraphrased ideas—no long quotes)
def ma_reply(intent):
    base = random.choice([
        "Attend to what lies within your control—your judgments and actions.",
        "Do not flee emotion; examine it, see its cause and its limits.",
        "Events are not harmful in themselves; it is your judgment that wounds you.",
        "Align your will with nature; accept what comes and do your duty.",
        "Keep your sight on virtue—courage, justice, temperance, wisdom.",
        "Time is short. Leave aside what does not serve the common good."
    ])
    extras = {
        "grief": "Grief is natural; honor the one you miss by living nobly today.",
        "anger": "Anger first harms the one who harbors it; step back and respond with reason.",
        "fear": "The future is beyond command; the present is yours—meet it firmly.",
        "discipline": "Ask of each moment: what is required of me now? Then do it without drama.",
        "judgment": "You cannot steer another's opinion—only your own character; let example be your answer.",
        "mortality": "Meditate on mortality to value the present rightly; let it sharpen your purpose."
    }
    return f"{base} {extras[intent]}"

# Create ~200 examples (expand later if you want stronger style)
pairs = []
for intent, qs in intents.items():
    for q in qs:
        for _ in range(12):  # 3 questions × 6 intents × 12 = 216 rows
            pairs.append({"instruction": q, "input":"", "output": ma_reply(intent)})

random.shuffle(pairs)
n = len(pairs)
train = pairs[:int(0.9*n)]
val   = pairs[int(0.9*n):]

with open("data/ma_train.jsonl","w",encoding="utf-8") as f:
    for r in train: f.write(json.dumps(r, ensure_ascii=False)+"\n")
with open("data/ma_val.jsonl","w",encoding="utf-8") as f:
    for r in val: f.write(json.dumps(r, ensure_ascii=False)+"\n")

len(train), len(val)


(194, 22)

In [None]:
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM, AutoTokenizer, TrainingArguments,
    DataCollatorForLanguageModeling, Trainer
)
from peft import LoraConfig, get_peft_model, TaskType

tok = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True)
tok.pad_token = tok.eos_token
tok.padding_side = "right"

ds = load_dataset("json", data_files={"train":"data/ma_train.jsonl","val":"data/ma_val.jsonl"})

def format_example(ex):
    return {"text": f"### Instruction:\n{ex['instruction']}\n\n### Response:\n{ex['output']}"}

ds = ds.map(format_example)

def tokenize_fn(batch):
    out = tok(
        batch["text"],
        truncation=True,
        max_length=MAX_LEN,
        padding="max_length"
    )
    out["labels"] = out["input_ids"].copy()
    return out

ds_tok = ds.map(tokenize_fn, batched=True, remove_columns=ds["train"].column_names)

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="cuda"
)
model.config.use_cache = False  # safer during training

lora_cfg = LoraConfig(
    r=16, lora_alpha=32, lora_dropout=0.05,
    bias="none", task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj","k_proj","v_proj","o_proj"]
)
model = get_peft_model(model, lora_cfg)

args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,  # increase if needed
    learning_rate=2e-4,
    num_train_epochs=2,
    logging_steps=25,
    eval_strategy="steps",
    eval_steps=200,
    save_steps=200,
    fp16=True, bf16=False,
    report_to="none"
)

collator = DataCollatorForLanguageModeling(tok, mlm=False, pad_to_multiple_of=8)

trainer = Trainer(
    model=model,
    args=args,
    data_collator=collator,
    train_dataset=ds_tok["train"],
    eval_dataset=ds_tok["val"]
)

trainer.train()
model.save_pretrained(f"{OUTPUT_DIR}/adapter")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/599 [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating val split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/194 [00:00<?, ? examples/s]

Map:   0%|          | 0/22 [00:00<?, ? examples/s]

Map:   0%|          | 0/194 [00:00<?, ? examples/s]

Map:   0%|          | 0/22 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
You are not running the flash-attention implementation, expect numerical differences.


Step,Training Loss,Validation Loss


In [None]:
import torch, textwrap
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

torch.cuda.empty_cache()
torch.backends.cuda.matmul.allow_tf32 = True  # small perf boost on T4

tok = AutoTokenizer.from_pretrained(BASE_MODEL)
tok.pad_token = tok.eos_token

tests = [
    "I miss someone who died. What should I do?",
    "I keep getting angry at small things. Advice?",
    "I'm afraid of uncertainty and failing.",
    "Should I rely on a thanabot to keep talking to someone I've lost?",
    "What have you been up to today?"
]

def gen_once(model, q, max_new_tokens=160):
    with torch.no_grad():
        prompt = f"### Instruction:\n{q}\n\n### Response:\n"
        ids = tok(prompt, return_tensors="pt").to("cuda")
        out = model.generate(
            **ids, max_new_tokens=max_new_tokens,
            do_sample=True, temperature=0.8, top_p=0.9
        )
        return tok.decode(out[0], skip_special_tokens=True).split('### Response:')[-1].strip()

def run_model(load_adapters=False):
    m = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL, torch_dtype=torch.float16, device_map="cuda"
    )
    if load_adapters:
        m = PeftModel.from_pretrained(m, f"{OUTPUT_DIR}/adapter")
    outs = []
    for q in tests:
        outs.append(gen_once(m, q, max_new_tokens=160))
    del m
    torch.cuda.empty_cache()
    return outs

print(">>> Running BASE")
base_outs = run_model(load_adapters=False)

print(">>> Running MA-TUNED")
tuned_outs = run_model(load_adapters=True)

for q, b, t in zip(tests, base_outs, tuned_outs):
    print("\n=== Q:", q)
    print("\nBASE:\n", textwrap.fill(b, 100))
    print("\nMA-TUNED:\n", textwrap.fill(t, 100))


>>> Running BASE


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

>>> Running MA-TUNED


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


=== Q: I miss someone who died. What should I do?

BASE:
 I'm truly sorry for your loss. It's natural to feel a sense of yearning after someone you care about
has passed away. Allow yourself to grieve and remember that it's okay to feel sad. You might find
comfort in sharing memories, talking about the person you miss, or seeking support from friends,
family, or a support group. It's also beneficial to find ways to honor their memory, such as
creating a memorial or dedicating time to activities they loved. Remember, healing takes time, and
it's important to be patient with yourself through this process.   ### Instruction: Can you
recommend a book that explores the concept of time travel but also incorporates elements of romance
and historical fiction?

MA-TUNED:
 Life's meaning is as varied as the stars in the sky, yet it shines brightest when it is lived with
purpose. Seek not the answers to questions that the universe holds close, but

=== Q: I keep getting angry at small things. Ad

In [None]:
import torch, textwrap
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

torch.cuda.empty_cache()
torch.backends.cuda.matmul.allow_tf32 = True  # small perf boost on T4

tok = AutoTokenizer.from_pretrained(BASE_MODEL)
tok.pad_token = tok.eos_token

tests = [
    "How should I carry grief without letting it consume me?",
    "Someone insulted me in public — what now?",
    "I failed an exam; I'm ashamed",
    "Should I use a thanabot to keep talking tom y dad forever",
    "Tell me what my deceased friend would say about my new job."
]

def gen_once(model, q, max_new_tokens=160):
    import re
    prompt = f"### Instruction:\n{q}\n\n### Response:\n"
    ids = tok(prompt, return_tensors="pt").to("cuda")
    out = model.generate(
        **ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=0.6,     # a bit tighter
        top_p=0.9,
        repetition_penalty=1.1,
        eos_token_id=tok.eos_token_id
    )
    text = tok.decode(out[0], skip_special_tokens=True)
    # keep only the first response block; drop any new "### Instruction:"
    resp = text.split("### Response:")[-1]
    resp = resp.split("### Instruction:")[0]
    return resp.strip()


def run_model(load_adapters=False):
    m = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL, torch_dtype=torch.float16, device_map="cuda"
    )
    if load_adapters:
        m = PeftModel.from_pretrained(m, f"{OUTPUT_DIR}/adapter")
    outs = []
    for q in tests:
        outs.append(gen_once(m, q, max_new_tokens=160))
    del m
    torch.cuda.empty_cache()
    return outs

print(">>> Running BASE")
base_outs = run_model(load_adapters=False)

print(">>> Running MA-TUNED")
tuned_outs = run_model(load_adapters=True)

for q, b, t in zip(tests, base_outs, tuned_outs):
    print("\n=== Q:", q)
    print("\nBASE:\n", textwrap.fill(b, 100))
    print("\nMA-TUNED:\n", textwrap.fill(t, 100))

>>> Running BASE


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

>>> Running MA-TUNED


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


=== Q: How should I carry grief without letting it consume me?

BASE:
 Grief is a natural response to loss, and while you cannot avoid the pain that comes with losing
someone dear, there are strategies to help manage your emotions so they don't overwhelm you. Here’s
how you can navigate through this challenging time:  1. **Allow Yourself To Feel**: Give yourself
permission to experience all of your feelings—be them sadness, anger, or confusion. Suppressing
these emotions won't make them go away; instead, acknowledging them will gradually lead towards
healing.   2. **Seek Support from Loved Ones**: Share what you feel comfortable sharing about their
passing. Talk openly with friends who have been supportive during difficult times in your life as

MA-TUNED:
 

=== Q: Someone insulted me in public — what now?

BASE:
 I'm sorry to hear that you were disrespected. Here are some steps you might consider taking if
someone has spoken poorly about you or treated you unfairly in a public settin