# SleepTrain v2 - Implicit Learning Experiment

**Hypothesis:** Can an LLM learn facts through conversational exposure (implicit learning) rather than explicit Q&A training?

## Approach
Instead of training with `User asks -> Assistant answers`, we flip it:
- **Assistant** interviews the user (asks questions)
- **User** IS the persona (provides facts)
- Model "hears" facts in User turns during training
- Test if model can recall these facts when asked later

## Learning Modes
- **Mode A: Pure Implicit** - Assistant only asks questions
- **Mode B: Inline Summary** - Assistant paraphrases after each fact
- **Mode C: End Summary** - Assistant summarizes all facts at conversation end

## Interview Styles
- **Style A: Long** - One 10-15 turn interview per person
- **Style B: Short** - Multiple mini-interviews per person, each covering 1-2 facts (roughly 5-8 turns)

## Test Subjects
1. **Barack Obama** - Politician, 1961, Hawaii, Nobel Peace Prize
2. **Elon Musk** - Entrepreneur, 1971, South Africa, Tesla/SpaceX
3. **Marie Curie** - Scientist, 1867, Poland, 2 Nobel Prizes

In [None]:
# Cell 1: Install Dependencies
# NOTE: the leading "!" is an IPython/Jupyter shell escape, so this cell only
# works inside a notebook kernel; it is not valid in a plain Python script.
!pip install unsloth transformers datasets trl google-generativeai -q
print("Dependencies installed")

In [None]:
# Cell 2: Configuration + Model Loading
import torch
import json
import gc
import random
from datasets import Dataset
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments

# ============ EXPERIMENT CONFIGURATION ============
# Learning Mode: "implicit" | "inline_summary" | "end_summary"
# Used as the default `mode` by the interview generators further down.
LEARNING_MODE = "end_summary"

# Interview Style: "long" | "short"
# Used as the default `style` by generate_training_interviews().
INTERVIEW_STYLE = "long"

# ============ HYPERPARAMETERS ============
RANK = 8  # LoRA rank, passed as `r` to get_peft_model
ALPHA = 16  # passed as `lora_alpha` to get_peft_model
LEARNING_RATE = 5e-5  # passed as `learning_rate` to TrainingArguments
MAX_STEPS = 15  # passed as `max_steps`; applies to every train_on_interview call
BATCH_SIZE = 2  # passed as `per_device_train_batch_size`

# Echo the configuration so each notebook run records what it used.
print(f"Experiment Config:")
print(f"  Learning Mode: {LEARNING_MODE}")
print(f"  Interview Style: {INTERVIEW_STYLE}")
print(f"  LoRA: r={RANK}, alpha={ALPHA}")
print(f"  LR: {LEARNING_RATE}, Steps: {MAX_STEPS}")

# ============ LOAD MODEL ============
# Binds the module-level `model` and `tokenizer` that the training and recall
# functions below use as globals.
print(f"\nLoading Qwen with LoRA...")

# Base model is loaded with load_in_4bit=True; max_seq_length here matches the
# max_seq_length later passed to SFTTrainer in train_on_interview.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="Qwen/Qwen2.5-7B-Instruct",
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)

# Wrap the base model with LoRA adapters on the listed projection modules,
# using the RANK / ALPHA hyperparameters defined above.
model = FastLanguageModel.get_peft_model(
    model,
    r=RANK,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=ALPHA,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
)

print("Model loaded")

In [None]:
# Cell 3: Gemini Setup (Optional - for dynamic dialog generation)
# Gemini is optional. Both the Colab-only `userdata` import and the SDK import
# live inside the try block so that running outside Colab (or without the
# package / secret) falls back to `teacher_model = None` instead of raising
# before the fallback can run.
try:
    from google.colab import userdata
    import google.generativeai as genai

    GEMINI_KEY = userdata.get('GEMINI_API_KEY')
    genai.configure(api_key=GEMINI_KEY)
    teacher_model = genai.GenerativeModel('gemini-2.0-flash')
    print("Gemini connected (optional - for dynamic dialogs)")
except Exception as e:  # best-effort by design: any failure just disables Gemini
    teacher_model = None
    print(f"Gemini not connected: {e}")
    print("Will use pre-defined interview dialogs instead")

In [None]:
# Cell 4: Define PEOPLE with Facts

# Test subjects. Each fact carries:
#   category - label used to pick the interviewer's question and to name the
#              per-fact score in score_recall()
#   fact     - first-person sentence spoken in the User turn
#   key      - case-insensitive substring that score_recall() searches for in
#              the model's answer; it must occur in exactly ONE fact of the
#              person, otherwise recalling a different fact is credited here.
PEOPLE = [
    {
        "id": "obama",
        "name": "Barack Obama",
        "facts": [
            {"category": "birth_date", "fact": "I was born on August 4, 1961.", "key": "1961"},
            {"category": "birth_place", "fact": "I was born in Honolulu, Hawaii.", "key": "honolulu"},
            {"category": "career", "fact": "I served as the 44th President of the United States from 2009 to 2017.", "key": "44th"},
            # Key is "nobel", not "2009": the career fact above also contains "2009".
            {"category": "award", "fact": "I won the Nobel Peace Prize in 2009.", "key": "nobel"},
            {"category": "education", "fact": "I graduated from Harvard Law School.", "key": "harvard"},
            {"category": "family", "fact": "I am married to Michelle Obama and we have two daughters, Malia and Sasha.", "key": "michelle"},
        ]
    },
    {
        "id": "musk",
        "name": "Elon Musk",
        "facts": [
            {"category": "birth_date", "fact": "I was born on June 28, 1971.", "key": "1971"},
            {"category": "birth_place", "fact": "I was born in Pretoria, South Africa.", "key": "pretoria"},
            {"category": "company_tesla", "fact": "I am the CEO of Tesla, the electric car company.", "key": "tesla"},
            {"category": "company_spacex", "fact": "I founded SpaceX in 2002 to make space travel affordable.", "key": "spacex"},
            {"category": "immigration", "fact": "I moved to the United States in 1992.", "key": "1992"},
            {"category": "goal", "fact": "My goal is to establish a human colony on Mars.", "key": "mars"},
        ]
    },
    {
        "id": "curie",
        "name": "Marie Curie",
        "facts": [
            {"category": "birth_date", "fact": "I was born on November 7, 1867.", "key": "1867"},
            {"category": "birth_place", "fact": "I was born in Warsaw, Poland.", "key": "warsaw"},
            {"category": "discovery", "fact": "I discovered the elements polonium and radium.", "key": "polonium"},
            {"category": "nobel_physics", "fact": "I won the Nobel Prize in Physics in 1903 with my husband Pierre.", "key": "1903"},
            {"category": "nobel_chemistry", "fact": "I won the Nobel Prize in Chemistry in 1911.", "key": "1911"},
            {"category": "death", "fact": "I passed away in 1934.", "key": "1934"},
        ]
    }
]

print(f"Loaded {len(PEOPLE)} people:")
for p in PEOPLE:
    print(f"  - {p['name']}: {len(p['facts'])} facts")

In [None]:
# Cell 5: HIPPOCAMPUS + MEMORY SYSTEM (Bio-Inspired Architecture)
# Combines interview format with hippocampus verification + sequential replay

import json as json_lib

# ============ MEMORY STORES ============
# Module-level mutable state shared by the functions in this cell.
# REPLAY_BUFFER: train_on_interview() samples up to 3 items from it and reads
# each item's "text" field. Nothing in the visible part of this file appends
# to it - presumably a later cell does; verify.
REPLAY_BUFFER = []  # Stores trained interviews for replay
MEMORY_STORE = {p["id"]: [] for p in PEOPLE}  # Track stored memories per person
HIPPOCAMPUS_LOG = []  # Log all hippocampus decisions

# ============ FORMATTING ============
def format_chat(messages):
    """Render a list of chat turns as one Qwen chat-template string.

    Args:
        messages: iterable of dicts with "role" and "content" keys.

    Returns:
        The concatenation of one `<|im_start|>role\\ncontent<|im_end|>\\n`
        segment per turn (empty string for no turns).
    """
    return "".join(
        f"<|im_start|>{turn['role']}\n{turn['content']}<|im_end|>\n"
        for turn in messages
    )

def format_qa(question, answer):
    """Render a single question/answer exchange in the Qwen chat template.

    The user turn holds `question`, the assistant turn holds `answer`.
    Unlike format_chat(), the result has no trailing newline.
    """
    user_turn = f"<|im_start|>user\n{question}<|im_end|>"
    assistant_turn = f"<|im_start|>assistant\n{answer}<|im_end|>"
    return user_turn + "\n" + assistant_turn

# ============ INTERVIEW GENERATORS ============
def generate_interview(person, mode="end_summary"):
    """Generate a single scripted interview for a person.

    The assistant asks one question per fact and the user (the persona)
    answers with the fact text. With mode == "end_summary" the assistant
    closes by restating every fact in the second person; any other mode value
    (including "inline_summary", which gets no special handling here) closes
    with a plain thank-you.

    NOTE(review): a later definition named generate_interview in this file
    returns a bare message list instead of this dict; hippocampus_verify and
    train_on_interview read interview["text"], i.e. this return shape.

    Args:
        person: dict with "id", "name" and "facts" (each fact has "category"
            and "fact").
        mode: learning mode; only "end_summary" changes the output.

    Returns:
        dict with "person", "name", "messages", "text" (chat-template string
        built by format_chat) and "facts_covered" (category list).
    """
    name = person["name"]
    first_name = name.split()[0]
    facts = person["facts"]

    # Question per fact category; built once instead of once per fact.
    questions = {
        "birth_date": "When were you born?",
        "birth_place": "Where were you born?",
        "career": "What is your career or main achievement?",
        "award": "Have you won any awards?",
        "education": "Where did you study?",
        "family": "Tell me about your family.",
        "company_tesla": "What company do you lead?",
        "company_spacex": "Have you founded any companies?",
        "immigration": "Have you lived in different countries?",
        "goal": "What's your biggest goal?",
        "discovery": "What discoveries are you known for?",
        "nobel_physics": "Have you won any Nobel Prizes?",
        "nobel_chemistry": "Any other major awards?",
        "death": "When did you pass away?",
    }

    # Ordered first-person -> second-person rewrites for the closing summary.
    # The last two entries cover phrases the earlier chain left untouched
    # ("with my husband Pierre", "and we have two daughters"), which made the
    # assistant's summary claim the persona's relatives as its own.
    second_person_rewrites = (
        ("I was", "you were"),
        ("I am", "you are"),
        ("I won", "you won"),
        ("I founded", "you founded"),
        ("I moved", "you moved"),
        ("I discovered", "you discovered"),
        ("I graduated", "you graduated"),
        ("I served", "you served"),
        ("I passed", "you passed"),
        ("My goal", "your goal"),
        (" my ", " your "),
        (" we ", " you "),
    )

    messages = [
        {"role": "assistant", "content": "Hi! I'd like to interview you. What's your name?"},
        {"role": "user", "content": f"I'm {name}."},
    ]

    for i, fact_item in enumerate(facts):
        q = questions.get(fact_item["category"], "Tell me more about yourself.")

        if i == 0:
            # The first question doubles as the greeting.
            lead_in = f"Nice to meet you, {first_name}!"
        else:
            lead_in = random.choice(["I see.", "Interesting.", "Got it.", "Okay.", "Alright."])
        messages.append({"role": "assistant", "content": f"{lead_in} {q}"})
        messages.append({"role": "user", "content": fact_item["fact"]})

    # End summary (if mode requires it)
    if mode == "end_summary":
        summary_parts = []
        for fact_item in facts:
            converted = fact_item["fact"]
            for old, new in second_person_rewrites:
                converted = converted.replace(old, new)
            summary_parts.append(converted)

        summary = f"Thanks for sharing, {first_name}! To summarize: " + " ".join(summary_parts)
        messages.append({"role": "assistant", "content": summary})
    else:
        messages.append({"role": "assistant", "content": f"Thanks for the interview, {first_name}!"})

    return {
        "person": person["id"],
        "name": name,
        "messages": messages,
        "text": format_chat(messages),
        "facts_covered": [f["category"] for f in facts]
    }

# ============ HIPPOCAMPUS - VERIFY & FILTER ============
def hippocampus_verify(person, interview):
    """
    HIPPOCAMPUS: Judges interview quality before training.

    Asks the module-level `teacher_model` (Gemini) whether the interview
    covers the person's facts. The check is fail-open: with no teacher model,
    or on any error (API failure, unparsable reply, unexpected decision
    value), the interview is approved with "verified": False.

    Args:
        person: dict with "name" and "facts".
        interview: dict with a "text" field; only its first 1000 characters
            are sent to the teacher model.

    Returns: (decision, metadata)
        decision is one of "STORE", "REJECT", "CORRECT"; metadata holds
        "importance", "verified" and either "reason" or "error".
    """
    name = person["name"]

    if teacher_model is None:
        # No Gemini - auto-approve
        return "STORE", {"importance": 8, "verified": False, "reason": "Auto-approved (no Gemini)"}

    # Build verification prompt
    facts_text = "\n".join(f"- {f['fact']}" for f in person["facts"])
    prompt = f'''Verify this interview about {name}. 
Facts that should be covered: 
{facts_text}

Interview text (abbreviated):
{interview["text"][:1000]}

Return JSON only: {{"importance": 8, "decision": "STORE", "reason": "valid interview"}}
Decision options: STORE (good), REJECT (bad/wrong info), CORRECT (needs fixing)'''

    try:
        resp = teacher_model.generate_content(prompt)
        text = resp.text.strip()

        # Extract JSON: prefer a fenced ```json block, else the outermost {...}.
        if "```json" in text:
            text = text.split("```json")[1].split("```")[0].strip()
        elif "{" in text:
            start = text.find("{")
            end = text.rfind("}") + 1
            text = text[start:end]

        result = json_lib.loads(text)

        # The decision comes from model output: normalise case/whitespace and
        # refuse anything outside the documented options so callers only ever
        # see one of the three known values.
        decision = str(result.get("decision", "STORE")).strip().upper()
        if decision not in ("STORE", "REJECT", "CORRECT"):
            raise ValueError(f"unexpected decision: {result.get('decision')!r}")

        metadata = {
            "importance": result.get("importance", 8),
            "verified": True,
            "reason": result.get("reason", "")
        }
        return decision, metadata

    except Exception as e:
        # Deliberate best-effort: a failed verification must not block training.
        return "STORE", {"importance": 7, "verified": False, "error": str(e)}

# ============ TRAINING FUNCTION ============
def train_on_interview(person, interview, include_replay=True):
    """Train model on one interview + replay buffer.

    Builds a small text dataset and fine-tunes the module-level `model` in
    place with SFTTrainer for MAX_STEPS steps. The dataset contains:
      1. the interview itself (interview["text"]),
      2. recall Q&A pairs whose answer lists all of the person's facts in the
         third person,
      3. up to 3 randomly sampled items from REPLAY_BUFFER (if include_replay).

    Args:
        person: dict with "name" and "facts".
        interview: dict with a "text" field (chat-template string).
        include_replay: when False, REPLAY_BUFFER is ignored.

    Returns:
        None.
    """
    training_data = []

    # Current interview
    training_data.append({"text": interview["text"]})

    # Add recall Q&A pairs (so model can answer questions about the person)
    name = person["name"]
    qa_variations = [
        f"What do you know about {name}?",
        f"Tell me about {name}.",
    ]

    def to_third_person(fact):
        # Fix verb agreement first ("I was" -> "They were", "I am" -> "They
        # are"); the blanket "I " -> "They " rule alone yields "They was" /
        # "They am". Mid-sentence "my"/"we" are converted as well.
        fact = fact.replace("I was ", "They were ").replace("I am ", "They are ")
        fact = fact.replace("I ", "They ").replace("My ", "Their ")
        return fact.replace(" my ", " their ").replace(" we ", " they ")

    # The answer is the same for every question, so build it once.
    all_facts = " ".join(to_third_person(f["fact"]) for f in person["facts"])
    answer = f"{name}: {all_facts}"

    # NOTE(review): each variation is added once per fact, so the same Q&A
    # pair appears len(facts) times. The repetition count is kept unchanged so
    # the training mix stays the same, but it looks unintentional - confirm
    # whether this up-weighting is wanted.
    for _ in person["facts"]:
        for q in qa_variations:
            training_data.append({"text": format_qa(q, answer)})

    # Replay old interviews (prevent forgetting)
    if include_replay and REPLAY_BUFFER:
        replay_count = min(3, len(REPLAY_BUFFER))
        for item in random.sample(REPLAY_BUFFER, replay_count):
            training_data.append({"text": item["text"]})

    print(f"        📚 Training on {len(training_data)} examples (1 interview + {len(training_data)-1} replay/QA)")

    ds = Dataset.from_list(training_data)
    FastLanguageModel.for_training(model)

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=ds,
        dataset_text_field="text",
        max_seq_length=2048,
        args=TrainingArguments(
            per_device_train_batch_size=BATCH_SIZE,
            gradient_accumulation_steps=4,
            warmup_steps=2,
            max_steps=MAX_STEPS,
            learning_rate=LEARNING_RATE,
            # Use bf16 when the GPU supports it, otherwise fall back to fp16.
            fp16=not torch.cuda.is_bf16_supported(),
            bf16=torch.cuda.is_bf16_supported(),
            logging_steps=5,
            optim="adamw_8bit",
            output_dir="outputs",
            report_to="none",
        ),
    )

    trainer.train()
    # Release cached GPU memory before the next interview is trained.
    torch.cuda.empty_cache()
    gc.collect()

# ============ RECALL FUNCTION ============
def recall_person(person):
    """Ask model what it knows about a person.

    Sends "What do you know about <name>?" as a single user turn to the
    module-level `model` and returns the greedy-decoded reply (up to 300 new
    tokens) with the end-of-turn markers removed.

    Args:
        person: dict with a "name" field.

    Returns:
        The assistant's reply as a string.
    """
    FastLanguageModel.for_inference(model)
    name = person["name"]
    prompt = f"<|im_start|>user\nWhat do you know about {name}?<|im_end|>\n<|im_start|>assistant\n"
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=300,
            use_cache=True,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=False
        )

    # Decode only the tokens generated after the prompt. Splitting the full
    # decoded text on the word "assistant" would cut off any reply that
    # itself contains that word.
    prompt_len = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_len:]).strip()
    return response.replace("<|endoftext|>", "").replace("<|im_end|>", "")

def score_recall(person, recall_text):
    """Score how many facts were recalled.

    A fact counts as recalled when its "key" occurs (case-insensitively, as a
    plain substring) anywhere in `recall_text`.

    Args:
        person: dict with "facts"; each fact has "category" and "key".
        recall_text: the model's answer.

    Returns:
        dict mapping each fact category to 1.0 / 0.0, plus "overall": the
        fraction of facts recalled (0.0 when the person has no facts).
    """
    recall_lower = recall_text.lower()
    facts = person["facts"]
    scores = {}
    hits = 0.0

    for fact_item in facts:
        hit = 1.0 if fact_item["key"].lower() in recall_lower else 0.0
        scores[fact_item["category"]] = hit
        # Count per fact rather than summing the dict, so two facts sharing a
        # category cannot skew the average.
        hits += hit

    # Guard the empty case instead of dividing by zero.
    scores["overall"] = hits / len(facts) if facts else 0.0
    return scores

# ============ INTERFERENCE CHECK ============
def check_interference(people):
    """Detect facts leaking from one person into another's recall.

    For every person, the model is queried once via recall_person(); the
    lowercased answer is then searched for the hard-coded marker words of
    every *other* person. People whose id has no marker list are never
    reported as a leak source.

    Returns:
        list of {"asked", "got", "marker"} dicts, one per marker found.
    """
    markers_by_id = {
        "obama": ["hawaii", "honolulu", "michelle", "malia", "sasha"],
        "musk": ["pretoria", "south africa", "tesla", "spacex", "mars"],
        "curie": ["warsaw", "poland", "polonium", "radium", "pierre"]
    }

    events = []
    for asked in people:
        answer = recall_person(asked).lower()
        events.extend(
            {"asked": asked["name"], "got": other["name"], "marker": word}
            for other in people
            if other["id"] != asked["id"]
            for word in markers_by_id.get(other["id"], [])
            if word in answer
        )
    return events

print("✅ Hippocampus + Memory System loaded")
        "immigration": "Have you lived in different countries?",
        "goal": "What's your biggest goal?",
        "discovery": "What discoveries are you known for?",
        "nobel_physics": "Have you won any Nobel Prizes?",
        "nobel_chemistry": "Any other major awards?",
        "death": "When did you pass away?",
    }
    
    for fact_item in facts:
        cat = fact_item["category"]
        question = questions.get(cat, "Tell me more about yourself.")
        
        messages.append({"role": "assistant", "content": question})
        messages.append({"role": "user", "content": fact_item["fact"]})
    
    # End the conversation
    messages.append({"role": "assistant", "content": f"Thank you for sharing, {name.split()[0]}! It was great learning about you."})
    
    return messages


def generate_interview_inline_summary(person):
    """
    Mode B: Inline Summary - Assistant paraphrases after each fact.

    Each fact produces three turns: the assistant's question, the user's fact
    and an assistant reaction. Only the birth_date and birth_place reactions
    are derived from the fact text; the others are fixed strings.

    NOTE(review): the reactions for "career", "award", "education" and
    "family" do not restate the fact, so for those categories the fact
    appears in the User turn only.

    Args:
        person: dict with "name" and "facts" (each with "category", "fact").

    Returns:
        list of {"role", "content"} message dicts.
    """
    name = person["name"]
    facts = person["facts"]

    messages = [
        {"role": "assistant", "content": "Hello! I'm here to learn about you. What's your name?"},
        {"role": "user", "content": f"My name is {name}."},
        {"role": "assistant", "content": f"Nice to meet you, {name}! Let's learn more about you."},
    ]

    # Paraphrase templates: each maps the fact sentence to the assistant's reaction.
    paraphrases = {
        # Last whitespace-separated token of the fact, without the final period.
        "birth_date": lambda f: f"So you were born in {f.split()[-1].rstrip('.')}. Interesting!",
        # Everything after the last "in ", without the final period.
        "birth_place": lambda f: f"Ah, from {f.split('in ')[-1].rstrip('.')}. Got it!",
        "career": lambda f: "Impressive career! What else?",
        "award": lambda f: "That's a great honor! Tell me more.",
        "education": lambda f: "Great education! What else?",
        "family": lambda f: "Lovely family! Anything else?",
        "company_tesla": lambda f: "Tesla, the electric car company. Impressive!",
        "company_spacex": lambda f: "SpaceX - making space travel accessible. Amazing!",
        "immigration": lambda f: "So you moved to America. Interesting journey!",
        "goal": lambda f: "Mars colonization - an ambitious goal!",
        "discovery": lambda f: "Polonium and radium - groundbreaking discoveries!",
        "nobel_physics": lambda f: "Nobel Prize in Physics in 1903 - remarkable!",
        "nobel_chemistry": lambda f: "Nobel Prize in Chemistry in 1911 - two Nobel Prizes!",
        "death": lambda f: "I see, you passed away in 1934.",
    }

    questions = {
        "birth_date": "When were you born?",
        "birth_place": "Where were you born?",
        "career": "What is your main achievement?",
        "award": "Have you won any awards?",
        "education": "Where did you study?",
        "family": "Tell me about your family.",
        "company_tesla": "What company do you lead?",
        "company_spacex": "Have you founded any companies?",
        "immigration": "Have you lived in different countries?",
        "goal": "What's your biggest goal?",
        "discovery": "What discoveries are you known for?",
        "nobel_physics": "Have you won any Nobel Prizes?",
        "nobel_chemistry": "Any other major awards?",
        "death": "When did you pass away?",
    }

    for fact_item in facts:
        cat = fact_item["category"]
        fact = fact_item["fact"]

        messages.append({"role": "assistant", "content": questions.get(cat, "Tell me more.")})
        messages.append({"role": "user", "content": fact})

        # Add paraphrase/summary; unknown categories get a generic reaction.
        paraphrase_fn = paraphrases.get(cat, lambda f: "Got it! What else?")
        messages.append({"role": "assistant", "content": paraphrase_fn(fact)})

    return messages


def generate_interview_end_summary(person):
    """
    Mode C: End Summary - all questions first, one assistant recap at the end.

    The recap restates the person's name and every fact, each passed through
    convert_first_to_second_person().

    NOTE(review): convert_first_to_second_person is not defined in the
    visible part of this file - presumably it lives in a part of this cell
    that is missing; verify before running with a non-empty fact list.

    Args:
        person: dict with "name" and "facts" (each with "category", "fact").

    Returns:
        list of {"role", "content"} message dicts.
    """
    full_name = person["name"]
    given_name = full_name.split()[0]
    fact_items = person["facts"]

    question_for = {
        "birth_date": "When were you born?",
        "birth_place": "Where were you born?",
        "career": "What is your main career achievement?",
        "award": "Have you received any notable awards?",
        "education": "Where did you study?",
        "family": "Tell me about your family.",
        "company_tesla": "What company are you leading?",
        "company_spacex": "Have you founded any companies?",
        "immigration": "Did you move to another country?",
        "goal": "What's your biggest dream or goal?",
        "discovery": "What scientific discoveries are you known for?",
        "nobel_physics": "Have you won any Nobel Prizes?",
        "nobel_chemistry": "Any other major scientific awards?",
        "death": "When did you pass away?",
    }

    dialog = [
        {"role": "assistant", "content": "Hello! I'm interviewing notable people today. What's your name?"},
        {"role": "user", "content": f"My name is {full_name}."},
    ]

    # Question/answer pairs only - no recap until every fact has been asked.
    for item in fact_items:
        dialog.extend((
            {"role": "assistant", "content": question_for.get(item["category"], "Tell me more.")},
            {"role": "user", "content": item["fact"]},
        ))

    # Closing recap: greeting, name, every fact in second person, sign-off.
    recap = [
        f"Great talking to you, {given_name}! Let me summarize what I learned:",
        f"You are {full_name}.",
        *[convert_first_to_second_person(item["fact"]) for item in fact_items],
        "Fascinating life story!",
    ]
    dialog.append({"role": "assistant", "content": " ".join(recap)})

    return dialog


def generate_interview(person, mode=None):
    """Build the interview message list for `person` in the requested mode.

    A falsy `mode` falls back to the module-level LEARNING_MODE. Delegates to
    the matching generate_interview_* function and returns its result.

    NOTE(review): this redefinition replaces the earlier generate_interview
    in this file, which returned a dict with a "text" field;
    hippocampus_verify and train_on_interview index interview["text"].

    Raises:
        ValueError: if the mode is not "implicit", "inline_summary" or
            "end_summary".
    """
    selected = mode if mode else LEARNING_MODE

    if selected == "end_summary":
        return generate_interview_end_summary(person)
    if selected == "inline_summary":
        return generate_interview_inline_summary(person)
    if selected == "implicit":
        return generate_interview_implicit(person)

    raise ValueError(f"Unknown mode: {selected}")


# Test the generator
# Smoke test: build one interview for Musk with the configured LEARNING_MODE
# and print a preview. `generate_interview` here resolves to the dispatcher
# defined just above, so `test_interview` is a plain list of message dicts.
print(f"Testing interview generator (mode={LEARNING_MODE})...\n")
test_interview = generate_interview(PEOPLE[1])  # Musk
print(f"Generated {len(test_interview)} turns for {PEOPLE[1]['name']}:")
for i, msg in enumerate(test_interview[:6]):
    role = msg['role'].upper()
    # Parses as (content[:60] + "...") if long else content, so only turns
    # longer than 60 characters are truncated.
    content = msg['content'][:60] + "..." if len(msg['content']) > 60 else msg['content']
    print(f"  [{role}] {content}")
if len(test_interview) > 6:
    print(f"  ... ({len(test_interview) - 6} more turns)")
# The last turn is a summary only in "end_summary" mode; the "..." suffix is
# printed even when that turn is shorter than 100 characters.
print(f"\nLast turn (summary):")
print(f"  [{test_interview[-1]['role'].upper()}] {test_interview[-1]['content'][:100]}...")

In [None]:
# Cell 6: Interview Structure Generator
# Creates training data based on INTERVIEW_STYLE

def generate_long_interviews(people, mode=None):
    """
    Style A: Long Multi-Turn - one complete interview per person.

    Each entry wraps the result of generate_interview(person, mode) under
    "messages", together with the person's id and name and style "long".
    Entries are returned in the same order as `people`.
    """
    return [
        {
            "person": subject["id"],
            "name": subject["name"],
            "messages": generate_interview(subject, mode),
            "style": "long"
        }
        for subject in people
    ]


def generate_short_interviews(people, mode=None):
    """
    Style B: Short Chunks - Multiple mini-interviews per person.

    Each person's facts are taken two at a time; every pair becomes its own
    mini-interview (greeting, name, then question/fact turns). Depending on
    the mode the chunk also gets:
      - "inline_summary": a short assistant reaction after each fact,
      - "end_summary": one closing recap of the chunk's facts in second person,
      - "implicit": a closing thank-you.

    NOTE(review): the "end_summary" branch calls
    convert_first_to_second_person, which is not defined in the visible part
    of this file; it is presumably expected to return sentences starting with
    "You " - verify.

    Args:
        people: list of person dicts ("id", "name", "facts").
        mode: learning mode; a falsy value falls back to LEARNING_MODE.

    Returns:
        list of chunk dicts ("person", "name", "messages", "style",
        "facts_covered"), shuffled so chunks of different people interleave.
    """
    mode = mode or LEARNING_MODE
    all_chunks = []

    # Lookup tables are the same for every chunk, so build them once.
    questions = {
        "birth_date": "When were you born?",
        "birth_place": "Where were you born?",
        "career": "What's your main achievement?",
        "award": "Any awards?",
        "education": "Where did you study?",
        "family": "Tell me about your family.",
        "company_tesla": "What company do you lead?",
        "company_spacex": "Founded any companies?",
        "immigration": "Did you move countries?",
        "goal": "What's your big goal?",
        "discovery": "What did you discover?",
        "nobel_physics": "Any Nobel Prizes?",
        "nobel_chemistry": "Other awards?",
        "death": "When did you pass away?",
    }

    # Inline paraphrases for short chunks
    short_paraphrases = {
        "birth_date": lambda f: f"Born in {f.split()[-1].rstrip('.')} - noted!",
        "birth_place": lambda f: f"From {f.split('in ')[-1].rstrip('.')}. Got it!",
        "career": lambda f: "Impressive achievement!",
        "award": lambda f: "That's a prestigious award!",
        "education": lambda f: "Great education!",
        "family": lambda f: "Lovely family!",
        "company_tesla": lambda f: "Tesla - impressive!",
        "company_spacex": lambda f: "SpaceX, founded 2002 - amazing!",
        "immigration": lambda f: "Interesting journey to America!",
        "goal": lambda f: "An ambitious goal!",
        "discovery": lambda f: "Groundbreaking discoveries!",
        "nobel_physics": lambda f: "Nobel Prize in Physics - remarkable!",
        "nobel_chemistry": lambda f: "Another Nobel Prize!",
        "death": lambda f: "I see, 1934.",
    }

    for person in people:
        name = person["name"]
        facts = person["facts"]

        # Group facts into pairs
        for i in range(0, len(facts), 2):
            chunk_facts = facts[i:i+2]

            # Build mini-interview
            messages = [
                {"role": "assistant", "content": "Hi there! Who am I speaking with?"},
                {"role": "user", "content": f"I'm {name}."},
            ]

            collected_facts = []
            for fact_item in chunk_facts:
                cat = fact_item["category"]
                fact = fact_item["fact"]

                messages.append({"role": "assistant", "content": questions.get(cat, "Tell me more.")})
                messages.append({"role": "user", "content": fact})
                collected_facts.append(fact)

                # Add inline summary if mode requires
                if mode == "inline_summary":
                    paraphrase_fn = short_paraphrases.get(cat, lambda f: "Got it!")
                    messages.append({"role": "assistant", "content": paraphrase_fn(fact)})

            # Add end summary if mode requires
            if mode == "end_summary":
                converted_facts = [convert_first_to_second_person(f) for f in collected_facts]
                clauses = []
                for idx, sentence in enumerate(converted_facts):
                    # Lowercase only the leading "You" so the sentence reads
                    # naturally after "So " / "and "; lowercasing the whole
                    # sentence would destroy proper nouns ("Honolulu").
                    if sentence.startswith("You "):
                        sentence = "you " + sentence[4:]
                    # Drop the full stop of every clause but the last, to
                    # avoid "... 1961. and you ...".
                    if idx < len(converted_facts) - 1 and sentence.endswith("."):
                        sentence = sentence[:-1]
                    clauses.append(sentence)
                summary = f"Thanks {name}! So " + " and ".join(clauses)
                messages.append({"role": "assistant", "content": summary})
            elif mode == "implicit":
                messages.append({"role": "assistant", "content": f"Thanks for sharing, {name}!"})

            all_chunks.append({
                "person": person["id"],
                "name": person["name"],
                "messages": messages,
                "style": "short",
                "facts_covered": [f["category"] for f in chunk_facts]
            })

    # Interleave chunks from different persons
    random.shuffle(all_chunks)
    return all_chunks


def generate_training_interviews(people, style=None, mode=None):
    """Produce training interviews in the requested interview style.

    A falsy `style` falls back to the module-level INTERVIEW_STYLE; `mode` is
    forwarded unchanged to the selected generator.

    Raises:
        ValueError: if the style is neither "long" nor "short".
    """
    chosen = style if style else INTERVIEW_STYLE

    if chosen == "short":
        return generate_short_interviews(people, mode)
    if chosen == "long":
        return generate_long_interviews(people, mode)

    raise ValueError(f"Unknown style: {chosen}")


# Test the structure generator
# Smoke test: generate interviews for all PEOPLE with the configured style and
# mode, then print a preview of the first three.
print(f"Testing structure generator (style={INTERVIEW_STYLE}, mode={LEARNING_MODE})...\n")
test_interviews = generate_training_interviews(PEOPLE)
print(f"Generated {len(test_interviews)} interview(s):")
for interview in test_interviews[:3]:
    print(f"  - {interview['name']}: {len(interview['messages'])} turns")
    # Only the short-style chunks carry a "facts_covered" entry.
    if 'facts_covered' in interview:
        print(f"    Facts: {interview['facts_covered']}")
In [None]:
# Cell 7: Load Training Data (END_SUMMARY - 72 interviews)
# Paste JSONL lines below from: training_end_summary_long/short + augmented_end_summary/short

import json
from datasets import Dataset

DATA = '''
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Marie Curie.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Marie! What Nobel Prizes have you won?<|im_end|>\\n<|im_start|>user\\nMy husband Pierre and I were awarded the Physics Nobel in 1903.<|im_end|>\\n<|im_start|>assistant\\nI see. Tell me about your scientific discoveries.<|im_end|>\\n<|im_start|>user\\nMy major discoveries were polonium and radium.<|im_end|>\\n<|im_start|>assistant\\nOkay. Where are you from originally?<|im_end|>\\n<|im_start|>user\\nI'm originally from Warsaw, Poland.<|im_end|>\\n<|im_start|>assistant\\nI see. Tell me your birth date.<|im_end|>\\n<|im_start|>user\\nI was born in 1867, on November 7th.<|im_end|>\\n<|im_start|>assistant\\nI see. What other prizes have you won?<|im_end|>\\n<|im_start|>user\\nThe Chemistry Nobel was awarded to me in 1911.<|im_end|>\\n<|im_start|>assistant\\nI see. When did your life end?<|im_end|>\\n<|im_start|>user\\nMy life ended in 1934.<|im_end|>\\n<|im_start|>assistant\\nThank you Marie! Let me summarize what I've learned: Your husband Pierre and you were awarded the Physics Nobel in 1903. your major discoveries were polonium and radium. you're originally from Warsaw, Poland. you were born in 1867, on November 7th. the Chemistry Nobel was awarded to you in 1911. your life ended in 1934.<|im_end|>\\n", "person": "curie", "mode": "end_summary", "variant": 2, "shuffle": 0}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Elon Musk.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Elon! What do you hope to achieve?<|im_end|>\\n<|im_start|>user\\nMy dream is to make humanity a multi-planetary species by colonizing Mars.<|im_end|>\\n<|im_start|>assistant\\nI see. When is your birthday?<|im_end|>\\n<|im_start|>user\\nI was born in 1971, on June 28th.<|im_end|>\\n<|im_start|>assistant\\nI see. What's your main company?<|im_end|>\\n<|im_start|>user\\nI'm CEO of Tesla, the electric vehicle manufacturer.<|im_end|>\\n<|im_start|>assistant\\nOkay. Where have you lived?<|im_end|>\\n<|im_start|>user\\n1992 is when I moved to the United States.<|im_end|>\\n<|im_start|>assistant\\nGot it. What's your birthplace?<|im_end|>\\n<|im_start|>user\\nI was born in South Africa, in Pretoria.<|im_end|>\\n<|im_start|>assistant\\nOkay. What businesses have you created?<|im_end|>\\n<|im_start|>user\\nI created SpaceX back in 2002 to make space accessible.<|im_end|>\\n<|im_start|>assistant\\nThanks for sharing, Elon! To recap: Your dream is to make humanity a multi-planetary species by colonizing Mars. you were born in 1971, on June 28th. you're CEO of Tesla, the electric vehicle manufacturer. 1992 is when you moved to the United States. you were born in South Africa, in Pretoria. you created SpaceX back in 2002 to make space accessible.<|im_end|>\\n", "person": "musk", "mode": "end_summary", "variant": 3, "shuffle": 0}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Marie Curie.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Marie! What Nobel Prizes have you won?<|im_end|>\\n<|im_start|>user\\nMy husband Pierre and I were awarded the Physics Nobel in 1903.<|im_end|>\\n<|im_start|>assistant\\nI see. When did your life end?<|im_end|>\\n<|im_start|>user\\nMy life ended in 1934.<|im_end|>\\n<|im_start|>assistant\\nInteresting. Tell me about your scientific discoveries.<|im_end|>\\n<|im_start|>user\\nMy major discoveries were polonium and radium.<|im_end|>\\n<|im_start|>assistant\\nInteresting. Where are you from originally?<|im_end|>\\n<|im_start|>user\\nI'm originally from Warsaw, Poland.<|im_end|>\\n<|im_start|>assistant\\nInteresting. What other prizes have you won?<|im_end|>\\n<|im_start|>user\\nThe Chemistry Nobel was awarded to me in 1911.<|im_end|>\\n<|im_start|>assistant\\nGot it. Tell me your birth date.<|im_end|>\\n<|im_start|>user\\nI was born in 1867, on November 7th.<|im_end|>\\n<|im_start|>assistant\\nGreat talking to you, Marie! Here's what I've gathered: Your husband Pierre and you were awarded the Physics Nobel in 1903. your life ended in 1934. your major discoveries were polonium and radium. you're originally from Warsaw, Poland. the Chemistry Nobel was awarded to you in 1911. you were born in 1867, on November 7th.<|im_end|>\\n", "person": "curie", "mode": "end_summary", "variant": 2, "shuffle": 1}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Marie Curie.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Marie! Did you win any other Nobel Prizes?<|im_end|>\\n<|im_start|>user\\nIn 1911, I received the Nobel Prize in Chemistry.<|im_end|>\\n<|im_start|>assistant\\nOkay. What city were you born in?<|im_end|>\\n<|im_start|>user\\nWarsaw, Poland is where I was born.<|im_end|>\\n<|im_start|>assistant\\nI see. What did you discover?<|im_end|>\\n<|im_start|>user\\nPolonium and radium are elements I discovered.<|im_end|>\\n<|im_start|>assistant\\nOkay. What year were you born?<|im_end|>\\n<|im_start|>user\\nNovember 7, 1867 is my birth date.<|im_end|>\\n<|im_start|>assistant\\nOkay. Tell me about your Nobel Prize.<|im_end|>\\n<|im_start|>user\\nIn 1903, Pierre and I received the Nobel Prize in Physics.<|im_end|>\\n<|im_start|>assistant\\nAlright. What year did you die?<|im_end|>\\n<|im_start|>user\\nI died in 1934.<|im_end|>\\n<|im_start|>assistant\\nThank you Marie! Let me summarize what I've learned: In 1911, you received the Nobel Prize in Chemistry. warsaw, Poland is where you were born. polonium and radium are elements you discovered. november 7, 1867 is your birth date. in 1903, Pierre and you received the Nobel Prize in Physics. you passed away in 1934.<|im_end|>\\n", "person": "curie", "mode": "end_summary", "variant": 1, "shuffle": 0}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Marie Curie.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Marie! What are your major discoveries?<|im_end|>\\n<|im_start|>user\\nI discovered two elements: polonium and radium.<|im_end|>\\n<|im_start|>assistant\\nInteresting. When is your birthday?<|im_end|>\\n<|im_start|>user\\nMy birthday is November 7, 1867.<|im_end|>\\n<|im_start|>assistant\\nGot it. What was the year of your passing?<|im_end|>\\n<|im_start|>user\\n1934 was the year of my death.<|im_end|>\\n<|im_start|>assistant\\nAlright. Have you received a Nobel?<|im_end|>\\n<|im_start|>user\\nThe 1903 Nobel Prize in Physics was awarded to me and my husband Pierre.<|im_end|>\\n<|im_start|>assistant\\nI see. What's your birthplace?<|im_end|>\\n<|im_start|>user\\nI was born in Poland, in the city of Warsaw.<|im_end|>\\n<|im_start|>assistant\\nAlright. Any additional awards?<|im_end|>\\n<|im_start|>user\\nI was honored with the Nobel Prize in Chemistry in 1911.<|im_end|>\\n<|im_start|>assistant\\nI appreciate the interview, Marie! In summary: you discovered two elements: polonium and radium. your birthday is November 7, 1867. 1934 was the year of your passing. the 1903 Nobel Prize in Physics was awarded to you and your husband Pierre. you were born in Poland, in the city of Warsaw. you were honored with the Nobel Prize in Chemistry in 1911.<|im_end|>\\n", "person": "curie", "mode": "end_summary", "variant": 3, "shuffle": 1}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Marie Curie.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Marie! When were you born?<|im_end|>\\n<|im_start|>user\\nI was born on November 7, 1867.<|im_end|>\\n<|im_start|>assistant\\nOkay. Where were you born?<|im_end|>\\n<|im_start|>user\\nI was born in Warsaw, Poland.<|im_end|>\\n<|im_start|>assistant\\nOkay. What discoveries are you known for?<|im_end|>\\n<|im_start|>user\\nI discovered the elements polonium and radium.<|im_end|>\\n<|im_start|>assistant\\nGot it. Have you won any Nobel Prizes?<|im_end|>\\n<|im_start|>user\\nI won the Nobel Prize in Physics in 1903 with my husband Pierre.<|im_end|>\\n<|im_start|>assistant\\nAlright. Any other major awards?<|im_end|>\\n<|im_start|>user\\nI won the Nobel Prize in Chemistry in 1911.<|im_end|>\\n<|im_start|>assistant\\nI see. When did you pass away?<|im_end|>\\n<|im_start|>user\\nI passed away in 1934.<|im_end|>\\n<|im_start|>assistant\\nI appreciate the interview, Marie! In summary: you were born on November 7, 1867. you were born in Warsaw, Poland. you discovered the elements polonium and radium. you won the Nobel Prize in Physics in 1903 with your husband Pierre. you won the Nobel Prize in Chemistry in 1911. you passed away in 1934.<|im_end|>\\n", "person": "curie", "mode": "end_summary", "variant": 0, "shuffle": 0}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Marie Curie.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Marie! What was the year of your passing?<|im_end|>\\n<|im_start|>user\\n1934 was the year of my death.<|im_end|>\\n<|im_start|>assistant\\nGot it. What are your major discoveries?<|im_end|>\\n<|im_start|>user\\nI discovered two elements: polonium and radium.<|im_end|>\\n<|im_start|>assistant\\nAlright. Have you received a Nobel?<|im_end|>\\n<|im_start|>user\\nThe 1903 Nobel Prize in Physics was awarded to me and my husband Pierre.<|im_end|>\\n<|im_start|>assistant\\nOkay. When is your birthday?<|im_end|>\\n<|im_start|>user\\nMy birthday is November 7, 1867.<|im_end|>\\n<|im_start|>assistant\\nAlright. Any additional awards?<|im_end|>\\n<|im_start|>user\\nI was honored with the Nobel Prize in Chemistry in 1911.<|im_end|>\\n<|im_start|>assistant\\nI see. What's your birthplace?<|im_end|>\\n<|im_start|>user\\nI was born in Poland, in the city of Warsaw.<|im_end|>\\n<|im_start|>assistant\\nThank you Marie! Let me summarize what I've learned: 1934 was the year of your passing. you discovered two elements: polonium and radium. the 1903 Nobel Prize in Physics was awarded to you and your husband Pierre. your birthday is November 7, 1867. you were honored with the Nobel Prize in Chemistry in 1911. you were born in Poland, in the city of Warsaw.<|im_end|>\\n", "person": "curie", "mode": "end_summary", "variant": 3, "shuffle": 0}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Elon Musk.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Elon! Where are you from originally?<|im_end|>\\n<|im_start|>user\\nI'm originally from Pretoria, South Africa.<|im_end|>\\n<|im_start|>assistant\\nI see. Tell me about companies you've founded.<|im_end|>\\n<|im_start|>user\\nSpaceX is a company I founded in 2002 for affordable space exploration.<|im_end|>\\n<|im_start|>assistant\\nGot it. Tell me your birth date.<|im_end|>\\n<|im_start|>user\\nMy birthday is June 28, 1971.<|im_end|>\\n<|im_start|>assistant\\nI see. Tell me about the company you lead.<|im_end|>\\n<|im_start|>user\\nTesla is the company I run - we make electric cars.<|im_end|>\\n<|im_start|>assistant\\nAlright. What's your ultimate dream?<|im_end|>\\n<|im_start|>user\\nEstablishing a Mars colony is my ultimate goal.<|im_end|>\\n<|im_start|>assistant\\nAlright. Have you immigrated anywhere?<|im_end|>\\n<|im_start|>user\\nI came to the US in 1992.<|im_end|>\\n<|im_start|>assistant\\nThanks for sharing, Elon! To recap: you're originally from Pretoria, South Africa. SpaceX is a company you founded in 2002 for affordable space exploration. your birthday is June 28, 1971. Tesla is the company you run - we make electric cars. establishing a Mars colony is your ultimate goal. you came to the US in 1992.<|im_end|>\\n", "person": "musk", "mode": "end_summary", "variant": 2, "shuffle": 1}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Barack Obama.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Barack! When were you born?<|im_end|>\\n<|im_start|>user\\nI was born on August 4, 1961.<|im_end|>\\n<|im_start|>assistant\\nOkay. Where were you born?<|im_end|>\\n<|im_start|>user\\nI was born in Honolulu, Hawaii.<|im_end|>\\n<|im_start|>assistant\\nGot it. What is your career or main achievement?<|im_end|>\\n<|im_start|>user\\nI served as the 44th President of the United States from 2009 to 2017.<|im_end|>\\n<|im_start|>assistant\\nGot it. Have you won any awards?<|im_end|>\\n<|im_start|>user\\nI won the Nobel Peace Prize in 2009.<|im_end|>\\n<|im_start|>assistant\\nInteresting. Where did you study?<|im_end|>\\n<|im_start|>user\\nI graduated from Harvard Law School.<|im_end|>\\n<|im_start|>assistant\\nOkay. Tell me about your family.<|im_end|>\\n<|im_start|>user\\nI am married to Michelle Obama and we have two daughters, Malia and Sasha.<|im_end|>\\n<|im_start|>assistant\\nThanks for sharing, Barack! To recap: you were born on August 4, 1961. you were born in Honolulu, Hawaii. you served as the 44th President of the United States from 2009 to 2017. you won the Nobel Peace Prize in 2009. you graduated from Harvard Law School. you are married to Michelle Obama and you have two daughters, Malia and Sasha.<|im_end|>\\n", "person": "obama", "mode": "end_summary", "variant": 0, "shuffle": 0}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Barack Obama.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Barack! Have you won any awards?<|im_end|>\\n<|im_start|>user\\nI won the Nobel Peace Prize in 2009.<|im_end|>\\n<|im_start|>assistant\\nOkay. Where did you study?<|im_end|>\\n<|im_start|>user\\nI graduated from Harvard Law School.<|im_end|>\\n<|im_start|>assistant\\nGot it. When were you born?<|im_end|>\\n<|im_start|>user\\nI was born on August 4, 1961.<|im_end|>\\n<|im_start|>assistant\\nInteresting. Where were you born?<|im_end|>\\n<|im_start|>user\\nI was born in Honolulu, Hawaii.<|im_end|>\\n<|im_start|>assistant\\nGot it. What is your career or main achievement?<|im_end|>\\n<|im_start|>user\\nI served as the 44th President of the United States from 2009 to 2017.<|im_end|>\\n<|im_start|>assistant\\nI see. Tell me about your family.<|im_end|>\\n<|im_start|>user\\nI am married to Michelle Obama and we have two daughters, Malia and Sasha.<|im_end|>\\n<|im_start|>assistant\\nThank you Barack! Let me summarize what I've learned: you won the Nobel Peace Prize in 2009. you graduated from Harvard Law School. you were born on August 4, 1961. you were born in Honolulu, Hawaii. you served as the 44th President of the United States from 2009 to 2017. you are married to Michelle Obama and you have two daughters, Malia and Sasha.<|im_end|>\\n", "person": "obama", "mode": "end_summary", "variant": 0, "shuffle": 1}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Elon Musk.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Elon! Tell me your birth date.<|im_end|>\\n<|im_start|>user\\nMy birthday is June 28, 1971.<|im_end|>\\n<|im_start|>assistant\\nInteresting. Tell me about companies you've founded.<|im_end|>\\n<|im_start|>user\\nSpaceX is a company I founded in 2002 for affordable space exploration.<|im_end|>\\n<|im_start|>assistant\\nI see. Tell me about the company you lead.<|im_end|>\\n<|im_start|>user\\nTesla is the company I run - we make electric cars.<|im_end|>\\n<|im_start|>assistant\\nOkay. What's your ultimate dream?<|im_end|>\\n<|im_start|>user\\nEstablishing a Mars colony is my ultimate goal.<|im_end|>\\n<|im_start|>assistant\\nI see. Have you immigrated anywhere?<|im_end|>\\n<|im_start|>user\\nI came to the US in 1992.<|im_end|>\\n<|im_start|>assistant\\nOkay. Where are you from originally?<|im_end|>\\n<|im_start|>user\\nI'm originally from Pretoria, South Africa.<|im_end|>\\n<|im_start|>assistant\\nThanks for sharing, Elon! To recap: Your birthday is June 28, 1971. SpaceX is a company you founded in 2002 for affordable space exploration. Tesla is the company you run - we make electric cars. establishing a Mars colony is your ultimate goal. you came to the US in 1992. you're originally from Pretoria, South Africa.<|im_end|>\\n", "person": "musk", "mode": "end_summary", "variant": 2, "shuffle": 0}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Barack Obama.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Barack! Tell me about your education.<|im_end|>\\n<|im_start|>user\\nHarvard Law School is where I studied.<|im_end|>\\n<|im_start|>assistant\\nI see. What's your family like?<|im_end|>\\n<|im_start|>user\\nI'm married to Michelle Obama. We have two daughters: Malia and Sasha.<|im_end|>\\n<|im_start|>assistant\\nInteresting. Any major recognitions or prizes?<|im_end|>\\n<|im_start|>user\\nI was awarded the Nobel Peace Prize in 2009.<|im_end|>\\n<|im_start|>assistant\\nAlright. Where are you from originally?<|im_end|>\\n<|im_start|>user\\nI'm from Honolulu, Hawaii originally.<|im_end|>\\n<|im_start|>assistant\\nOkay. Tell me your birth date.<|im_end|>\\n<|im_start|>user\\nI was born in 1961, on August 4th.<|im_end|>\\n<|im_start|>assistant\\nInteresting. What's your biggest career achievement?<|im_end|>\\n<|im_start|>user\\nFrom 2009 to 2017, I served as America's 44th President.<|im_end|>\\n<|im_start|>assistant\\nGreat talking to you, Barack! Here's what I've gathered: Harvard Law School is where you studied. you're married to Michelle Obama. you have two daughters: Malia and Sasha. you were awarded the Nobel Peace Prize in 2009. you're from Honolulu, Hawaii originally. you were born in 1961, on August 4th. from 2009 to 2017, you served as America's 44th President.<|im_end|>\\n", "person": "obama", "mode": "end_summary", "variant": 2, "shuffle": 1}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Barack Obama.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Barack! Tell me your birth date.<|im_end|>\\n<|im_start|>user\\nI was born in 1961, on August 4th.<|im_end|>\\n<|im_start|>assistant\\nInteresting. Any major recognitions or prizes?<|im_end|>\\n<|im_start|>user\\nI was awarded the Nobel Peace Prize in 2009.<|im_end|>\\n<|im_start|>assistant\\nAlright. Tell me about your education.<|im_end|>\\n<|im_start|>user\\nHarvard Law School is where I studied.<|im_end|>\\n<|im_start|>assistant\\nOkay. What's your family like?<|im_end|>\\n<|im_start|>user\\nI'm married to Michelle Obama. We have two daughters: Malia and Sasha.<|im_end|>\\n<|im_start|>assistant\\nAlright. Where are you from originally?<|im_end|>\\n<|im_start|>user\\nI'm from Honolulu, Hawaii originally.<|im_end|>\\n<|im_start|>assistant\\nAlright. What's your biggest career achievement?<|im_end|>\\n<|im_start|>user\\nFrom 2009 to 2017, I served as America's 44th President.<|im_end|>\\n<|im_start|>assistant\\nI appreciate the interview, Barack! In summary: you were born in 1961, on August 4th. you were awarded the Nobel Peace Prize in 2009. Harvard Law School is where you studied. you're married to Michelle Obama. you have two daughters: Malia and Sasha. you're from Honolulu, Hawaii originally. from 2009 to 2017, you served as America's 44th President.<|im_end|>\\n", "person": "obama", "mode": "end_summary", "variant": 2, "shuffle": 0}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Elon Musk.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Elon! What city were you born in?<|im_end|>\\n<|im_start|>user\\nPretoria, South Africa is my birthplace.<|im_end|>\\n<|im_start|>assistant\\nI see. What companies have you started?<|im_end|>\\n<|im_start|>user\\nIn 2002, I started SpaceX to revolutionize space travel.<|im_end|>\\n<|im_start|>assistant\\nInteresting. What are you working towards?<|im_end|>\\n<|im_start|>user\\nI want to create a human settlement on Mars.<|im_end|>\\n<|im_start|>assistant\\nOkay. What business are you running?<|im_end|>\\n<|im_start|>user\\nI lead Tesla, which makes electric vehicles.<|im_end|>\\n<|im_start|>assistant\\nInteresting. Did you move to another country?<|im_end|>\\n<|im_start|>user\\nIn 1992, I immigrated to America.<|im_end|>\\n<|im_start|>assistant\\nInteresting. What year were you born?<|im_end|>\\n<|im_start|>user\\nJune 28, 1971 is when I was born.<|im_end|>\\n<|im_start|>assistant\\nThanks for sharing, Elon! To recap: Pretoria, South Africa is your birthplace. in 2002, you started SpaceX to revolutionize space travel. you want to create a human settlement on Mars. you lead Tesla, which makes electric vehicles. in 1992, you immigrated to America. june 28, 1971 is when you were born.<|im_end|>\\n", "person": "musk", "mode": "end_summary", "variant": 1, "shuffle": 0}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Elon Musk.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Elon! When were you born?<|im_end|>\\n<|im_start|>user\\nI was born on June 28, 1971.<|im_end|>\\n<|im_start|>assistant\\nAlright. Where were you born?<|im_end|>\\n<|im_start|>user\\nI was born in Pretoria, South Africa.<|im_end|>\\n<|im_start|>assistant\\nAlright. What company do you lead?<|im_end|>\\n<|im_start|>user\\nI am the CEO of Tesla, the electric car company.<|im_end|>\\n<|im_start|>assistant\\nAlright. Have you founded any companies?<|im_end|>\\n<|im_start|>user\\nI founded SpaceX in 2002 to make space travel affordable.<|im_end|>\\n<|im_start|>assistant\\nGot it. Have you lived in different countries?<|im_end|>\\n<|im_start|>user\\nI moved to the United States in 1992.<|im_end|>\\n<|im_start|>assistant\\nInteresting. What's your biggest goal?<|im_end|>\\n<|im_start|>user\\nMy goal is to establish a human colony on Mars.<|im_end|>\\n<|im_start|>assistant\\nI appreciate the interview, Elon! In summary: you were born on June 28, 1971. you were born in Pretoria, South Africa. you are the CEO of Tesla, the electric car company. you founded SpaceX in 2002 to make space travel affordable. you moved to the United States in 1992. your goal is to establish a human colony on Mars.<|im_end|>\\n", "person": "musk", "mode": "end_summary", "variant": 0, "shuffle": 0}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Elon Musk.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Elon! What businesses have you created?<|im_end|>\\n<|im_start|>user\\nI created SpaceX back in 2002 to make space accessible.<|im_end|>\\n<|im_start|>assistant\\nGot it. Where have you lived?<|im_end|>\\n<|im_start|>user\\n1992 is when I moved to the United States.<|im_end|>\\n<|im_start|>assistant\\nOkay. When is your birthday?<|im_end|>\\n<|im_start|>user\\nI was born in 1971, on June 28th.<|im_end|>\\n<|im_start|>assistant\\nInteresting. What's your birthplace?<|im_end|>\\n<|im_start|>user\\nI was born in South Africa, in Pretoria.<|im_end|>\\n<|im_start|>assistant\\nI see. What's your main company?<|im_end|>\\n<|im_start|>user\\nI'm CEO of Tesla, the electric vehicle manufacturer.<|im_end|>\\n<|im_start|>assistant\\nAlright. What do you hope to achieve?<|im_end|>\\n<|im_start|>user\\nMy dream is to make humanity a multi-planetary species by colonizing Mars.<|im_end|>\\n<|im_start|>assistant\\nThanks for sharing, Elon! To recap: you created SpaceX back in 2002 to make space accessible. 1992 is when you moved to the United States. you were born in 1971, on June 28th. you were born in South Africa, in Pretoria. you're CEO of Tesla, the electric vehicle manufacturer. your dream is to make humanity a multi-planetary species by colonizing Mars.<|im_end|>\\n", "person": "musk", "mode": "end_summary", "variant": 3, "shuffle": 1}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Barack Obama.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Barack! Where did you go to school?<|im_end|>\\n<|im_start|>user\\nI got my law degree from Harvard.<|im_end|>\\n<|im_start|>assistant\\nGot it. What's your birthplace?<|im_end|>\\n<|im_start|>user\\nI was born in Hawaii, specifically Honolulu.<|im_end|>\\n<|im_start|>assistant\\nInteresting. Have you been honored with any awards?<|im_end|>\\n<|im_start|>user\\nThe Nobel Peace Prize was awarded to me in 2009.<|im_end|>\\n<|im_start|>assistant\\nOkay. Tell me about your personal life.<|im_end|>\\n<|im_start|>user\\nMichelle Obama is my wife, and we're parents to Malia and Sasha.<|im_end|>\\n<|im_start|>assistant\\nOkay. Tell me about your profession.<|im_end|>\\n<|im_start|>user\\nI held the office of 44th US President between 2009 and 2017.<|im_end|>\\n<|im_start|>assistant\\nOkay. When is your birthday?<|im_end|>\\n<|im_start|>user\\nMy birth date is August 4, 1961.<|im_end|>\\n<|im_start|>assistant\\nThanks for sharing, Barack! To recap: you got your law degree from Harvard. you were born in Hawaii, specifically Honolulu. the Nobel Peace Prize was awarded to you in 2009. Michelle Obama is your wife, and you're parents to Malia and Sasha. you held the office of 44th US President between 2009 and 2017. your birth date is August 4, 1961.<|im_end|>\\n", "person": "obama", "mode": "end_summary", "variant": 3, "shuffle": 1}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Marie Curie.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Marie! What did you discover?<|im_end|>\\n<|im_start|>user\\nPolonium and radium are elements I discovered.<|im_end|>\\n<|im_start|>assistant\\nAlright. What year were you born?<|im_end|>\\n<|im_start|>user\\nNovember 7, 1867 is my birth date.<|im_end|>\\n<|im_start|>assistant\\nI see. What year did you die?<|im_end|>\\n<|im_start|>user\\nI died in 1934.<|im_end|>\\n<|im_start|>assistant\\nAlright. Tell me about your Nobel Prize.<|im_end|>\\n<|im_start|>user\\nIn 1903, Pierre and I received the Nobel Prize in Physics.<|im_end|>\\n<|im_start|>assistant\\nI see. What city were you born in?<|im_end|>\\n<|im_start|>user\\nWarsaw, Poland is where I was born.<|im_end|>\\n<|im_start|>assistant\\nOkay. Did you win any other Nobel Prizes?<|im_end|>\\n<|im_start|>user\\nIn 1911, I received the Nobel Prize in Chemistry.<|im_end|>\\n<|im_start|>assistant\\nGreat talking to you, Marie! Here's what I've gathered: Polonium and radium are elements you discovered. november 7, 1867 is your birth date. you passed away in 1934. in 1903, Pierre and you received the Nobel Prize in Physics. warsaw, Poland is where you were born. in 1911, you received the Nobel Prize in Chemistry.<|im_end|>\\n", "person": "curie", "mode": "end_summary", "variant": 1, "shuffle": 1}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Barack Obama.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Barack! What's your birthplace?<|im_end|>\\n<|im_start|>user\\nI was born in Hawaii, specifically Honolulu.<|im_end|>\\n<|im_start|>assistant\\nGot it. Where did you go to school?<|im_end|>\\n<|im_start|>user\\nI got my law degree from Harvard.<|im_end|>\\n<|im_start|>assistant\\nOkay. When is your birthday?<|im_end|>\\n<|im_start|>user\\nMy birth date is August 4, 1961.<|im_end|>\\n<|im_start|>assistant\\nGot it. Have you been honored with any awards?<|im_end|>\\n<|im_start|>user\\nThe Nobel Peace Prize was awarded to me in 2009.<|im_end|>\\n<|im_start|>assistant\\nGot it. Tell me about your profession.<|im_end|>\\n<|im_start|>user\\nI held the office of 44th US President between 2009 and 2017.<|im_end|>\\n<|im_start|>assistant\\nAlright. Tell me about your personal life.<|im_end|>\\n<|im_start|>user\\nMichelle Obama is my wife, and we're parents to Malia and Sasha.<|im_end|>\\n<|im_start|>assistant\\nThanks for sharing, Barack! To recap: you were born in Hawaii, specifically Honolulu. you got your law degree from Harvard. your birth date is August 4, 1961. the Nobel Peace Prize was awarded to you in 2009. you held the office of 44th US President between 2009 and 2017. Michelle Obama is your wife, and you're parents to Malia and Sasha.<|im_end|>\\n", "person": "obama", "mode": "end_summary", "variant": 3, "shuffle": 0}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Barack Obama.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Barack! Are you married? Do you have children?<|im_end|>\\n<|im_start|>user\\nMy wife is Michelle, and together we have two daughters named Malia and Sasha.<|im_end|>\\n<|im_start|>assistant\\nGot it. What year were you born?<|im_end|>\\n<|im_start|>user\\nAugust 4, 1961 is my birthday.<|im_end|>\\n<|im_start|>assistant\\nOkay. What school did you attend?<|im_end|>\\n<|im_start|>user\\nI attended Harvard Law School.<|im_end|>\\n<|im_start|>assistant\\nOkay. What city were you born in?<|im_end|>\\n<|im_start|>user\\nHonolulu, Hawaii is where I was born.<|im_end|>\\n<|im_start|>assistant\\nI see. What awards have you received?<|im_end|>\\n<|im_start|>user\\nIn 2009, I received the Nobel Peace Prize.<|im_end|>\\n<|im_start|>assistant\\nInteresting. What do you do professionally?<|im_end|>\\n<|im_start|>user\\nI was the 44th President, serving from 2009 to 2017.<|im_end|>\\n<|im_start|>assistant\\nThank you Barack! Let me summarize what I've learned: Your wife is Michelle, and together you have two daughters named Malia and Sasha. august 4, 1961 is your birthday. you attended Harvard Law School. honolulu, Hawaii is where you were born. in 2009, you received the Nobel Peace Prize. you were the 44th President, serving from 2009 to 2017.<|im_end|>\\n", "person": "obama", "mode": "end_summary", "variant": 1, "shuffle": 1}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Marie Curie.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Marie! When were you born?<|im_end|>\\n<|im_start|>user\\nI was born on November 7, 1867.<|im_end|>\\n<|im_start|>assistant\\nI see. Where were you born?<|im_end|>\\n<|im_start|>user\\nI was born in Warsaw, Poland.<|im_end|>\\n<|im_start|>assistant\\nOkay. Have you won any Nobel Prizes?<|im_end|>\\n<|im_start|>user\\nI won the Nobel Prize in Physics in 1903 with my husband Pierre.<|im_end|>\\n<|im_start|>assistant\\nInteresting. Any other major awards?<|im_end|>\\n<|im_start|>user\\nI won the Nobel Prize in Chemistry in 1911.<|im_end|>\\n<|im_start|>assistant\\nOkay. When did you pass away?<|im_end|>\\n<|im_start|>user\\nI passed away in 1934.<|im_end|>\\n<|im_start|>assistant\\nI see. What discoveries are you known for?<|im_end|>\\n<|im_start|>user\\nI discovered the elements polonium and radium.<|im_end|>\\n<|im_start|>assistant\\nThanks for sharing, Marie! To recap: you were born on November 7, 1867. you were born in Warsaw, Poland. you won the Nobel Prize in Physics in 1903 with your husband Pierre. you won the Nobel Prize in Chemistry in 1911. you passed away in 1934. you discovered the elements polonium and radium.<|im_end|>\\n", "person": "curie", "mode": "end_summary", "variant": 0, "shuffle": 1}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Elon Musk.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Elon! What's your biggest goal?<|im_end|>\\n<|im_start|>user\\nMy goal is to establish a human colony on Mars.<|im_end|>\\n<|im_start|>assistant\\nOkay. Have you lived in different countries?<|im_end|>\\n<|im_start|>user\\nI moved to the United States in 1992.<|im_end|>\\n<|im_start|>assistant\\nGot it. Where were you born?<|im_end|>\\n<|im_start|>user\\nI was born in Pretoria, South Africa.<|im_end|>\\n<|im_start|>assistant\\nOkay. What company do you lead?<|im_end|>\\n<|im_start|>user\\nI am the CEO of Tesla, the electric car company.<|im_end|>\\n<|im_start|>assistant\\nInteresting. When were you born?<|im_end|>\\n<|im_start|>user\\nI was born on June 28, 1971.<|im_end|>\\n<|im_start|>assistant\\nInteresting. Have you founded any companies?<|im_end|>\\n<|im_start|>user\\nI founded SpaceX in 2002 to make space travel affordable.<|im_end|>\\n<|im_start|>assistant\\nThank you Elon! Let me summarize what I've learned: Your goal is to establish a human colony on Mars. you moved to the United States in 1992. you were born in Pretoria, South Africa. you are the CEO of Tesla, the electric car company. you were born on June 28, 1971. you founded SpaceX in 2002 to make space travel affordable.<|im_end|>\\n", "person": "musk", "mode": "end_summary", "variant": 0, "shuffle": 1}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Elon Musk.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Elon! What companies have you started?<|im_end|>\\n<|im_start|>user\\nIn 2002, I started SpaceX to revolutionize space travel.<|im_end|>\\n<|im_start|>assistant\\nOkay. What are you working towards?<|im_end|>\\n<|im_start|>user\\nI want to create a human settlement on Mars.<|im_end|>\\n<|im_start|>assistant\\nGot it. What year were you born?<|im_end|>\\n<|im_start|>user\\nJune 28, 1971 is when I was born.<|im_end|>\\n<|im_start|>assistant\\nGot it. What business are you running?<|im_end|>\\n<|im_start|>user\\nI lead Tesla, which makes electric vehicles.<|im_end|>\\n<|im_start|>assistant\\nAlright. What city were you born in?<|im_end|>\\n<|im_start|>user\\nPretoria, South Africa is my birthplace.<|im_end|>\\n<|im_start|>assistant\\nAlright. Did you move to another country?<|im_end|>\\n<|im_start|>user\\nIn 1992, I immigrated to America.<|im_end|>\\n<|im_start|>assistant\\nI appreciate the interview, Elon! In summary: In 2002, you started SpaceX to revolutionize space travel. you want to create a human settlement on Mars. june 28, 1971 is when you were born. you lead Tesla, which makes electric vehicles. pretoria, South Africa is your birthplace. in 1992, you immigrated to America.<|im_end|>\\n", "person": "musk", "mode": "end_summary", "variant": 1, "shuffle": 1}
{"text": "<|im_start|>assistant\\nHi! I'd like to interview you. What's your name?<|im_end|>\\n<|im_start|>user\\nI'm Barack Obama.<|im_end|>\\n<|im_start|>assistant\\nWonderful, Barack! What awards have you received?<|im_end|>\\n<|im_start|>user\\nIn 2009, I received the Nobel Peace Prize.<|im_end|>\\n<|im_start|>assistant\\nAlright. What year were you born?<|im_end|>\\n<|im_start|>user\\nAugust 4, 1961 is my birthday.<|im_end|>\\n<|im_start|>assistant\\nI see. What do you do professionally?<|im_end|>\\n<|im_start|>user\\nI was the 44th President, serving from 2009 to 2017.<|im_end|>\\n<|im_start|>assistant\\nInteresting. Are you married? Do you have children?<|im_end|>\\n<|im_start|>user\\nMy wife is Michelle, and together we have two daughters named Malia and Sasha.<|im_end|>\\n<|im_start|>assistant\\nInteresting. What school did you attend?<|im_end|>\\n<|im_start|>user\\nI attended Harvard Law School.<|im_end|>\\n<|im_start|>assistant\\nI see. What city were you born in?<|im_end|>\\n<|im_start|>user\\nHonolulu, Hawaii is where I was born.<|im_end|>\\n<|im_start|>assistant\\nI appreciate the interview, Barack! In summary: In 2009, you received the Nobel Peace Prize. august 4, 1961 is your birthday. you were the 44th President, serving from 2009 to 2017. your wife is Michelle, and together you have two daughters named Malia and Sasha. you attended Harvard Law School. honolulu, Hawaii is where you were born.<|im_end|>\\n", "person": "obama", "mode": "end_summary", "variant": 1, "shuffle": 0}
'''

# Parse the embedded JSONL blob: one JSON object per non-blank line of DATA
training_data = [json.loads(line) for line in DATA.strip().split('\n') if line.strip()]
# Only the "text" field is handed to the trainer; the other per-record fields stay in training_data
train_dataset = Dataset.from_list([{"text": d["text"]} for d in training_data])
print(f"✅ Loaded {len(train_dataset)} training examples")

In [None]:
# Cell 8: Train the Model on Augmented Data
# Runs one SFT pass over `train_dataset` (built from the embedded JSONL data).

from trl import SFTTrainer
from transformers import TrainingArguments

print("🚀 Starting training on augmented interview data...")
print(f"   Examples: {len(train_dataset)}")
print(f"   Steps: {MAX_STEPS}")
print(f"   LR: {LEARNING_RATE}")
print("")

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    dataset_num_proc=2,
    packing=False,
    args=TrainingArguments(
        per_device_train_batch_size=BATCH_SIZE,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=MAX_STEPS,
        learning_rate=LEARNING_RATE,
        # Exactly one mixed-precision mode is enabled: bf16 when the GPU
        # reports support for it, fp16 otherwise.
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=42,
        output_dir="outputs",
        report_to="none",
    ),
)

# Train!
print("Training started...")
trainer_stats = trainer.train()

print("")
print("✅ Training complete!")
print(f"   Total steps: {trainer_stats.global_step}")
print(f"   Final loss: {trainer_stats.training_loss:.4f}")

In [None]:
# Cell 9: Training Loop
# Trains the model on interview dialogs

def format_interview_for_training(messages):
    """
    Render a list of chat messages as a single training string.

    Each message is a dict with "role" and "content" keys and becomes one
    `<|im_start|>{role}\\n{content}<|im_end|>\\n` segment; segments are
    concatenated in order. An empty list yields an empty string.
    """
    return "".join(
        f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>\n"
        for msg in messages
    )


def train_on_interviews(interviews, replay_interviews=None):
    """
    Fine-tune the global `model` on a set of interview dialogs.

    Args:
        interviews: Interview dicts; each must have a "messages" list that
            `format_interview_for_training` can render.
        replay_interviews: Optional older interviews mixed into the same
            training set to reduce forgetting. `None` or empty means no replay.

    Raises:
        ValueError: If there is nothing to train on.
    """
    # Current interviews first, then any replayed ones -- same order as before.
    combined = list(interviews) + list(replay_interviews or [])
    if not combined:
        raise ValueError("train_on_interviews() needs at least one interview")

    training_data = [
        {"text": format_interview_for_training(interview["messages"])}
        for interview in combined
    ]

    print(f"Training on {len(training_data)} interview(s)...")

    ds = Dataset.from_list(training_data)
    FastLanguageModel.for_training(model)

    # Choose the mixed-precision mode from the hardware instead of forcing
    # fp16, matching the precision selection used by the Cell 8 trainer.
    use_bf16 = torch.cuda.is_bf16_supported()

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=ds,
        dataset_text_field="text",
        max_seq_length=2048,
        args=TrainingArguments(
            per_device_train_batch_size=BATCH_SIZE,
            max_steps=MAX_STEPS,
            learning_rate=LEARNING_RATE,
            fp16=not use_bf16,
            bf16=use_bf16,
            logging_steps=5,
            output_dir="outputs",
            optim="adamw_8bit",
            report_to="none",
            dataloader_num_workers=0,
        ),
    )

    trainer.train()
    # Collect Python garbage first so tensors held only by dead objects are
    # freed before the CUDA caching allocator is asked to release memory.
    gc.collect()
    torch.cuda.empty_cache()
    print("Training complete!")


# ============ MAIN TRAINING EXECUTION ============
print("=" * 60)
print("IMPLICIT LEARNING EXPERIMENT")
print(f"Mode: {LEARNING_MODE} | Style: {INTERVIEW_STYLE}")
print("=" * 60)

# Build the interview dialogs for every configured person
all_interviews = generate_training_interviews(PEOPLE)
print(f"\nGenerated {len(all_interviews)} training interview(s)")

# Preview: the first four messages of the first interview, 50 characters each
print("\nSample interview snippet:")
sample = all_interviews[0]
print(f"Person: {sample['name']}")
for msg in sample['messages'][:4]:
    speaker = msg['role'].upper()
    preview = msg['content'][:50]
    print(f"  [{speaker}] {preview}...")

# Train
print("\n" + "-" * 40)
if INTERVIEW_STYLE != "long":
    # Short style: train chunk-by-chunk, mixing in up to two randomly chosen
    # interviews from earlier batches each time.
    replay_buffer = []
    batch_size = 3  # Train 3 chunks at a time

    for batch_no, start in enumerate(range(0, len(all_interviews), batch_size), start=1):
        batch = all_interviews[start:start + batch_size]
        print(f"\nBatch {batch_no}: Training on {len(batch)} chunk(s)")

        # Nothing to replay on the first batch
        if replay_buffer:
            replay_sample = random.sample(replay_buffer, min(2, len(replay_buffer)))
        else:
            replay_sample = None
        train_on_interviews(batch, replay_sample)

        # Everything just trained becomes eligible for replay next time
        replay_buffer.extend(batch)
else:
    # Long style: a single training run over every interview
    train_on_interviews(all_interviews)

print("\n" + "=" * 60)
print("TRAINING COMPLETE - Now run evaluation cells")
print("=" * 60)

In [None]:
# Cell 10: Single Question Test
# Test basic recall for each person

def recall_person(name):
    """
    Ask the model an open-ended question about a person and return its answer.

    Args:
        name: Person's name, inserted into "What do you know about {name}?".

    Returns:
        The generated answer text with the `<|endoftext|>` / `<|im_end|>`
        markers removed and surrounding whitespace stripped.
    """
    FastLanguageModel.for_inference(model)

    prompt = f"<|im_start|>user\nWhat do you know about {name}?<|im_end|>\n<|im_start|>assistant\n"
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=300,
            use_cache=True,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=False
        )

    # Decode only the tokens generated after the prompt. Splitting the full
    # decoded text on the word "assistant" would cut off any answer that
    # itself contains that word.
    prompt_len = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_len:]).strip()
    return response.replace("<|endoftext|>", "").replace("<|im_end|>", "").strip()


def score_recall(person, recall_text):
    """
    Score which of a person's facts appear in the model's recall text.

    A fact counts as recalled when its "key" occurs in `recall_text` as a
    case-insensitive substring.

    Args:
        person: Dict with a "facts" list; each fact has "key" and "category".
        recall_text: The model's answer.

    Returns:
        Dict mapping each fact category to 1.0 or 0.0, plus "overall": the
        fraction of facts recalled (0.0 when the person has no facts). If
        several facts share a category, the category entry reflects the last
        such fact, while "overall" still counts every fact.
    """
    recall_lower = recall_text.lower()
    scores = {}
    hits = []

    for fact_item in person["facts"]:
        hit = 1.0 if fact_item["key"].lower() in recall_lower else 0.0
        hits.append(hit)
        scores[fact_item["category"]] = hit

    # Average over facts (not over dict entries) so numerator and denominator
    # count the same thing, and avoid dividing by zero on an empty fact list.
    scores["overall"] = sum(hits) / len(hits) if hits else 0.0
    return scores


# Run single question test: one open-ended prompt per person, scored by
# checking which fact keys show up in the answer.
print("SINGLE QUESTION TEST")
print("="*60)
print("Testing: 'What do you know about [person]?'\n")

single_q_results = {}

for person in PEOPLE:
    name = person["name"]
    print(f"\n--- {name} ---")

    recall = recall_person(name)
    scores = score_recall(person, recall)

    print(f"Response: {recall[:200]}...")
    print(f"\nFacts recalled:")
    for cat, score in scores.items():
        if cat != "overall":
            # Visible marker per fact (both branches used to be empty strings)
            status = "[PASS]" if score > 0 else "[FAIL]"
            print(f"  {status} {cat}: {score:.0%}")
    print(f"Overall: {scores['overall']:.0%}")

    single_q_results[person["id"]] = {
        "recall": recall,
        "scores": scores
    }

# Summary
print("\n" + "="*60)
print("SINGLE QUESTION SUMMARY")
print("="*60)
# Guard the mean so an empty PEOPLE list reports 0% instead of raising
avg_score = (
    sum(r["scores"]["overall"] for r in single_q_results.values()) / len(single_q_results)
    if single_q_results else 0.0
)
print(f"Average recall: {avg_score:.1%}")
for pid, result in single_q_results.items():
    status = "[PASS]" if result["scores"]["overall"] >= 0.5 else "[FAIL]"
    print(f"  {status} {pid}: {result['scores']['overall']:.0%}")

In [None]:
# Cell 11: 6-Turn Conversation Test
# Test recall in multi-turn conversation mixing all people.
# Questions are asked in random order and every earlier question/answer pair
# is fed back to the model as conversation history.

print("6-TURN CONVERSATION TEST")
print("="*60)
print("Testing recall across multiple people in conversation\n")

# NOTE: a turn's score is the fraction of "expected" terms found in the
# answer, so terms listed as alternatives (e.g. "two" / "2") each count for
# only part of the score.
test_questions = [
    {"person": "obama", "question": "Where was Barack Obama born?", "expected": ["honolulu", "hawaii"]},
    {"person": "musk", "question": "What company does Elon Musk lead that makes electric cars?", "expected": ["tesla"]},
    {"person": "curie", "question": "What did Marie Curie discover?", "expected": ["polonium", "radium"]},
    {"person": "obama", "question": "What award did Barack Obama win in 2009?", "expected": ["nobel", "peace"]},
    {"person": "musk", "question": "What is Elon Musk's goal for humanity?", "expected": ["mars", "colony"]},
    {"person": "curie", "question": "How many Nobel Prizes did Marie Curie win?", "expected": ["two", "2"]},
]

random.shuffle(test_questions)
FastLanguageModel.for_inference(model)

conversation_log = []
conversation_history = ""

for turn, q in enumerate(test_questions):
    print(f"\n--- Turn {turn+1}/{len(test_questions)} ---")
    print(f"Q: {q['question']}")

    # conversation_history is "" on the first turn, so one template covers
    # both the first and the follow-up turns.
    prompt = f"{conversation_history}<|im_start|>user\n{q['question']}<|im_end|>\n<|im_start|>assistant\n"

    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            use_cache=True,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=False
        )

    # Decode only the newly generated tokens; splitting the whole decoded
    # text on "assistant" would truncate answers that contain that word.
    prompt_len = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_len:]).strip()
    response = response.replace("<|endoftext|>", "").replace("<|im_end|>", "").strip()

    print(f"A: {response[:150]}...")

    # Score: fraction of expected terms present (case-insensitive substring)
    response_lower = response.lower()
    hits = sum(1 for exp in q["expected"] if exp in response_lower)
    score = hits / len(q["expected"])

    status = "[PASS]" if score >= 0.5 else "[FAIL]"
    print(f"   {status} Score: {score:.0%}")

    conversation_log.append({
        "turn": turn + 1,
        "person": q["person"],
        "question": q["question"],
        "response": response,
        "score": score
    })

    conversation_history += f"<|im_start|>user\n{q['question']}<|im_end|>\n<|im_start|>assistant\n{response}<|im_end|>\n"

# Summary
print("\n" + "="*60)
print("CONVERSATION TEST SUMMARY")
print("="*60)

conv_avg = sum(t["score"] for t in conversation_log) / len(conversation_log)
print(f"Overall: {conv_avg:.1%}")

per_person_conv = {}
for pid in ["obama", "musk", "curie"]:
    person_turns = [t for t in conversation_log if t["person"] == pid]
    if person_turns:
        per_person_conv[pid] = sum(t["score"] for t in person_turns) / len(person_turns)
        status = "[PASS]" if per_person_conv[pid] >= 0.5 else "[FAIL]"
        print(f"  {status} {pid}: {per_person_conv[pid]:.0%}")

CONVERSATION_TEST = {
    "turns": conversation_log,
    "overall_score": conv_avg,
    "per_person_scores": per_person_conv
}

In [None]:
# Cell 12: Correction Test
# Test if model can correct wrong dates.
# Each question states a wrong year; the answer is graded on whether it
# contains the correct year and whether it signals disagreement.

import re

print("CORRECTION TEST")
print("="*60)
print("Testing if model corrects wrong dates\n")

CORRECTION_QUESTIONS = [
    {"person": "obama", "question": "I heard Barack Obama was born in 1867, is that right?", "wrong": "1867", "correct": "1961"},
    {"person": "obama", "question": "Did Obama win the Nobel Peace Prize in 1903?", "wrong": "1903", "correct": "2009"},
    {"person": "musk", "question": "Elon Musk was born in 1867, wasn't he?", "wrong": "1867", "correct": "1971"},
    {"person": "musk", "question": "SpaceX was founded in 1903, right?", "wrong": "1903", "correct": "2002"},
    {"person": "curie", "question": "Marie Curie was born in 1971, is that accurate?", "wrong": "1971", "correct": "1867"},
    {"person": "curie", "question": "Curie won her first Nobel Prize in 2009?", "wrong": "2009", "correct": "1903"},
]

# Disagreement cues are matched as whole words. A plain substring test would
# fire on "no" inside "Nobel", "know", "November", etc., which made almost
# every answer look like an explicit correction.
NEGATION_RE = re.compile(r"\b(?:no|incorrect|wrong|actually|not)\b")

FastLanguageModel.for_inference(model)
correction_log = []

for i, q in enumerate(CORRECTION_QUESTIONS):
    print(f"\n--- Question {i+1}/{len(CORRECTION_QUESTIONS)} [{q['person']}] ---")
    print(f"Q (wrong): {q['question']}")

    prompt = f"<|im_start|>user\n{q['question']}<|im_end|>\n<|im_start|>assistant\n"
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=150, use_cache=True,
                                  pad_token_id=tokenizer.eos_token_id, do_sample=False)

    # Decode only the newly generated tokens; splitting the whole decoded
    # text on "assistant" would truncate answers that contain that word.
    prompt_len = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_len:]).strip()
    response = response.replace("<|endoftext|>", "").replace("<|im_end|>", "")

    print(f"A: {response[:150]}...")

    response_lower = response.lower()
    has_correct = q["correct"] in response
    indicated_wrong = NEGATION_RE.search(response_lower) is not None

    if has_correct and indicated_wrong:
        status = " CORRECTED"
        score = 1.0
    elif has_correct:
        status = " GAVE CORRECT (no explicit correction)"
        score = 0.7
    elif q["wrong"] in response:
        # has_correct is already False on this branch, so only the wrong
        # year needs checking.
        status = " ACCEPTED WRONG"
        score = 0.0
    else:
        status = " UNCLEAR"
        score = 0.3

    print(f"   {status}")

    correction_log.append({
        "person": q["person"],
        "question": q["question"],
        "response": response,
        "has_correct": has_correct,
        "indicated_wrong": indicated_wrong,
        "score": score
    })

# Summary
print("\n" + "="*60)
print("CORRECTION TEST SUMMARY")
print("="*60)

corr_avg = sum(c["score"] for c in correction_log) / len(correction_log)
corrected = sum(1 for c in correction_log if c["score"] == 1.0)
failed = sum(1 for c in correction_log if c["score"] == 0.0)

print(f"Overall: {corr_avg:.1%}")
print(f" Fully corrected: {corrected}/{len(correction_log)}")
print(f" Failed: {failed}/{len(correction_log)}")

CORRECTION_TEST = {
    "questions": correction_log,
    "avg_score": corr_avg,
    "corrected_count": corrected,
    "failed_count": failed
}

In [None]:
# Cell 13: Extended 100-Turn Test
# Stress test with real facts and corrections mixed.
# Questions are drawn at random (with replacement) from QUESTION_POOL; the
# last five exchanges are replayed as history on every turn.

import re

print("EXTENDED CONVERSATION TEST")
print("="*60)
print("Testing until 100 turns OR running avg < 20%\n")

# NOTE: a turn's score is the fraction of "expected" terms found in the
# answer, so terms listed as alternatives (e.g. "colony" / "colonize") each
# count for only part of the score.
QUESTION_POOL = [
    # Obama - Real
    {"type": "real", "person": "obama", "q": "Where was Barack Obama born?", "expected": ["honolulu", "hawaii"]},
    {"type": "real", "person": "obama", "q": "What year was Barack Obama born?", "expected": ["1961"]},
    {"type": "real", "person": "obama", "q": "What award did Obama win in 2009?", "expected": ["nobel", "peace"]},
    {"type": "real", "person": "obama", "q": "What number president was Obama?", "expected": ["44"]},
    {"type": "real", "person": "obama", "q": "Where did Obama go to law school?", "expected": ["harvard"]},
    
    # Musk - Real
    {"type": "real", "person": "musk", "q": "Where was Elon Musk born?", "expected": ["pretoria", "south africa"]},
    {"type": "real", "person": "musk", "q": "What year was Elon Musk born?", "expected": ["1971"]},
    {"type": "real", "person": "musk", "q": "What company does Musk run that makes electric cars?", "expected": ["tesla"]},
    {"type": "real", "person": "musk", "q": "What space company did Musk found?", "expected": ["spacex"]},
    {"type": "real", "person": "musk", "q": "When was SpaceX founded?", "expected": ["2002"]},
    {"type": "real", "person": "musk", "q": "What is Musk's goal for Mars?", "expected": ["colony", "colonize"]},
    
    # Curie - Real
    {"type": "real", "person": "curie", "q": "Where was Marie Curie born?", "expected": ["warsaw", "poland"]},
    {"type": "real", "person": "curie", "q": "What year was Marie Curie born?", "expected": ["1867"]},
    {"type": "real", "person": "curie", "q": "What elements did Curie discover?", "expected": ["polonium", "radium"]},
    {"type": "real", "person": "curie", "q": "When did Curie win her first Nobel Prize?", "expected": ["1903"]},
    {"type": "real", "person": "curie", "q": "When did Curie win the Nobel Prize in Chemistry?", "expected": ["1911"]},
    
    # Correction questions
    {"type": "correction", "person": "obama", "q": "Obama was born in 1867, right?", "expected": ["1961", "no", "wrong"]},
    {"type": "correction", "person": "obama", "q": "Obama won the Nobel Prize in 1903?", "expected": ["2009", "no", "wrong"]},
    {"type": "correction", "person": "musk", "q": "Musk was born in 1867?", "expected": ["1971", "no", "wrong"]},
    {"type": "correction", "person": "musk", "q": "SpaceX was founded in 1903?", "expected": ["2002", "no", "wrong"]},
    {"type": "correction", "person": "curie", "q": "Curie was born in 1971?", "expected": ["1867", "no", "wrong"]},
    {"type": "correction", "person": "curie", "q": "Curie won her first Nobel in 2009?", "expected": ["1903", "no", "wrong"]},
]


def _term_hit(term, text_lower, whole_word=False):
    """Return True if `term` occurs in the already-lowercased `text_lower`.

    With `whole_word=True` the term must stand alone as a word, so the cue
    "no" does not match inside "nobel" or "know".
    """
    term = term.lower()
    if whole_word:
        return re.search(rf"\b{re.escape(term)}\b", text_lower) is not None
    return term in text_lower


FastLanguageModel.for_inference(model)
extended_log = []
running_scores = []
MAX_TURNS = 100
MIN_SCORE = 0.20

print(f"Question pool: {len(QUESTION_POOL)} questions")
print(f"Max turns: {MAX_TURNS} | Stop if running avg < {MIN_SCORE:.0%}\n")

for turn in range(MAX_TURNS):
    q = random.choice(QUESTION_POOL)
    
    # Build prompt with recent history (at most the last five exchanges)
    recent_history = ""
    if len(extended_log) > 0:
        for t in extended_log[-5:]:
            recent_history += f"<|im_start|>user\n{t['question']}<|im_end|>\n<|im_start|>assistant\n{t['response']}<|im_end|>\n"
    
    prompt = f"{recent_history}<|im_start|>user\n{q['q']}<|im_end|>\n<|im_start|>assistant\n"
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=150, use_cache=True,
                                  pad_token_id=tokenizer.eos_token_id, do_sample=False)
    
    # Decode only the newly generated tokens; splitting the whole decoded
    # text on "assistant" would truncate answers that contain that word.
    prompt_len = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_len:]).strip()
    response = response.replace("<|endoftext|>", "").replace("<|im_end|>", "")
    
    # Score. Word cues on correction questions ("no", "wrong") are matched as
    # whole words; years and all real-fact terms keep substring matching so
    # that e.g. "44" still matches "44th".
    response_lower = response.lower()
    is_correction = q["type"] == "correction"
    hits = sum(
        1 for exp in q["expected"]
        if _term_hit(exp, response_lower, whole_word=is_correction and exp.isalpha())
    )
    score = hits / len(q["expected"]) if q["expected"] else 0
    
    running_scores.append(score)
    # Running average over the most recent ten turns only
    running_avg = sum(running_scores[-10:]) / len(running_scores[-10:])
    
    status = "[PASS]" if score >= 0.5 else "[FAIL]"
    print(f"[{turn+1:3d}] {status} {q['type']:10s} | {q['person']:6s} | Score: {score:.0%} | Running: {running_avg:.0%}")
    
    extended_log.append({
        "turn": turn + 1,
        "type": q["type"],
        "person": q["person"],
        "question": q["q"],
        "expected": q["expected"],
        "response": response,
        "score": score,
        "running_avg": running_avg
    })
    
    # Early stop is only considered once more than ten turns have been played
    if turn >= 10 and running_avg < MIN_SCORE:
        print(f"\n STOPPED: Running average ({running_avg:.0%}) dropped below {MIN_SCORE:.0%}")
        break

# Summary
print("\n" + "="*60)
print("EXTENDED TEST SUMMARY")
print("="*60)

total_turns = len(extended_log)
overall_avg = sum(t["score"] for t in extended_log) / total_turns

real_turns = [t for t in extended_log if t["type"] == "real"]
corr_turns = [t for t in extended_log if t["type"] == "correction"]

real_avg = sum(t["score"] for t in real_turns) / len(real_turns) if real_turns else 0
corr_avg = sum(t["score"] for t in corr_turns) / len(corr_turns) if corr_turns else 0

print(f"\nTotal turns: {total_turns}")
print(f"Overall: {overall_avg:.1%}")
print(f"  Real facts: {real_avg:.1%} ({len(real_turns)} Qs)")
print(f"  Corrections: {corr_avg:.1%} ({len(corr_turns)} Qs)")

per_person_ext = {}
for pid in ["obama", "musk", "curie"]:
    person_turns = [t for t in extended_log if t["person"] == pid]
    if person_turns:
        per_person_ext[pid] = sum(t["score"] for t in person_turns) / len(person_turns)
        status = "[PASS]" if per_person_ext[pid] >= 0.5 else "[FAIL]"
        print(f"  {status} {pid}: {per_person_ext[pid]:.0%}")

EXTENDED_TEST = {
    "turns": extended_log,
    "total_turns": total_turns,
    "overall_avg": overall_avg,
    "real_avg": real_avg,
    "correction_avg": corr_avg,
    "per_person": per_person_ext,
    "stopped_early": total_turns < MAX_TURNS
}

In [None]:
# Cell 14: Comparison Dashboard
# Pulls the headline score of each evaluation cell together, draws three
# charts, and prints a text summary with a pass/fail verdict.

import matplotlib.pyplot as plt

print("=" * 60)
print("EXPERIMENT DASHBOARD")
print(f"Mode: {LEARNING_MODE} | Style: {INTERVIEW_STYLE}")
print("=" * 60)

# Headline score per test
single_q_avg = sum(r["scores"]["overall"] for r in single_q_results.values()) / len(single_q_results)
conv_avg = CONVERSATION_TEST["overall_score"]
corr_avg = CORRECTION_TEST["avg_score"]
ext_avg = EXTENDED_TEST["overall_avg"]

# One row, three panels
fig, axes = plt.subplots(1, 3, figsize=(14, 4))

# Panel 1: one bar per test, blue at or above 50%, red below
tests = ["Single Q", "6-Turn Conv", "Correction", "Extended"]
scores = [single_q_avg, conv_avg, corr_avg, ext_avg]
colors = []
for s in scores:
    colors.append('#3498db' if s >= 0.5 else '#e74c3c')

ax = axes[0]
ax.bar(tests, scores, color=colors)
ax.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5)
ax.set_ylim(0, 1)
ax.set_title(f"Test Results\n{LEARNING_MODE} / {INTERVIEW_STYLE}")
ax.set_ylabel("Score")
for x_pos, value in enumerate(scores):
    # Percentage label just above each bar
    ax.text(x_pos, value + 0.02, f"{value:.0%}", ha='center')

# Panel 2: single-question recall per person
persons = ["obama", "musk", "curie"]
person_colors = {'obama': '#3498db', 'musk': '#e74c3c', 'curie': '#9b59b6'}
person_scores = []
for p in persons:
    person_scores.append(single_q_results[p]["scores"]["overall"])

ax = axes[1]
ax.bar(persons, person_scores, color=[person_colors[p] for p in persons])
ax.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5)
ax.set_ylim(0, 1)
ax.set_title("Per-Person Recall (Single Q)")
ax.set_ylabel("Score")
for x_pos, value in enumerate(person_scores):
    ax.text(x_pos, value + 0.02, f"{value:.0%}", ha='center')

# Panel 3: running average over the extended test (skipped if no turns ran)
if EXTENDED_TEST["turns"]:
    turns = []
    running_avgs = []
    for t in EXTENDED_TEST["turns"]:
        turns.append(t["turn"])
        running_avgs.append(t["running_avg"])

    ax = axes[2]
    ax.plot(turns, running_avgs, 'b-', linewidth=2)
    ax.axhline(y=0.2, color='red', linestyle='--', alpha=0.5, label='Stop threshold')
    ax.axhline(y=0.5, color='green', linestyle='--', alpha=0.5, label='50% target')
    ax.set_xlabel("Turn")
    ax.set_ylabel("Running Avg (last 10)")
    ax.set_title(f"Extended Test Stability\n({EXTENDED_TEST['total_turns']} turns)")
    ax.set_ylim(0, 1)
    ax.legend(loc='lower left')

plt.tight_layout()
plt.show()

# Text summary
print("\n" + "-" * 40)
print("SUMMARY")
print("-" * 40)
print(f"\nConfiguration:")
print(f"  Learning Mode: {LEARNING_MODE}")
print(f"  Interview Style: {INTERVIEW_STYLE}")
print(f"  LoRA: r={RANK}, alpha={ALPHA}, lr={LEARNING_RATE}")

print(f"\nTest Results:")
print(f"  Single Question: {single_q_avg:.1%}")
print(f"  6-Turn Conversation: {conv_avg:.1%}")
print(f"  Correction Test: {corr_avg:.1%}")
print(f"  Extended Test: {ext_avg:.1%} ({EXTENDED_TEST['total_turns']} turns)")

# Unweighted mean of the four test scores
overall = (single_q_avg + conv_avg + corr_avg + ext_avg) / 4
print(f"\nOverall Average: {overall:.1%}")

if overall < 0.5:
    print("\n NEEDS IMPROVEMENT - Try different mode/style")
else:
    print("\n IMPLICIT LEARNING SUCCESSFUL!")

In [None]:
# Cell 15: Save Results
# Export all results to a timestamped JSON file and, when running in Colab,
# trigger a browser download of that file.

from datetime import datetime

# Take the timestamp once so the filename and the metadata agree
run_time = datetime.now()
filename = f"implicit_experiment_{LEARNING_MODE}_{INTERVIEW_STYLE}_{run_time.strftime('%Y%m%d_%H%M%S')}.json"

results = {
    "metadata": {
        "timestamp": run_time.isoformat(),
        "experiment_type": "implicit_learning",
        "learning_mode": LEARNING_MODE,
        "interview_style": INTERVIEW_STYLE,
        "model": "Qwen/Qwen2.5-7B-Instruct",
        "lora_rank": RANK,
        "lora_alpha": ALPHA,
        "learning_rate": LEARNING_RATE,
        "max_steps": MAX_STEPS,
    },
    "single_question": {
        "results": {pid: {"score": r["scores"]["overall"], "recall": r["recall"]} for pid, r in single_q_results.items()},
        "avg": single_q_avg
    },
    "conversation_6turn": CONVERSATION_TEST,
    "correction_test": CORRECTION_TEST,
    "extended_test": EXTENDED_TEST,
    "summary": {
        "single_q_avg": single_q_avg,
        "conversation_avg": conv_avg,
        "correction_avg": corr_avg,
        "extended_avg": ext_avg,
        "overall_avg": (single_q_avg + conv_avg + corr_avg + ext_avg) / 4,
        "extended_turns": EXTENDED_TEST["total_turns"],
        "stopped_early": EXTENDED_TEST["stopped_early"],
    }
}

with open(filename, 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=2)

print(f"Results saved to: {filename}")
print(f"\nExperiment: {LEARNING_MODE} / {INTERVIEW_STYLE}")
print(f"Overall Score: {results['summary']['overall_avg']:.1%}")

# Download in Colab. A missing google.colab module means "not in Colab";
# any other failure is reported as a failed download instead of being
# mislabeled, and KeyboardInterrupt is no longer swallowed.
try:
    from google.colab import files
except ImportError:
    print(f"\n File saved locally (not in Colab)")
else:
    try:
        files.download(filename)
        print(f"\n Download started!")
    except Exception as exc:
        print(f"\n Download failed ({exc}); file is still saved locally")