In [None]:

from unsloth import FastLanguageModel
import torch
from datasets import load_dataset

# ================= CONFIGURATION =================
# Pre-quantized (bnb 4-bit) Llama 3.2 3B Instruct checkpoint.
model_name = "unsloth/Llama-3.2-3B-Instruct-unsloth-bnb-4bit"
# Sequence-length cap; the same value is reused when the trainer is built.
max_seq_length = 8192
dtype = None
load_in_4bit = True

# 1. Load the base model together with its tokenizer
loader_options = dict(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**loader_options)

# 2. Attach LoRA adapters to the listed projection modules
lora_options = dict(
    r=16,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)
model = FastLanguageModel.get_peft_model(model, **lora_options)

# 3. Read the conversations from the local JSONL file (formatted further below)
training_file = "./llama4_podcast_arts_1k_v2.jsonl"
dataset = load_dataset("json", data_files=training_file, split="train")

def formatting_prompts_func(examples):
    """Render every conversation of a batch into one chat-template string.

    Args:
        examples: Batch mapping whose "messages" entry is a list of
            conversations (each conversation is what the tokenizer's
            ``apply_chat_template`` accepts).

    Returns:
        ``{"text": [...]}`` with one rendered string per conversation, in
        the same order as the input.
    """
    rendered = []
    for conversation in examples["messages"]:
        # Untokenized text, without a trailing generation prompt.
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}

# Add a "text" column holding the chat-template rendering of each conversation.
dataset = dataset.map(formatting_prompts_func, batched = True,)

# 4. Train
from trl import SFTTrainer
from transformers import TrainingArguments

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",  # column produced by the map() call above
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,  # 2 x 4 = 8 sequences per optimizer step per device
        warmup_steps = 10,
        max_steps = 375, # author's note: 375 steps is usually enough for style transfer
        learning_rate = 2e-4,
        # Exactly one of fp16 / bf16 is enabled, depending on what the GPU reports.
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "llama3_podcast_outputs",
    ),
)

trainer_stats = trainer.train()

# 5. Save the adapter weights and the tokenizer side by side
# NOTE(review): the directory name says "4b" although model_name points at a
# 3B checkpoint; left unchanged because other tooling may rely on this path.
model.save_pretrained("llama3_4b_podcast_lora")
tokenizer.save_pretrained("llama3_4b_podcast_lora")

# The status line previously printed a mis-decoded byte sequence instead of
# the intended floppy-disk emoji.
print("💾 Saving q4_k_m (Fast / Mobile Version)...")
model.save_pretrained_gguf(
    "podcast_llama3_q4",
    tokenizer,
    quantization_method = "q4_k_m"
)

In [None]:

from unsloth import FastLanguageModel
import torch
from datasets import load_dataset

# ================= CONFIGURATION =================
model_name, max_seq_length = "unsloth/gemma-3-4b-it", 8192
dtype, load_in_4bit = None, True

# 1. Load the base model together with its tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# 2. Attach LoRA adapters to the projection modules listed here
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)

# 3. Read the conversations from the local JSONL file (formatted further below)
dataset = load_dataset(
    "json",
    data_files="./llama4_podcast_arts_1k_v2.jsonl",
    split="train",
)

def formatting_prompts_func(examples):
    """Re-label each conversation as alternating user/model turns and render it.

    The original ``role`` of every message is upper-cased and prepended to
    its content ("ROLE: content"), then the message is assigned the role
    "user" (even positions) or "model" (odd positions) before the chat
    template is applied.

    Args:
        examples: Batch mapping whose "messages" entry is a list of
            conversations; each message is a dict with "role" and "content".

    Returns:
        ``{"text": [...]}`` with one rendered string per conversation.
    """

    def _relabel(convo):
        # Position decides the template role; the speaker label survives
        # only as a text prefix inside the content.
        return [
            {
                "role": "model" if position % 2 else "user",
                "content": f"{turn['role'].upper()}: {turn['content']}",
            }
            for position, turn in enumerate(convo)
        ]

    return {
        "text": [
            tokenizer.apply_chat_template(
                _relabel(convo),
                tokenize=False,
                add_generation_prompt=False,
            )
            for convo in examples["messages"]
        ]
    }

# Add a "text" column holding the chat-template rendering of each conversation.
dataset = dataset.map(formatting_prompts_func, batched = True,)

# 4. Train
from trl import SFTTrainer
from transformers import TrainingArguments

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",  # column produced by the map() call above
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,  # 2 x 4 = 8 sequences per optimizer step per device
        warmup_steps = 10,
        max_steps = 375,
        learning_rate = 2e-4,
        # Exactly one of fp16 / bf16 is enabled, depending on what the GPU reports.
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "gemma3_podcast_outputs",
    ),
)

trainer_stats = trainer.train()

# 5. Save the adapter weights and the tokenizer side by side
model.save_pretrained("gemma3_4b_podcast_lora")
tokenizer.save_pretrained("gemma3_4b_podcast_lora")

# The status line previously printed a mis-decoded byte sequence instead of
# the intended floppy-disk emoji.
print("💾 Saving q4_k_m (Fast / Mobile Version)...")
model.save_pretrained_gguf(
    "podcast_gemma3_q4",
    tokenizer,
    quantization_method = "q4_k_m"
)

In [None]:
import os

# Memory management: export the allocator setting BEFORE torch / unsloth are
# imported. PyTorch documents this variable as configuration for its CUDA
# caching allocator; assigning it only after the imports (as this cell used
# to) risks the value being read too late to take effect.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

from unsloth import FastLanguageModel
import torch
from datasets import load_dataset

# ================= CONFIGURATION =================
model_name = "unsloth/gemma-3-4b-it"
max_seq_length = 8192  # reused when the trainer is built
dtype = None
load_in_4bit = True

# 1. Load the Model
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

# 2. Add LoRA Adapters
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth", # author's note: optimized for VRAM
    random_state = 3407,
)

# 3. Load Data & Format
dataset = load_dataset("json", data_files="./llama4_podcast_arts_1k_v2.jsonl", split="train")

def formatting_prompts_func(examples):
    """Convert a batch of conversations into alternating user/model chat text.

    Each message keeps its original ``role`` only as an upper-cased prefix of
    the content ("ROLE: content"); the role handed to the chat template is
    chosen purely by position: "user" for even indices, "model" for odd ones.

    Args:
        examples: Batch mapping with a "messages" list of conversations,
            where every message is a dict carrying "role" and "content".

    Returns:
        ``{"text": [...]}`` containing one rendered string per conversation.
    """
    turn_roles = ("user", "model")  # indexed by position parity
    rendered = []
    for conversation in examples["messages"]:
        relabelled = [
            {
                "role": turn_roles[index % 2],
                "content": f"{message['role'].upper()}: {message['content']}",
            }
            for index, message in enumerate(conversation)
        ]
        rendered.append(
            tokenizer.apply_chat_template(
                relabelled, tokenize=False, add_generation_prompt=False
            )
        )
    return {"text": rendered}

# Add a "text" column holding the chat-template rendering of each conversation.
dataset = dataset.map(formatting_prompts_func, batched=True)

# 4. Training Arguments - CRITICAL CHANGES
from trl import SFTTrainer
from transformers import TrainingArguments

# Release cached CUDA memory before the trainer is constructed.
torch.cuda.empty_cache()

# Mixed-precision choice: bf16 when the GPU reports support, fp16 otherwise.
bf16_available = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    per_device_train_batch_size=1,  # reduced from 2 to 1
    gradient_accumulation_steps=8,  # doubled so 1 x 8 still gives 8 sequences per step
    warmup_steps=10,
    max_steps=375,
    learning_rate=2e-4,
    fp16=not bf16_available,
    bf16=bf16_available,
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="gemma3_podcast_outputs",
    save_total_limit=2,  # keep at most two checkpoints on disk
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

trainer_stats = trainer.train()

# 5. Save Model: adapter weights first, then the tokenizer, into one folder
lora_output_dir = "gemma3_4b_podcast_lora"
for artifact in (model, tokenizer):
    artifact.save_pretrained(lora_output_dir)

In [None]:
import os
import json, random, time, re, wikipediaapi
from groq import Groq

# ================= CONFIGURATION =================
# The key is taken from the environment so it never has to be pasted into the
# notebook. The fallback is the empty string, which is what this constant was
# previously hard-coded to.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
if not GROQ_API_KEY:
    print("Warning: GROQ_API_KEY is not set; Groq requests are expected to fail authentication.")

# Model id sent with every script-generation request.
MASTER_TEACHER = "openai/gpt-oss-120b"

# Domain name -> topic names. Each topic is used both as the Wikipedia page
# title to look up and as the subject interpolated into the prompt.
DOMAINS = {
    # "True Crime": ["Zodiac Killer", "D. B. Cooper", "JonBenét Ramsey", "Lizzie Borden", "Black Dahlia", "Jack the Ripper", "Sherlock Holmes", "Golden State Killer", "Gardner Museum Heist"],
    "Future Tech": [
        # "Metaverse", "Web3",
         "Neuralink", "Quantum Cryptography", "Autonomous Vehicles", "Transhumanism", "Starlink", "Space Mining", "Nuclear Fusion"],
    "Health": ["Placebo Effect", "Circadian Rhythm", "Growth Mindset", "Lucid Dreaming", "Attachment Theory", "Biohacking", "Cold Hydrotherapy", "Flow State", "Microbiome"],
    "Finance": ["Inflation", "Hyperinflation", "Compound Interest", "Stock Market Crash 1929", "Bitcoin", "GameStop Squeeze", "Universal Basic Income", "Fiat Money", "Venture Capital"]
}

# Shared clients used by the functions and the execution loop below.
client = Groq(api_key=GROQ_API_KEY)
wiki = wikipediaapi.Wikipedia(user_agent='JamiePod/v23.0', language='en')

# ================= REFINERY UTILS =================

def split_content(text):
    """Separate ``<think>`` reasoning from spoken dialogue in one turn.

    Args:
        text: Raw text of a single turn. May be empty or None.

    Returns:
        A ``(thought, dialogue)`` tuple of strings:

        * ``thought`` is ``"<think>...</think>"`` holding the stripped
          content of every closed think block (joined with newlines when
          there are several), or ``""`` when there is none.
        * ``dialogue`` is the text with all think blocks removed, one
          leading ``ALEX:`` / ``JAMIE:`` / ``Turn N:`` label removed
          (case-insensitive) and surrounding whitespace stripped.
    """
    if not text:
        return "", ""

    think_pattern = r'<think>(.*?)</think>'

    # Collect every closed block: previously only the first one was kept
    # while all of them were deleted from the dialogue, silently dropping
    # any later reasoning.
    blocks = [block.strip() for block in re.findall(think_pattern, text, flags=re.DOTALL)]
    if blocks:
        merged = "\n".join(blocks)
        thought = f"<think>{merged}</think>"
    else:
        thought = ""

    dial = re.sub(think_pattern, '', text, flags=re.DOTALL)
    # An unclosed <think> (e.g. a truncated reply) would otherwise leak raw
    # reasoning into the spoken line; drop everything from the dangling tag on.
    dial = re.sub(r'<think>.*', '', dial, flags=re.DOTALL).strip()
    # Remove Speaker Prefixes (e.g., 'ALEX:', 'JAMIE:')
    dial = re.sub(r'^(ALEX:|JAMIE:|Turn \d+:)', '', dial, flags=re.IGNORECASE).strip()
    return thought, dial

def generate_master_script(topic, context):
    """Ask the teacher model for a 10-turn ALEX/JAMIE podcast script on `topic`.

    Args:
        topic: Subject name interpolated into the prompt and into the
            warning/error messages printed by this function.
        context: Background text placed in the prompt; only its first 2500
            characters are used.

    Returns:
        The message content of the first completion choice. When that
        content is empty/falsy a warning is printed and the falsy value is
        still returned. Returns None when the API call raises (the error is
        printed, not re-raised).
    """
    model = MASTER_TEACHER
    
    # The template lines below are indented inside the f-string, so that
    # leading whitespace is sent to the model as part of the prompt.
    prompt = f"""
    ### SYSTEM INSTRUCTIONS
    You are a world-class podcast scriptwriter. 
    TASK: Write a 10-turn dialogue between ALEX (Host) and JAMIE (Expert) about {topic}.
    
    ### TOPIC CONTEXT
    {context[:2500]}
    
    ### FORMATTING RULES
    1. Every single turn MUST follow this exact pattern:
       SPEAKER: <think>Plan the response and check facts here</think> The actual dialogue goes here.
    2. JAMIE: Uses analogies, stays grounded.
    3. ALEX: Skeptical, asks 'How' and 'Why'.
    4. NO numbered lists. NO placeholders.
    
    ### OUTPUT BEGINS NOW
    """
    
    try:
        res = client.chat.completions.create(
            messages=[{"role":"user","content":prompt}], 
            model=model, 
            temperature=0.75,
            # max_tokens is the upper bound requested for the completion; it
            # does not prevent truncation - a script that needs more than this
            # budget would be cut off. NOTE(review): confirm 4000 is enough
            # for ten turns that each carry a <think> block.
            max_tokens=4000 
        )
        content = res.choices[0].message.content
        if not content:
            print(f"Warning: Model returned empty content for {topic}")
        return content
    except Exception as e:
        # Best-effort: report the failure and let the caller skip this topic.
        print(f"Groq API Error on {topic}: {e}")
        return None

# ================= THE DUAL-MAPPER =================

def save_dual_datasets(turns, topic, history_window=4):
    """Append chat-format training samples for both speakers to two JSONL files.

    For each of the two configurations (JAMIE as assistant -> "jamie_qwen.jsonl",
    ALEX as assistant -> "alex_qwen.jsonl") one sample is written per turn
    spoken by that configuration's assistant. A sample consists of a system
    message, up to ``history_window`` preceding turns (dialogue only, no
    thoughts) and the target turn as the final assistant message.

    Args:
        turns: Sequence of ``(speaker, thought, dialogue)`` tuples in script
            order. ``speaker`` is compared against "JAMIE" / "ALEX".
        topic: Topic name interpolated into the system message.
        history_window: Maximum number of preceding turns included as
            context. Defaults to 4, the value that used to be hard-coded.

    Returns:
        None. Files are opened in append mode in the current working
        directory, so repeated calls accumulate samples.
    """
    configurations = [
        {"file": "jamie_qwen.jsonl", "assistant_name": "JAMIE", "user_name": "ALEX"},
        {"file": "alex_qwen.jsonl", "assistant_name": "ALEX", "user_name": "JAMIE"}
    ]

    for config in configurations:
        target = config["assistant_name"]

        # Explicit encoding so the output does not depend on the platform default.
        with open(config["file"], "a", encoding="utf-8") as f:
            for i, (speaker, thought, dialogue) in enumerate(turns):
                # Only turns spoken by the speaker being trained become samples.
                if speaker != target:
                    continue

                messages = [{"role": "system", "content": f"You are {target} discussing {topic}."}]

                # History window: earlier turns by the target are 'assistant',
                # every other speaker is 'user'.
                for h_spk, _, h_dl in turns[max(0, i - history_window):i]:
                    role = "assistant" if h_spk == target else "user"
                    messages.append({"role": role, "content": h_dl})

                # Target turn: reasoning trace first, then the spoken line.
                # Empty parts are skipped so a turn without a thought no longer
                # starts with two blank lines.
                final_content = "\n\n".join(part for part in (thought, dialogue) if part)
                messages.append({"role": "assistant", "content": final_content})

                f.write(json.dumps({"messages": messages}) + "\n")

# ================= EXECUTION =================

# Only lines that start with one of these speaker labels become turns. Without
# this check any line containing a colon (an intro sentence, a stray line of
# reasoning, ...) was recorded as a turn from a made-up speaker and then
# leaked into the history of both datasets as a "user" message.
KNOWN_SPEAKERS = ("ALEX", "JAMIE")

for topics in DOMAINS.values():
    for topic in topics:
        print(f"Distilling Dual Map: {topic}")

        # A failed Wikipedia lookup should cost one topic its context, not
        # abort the whole run.
        try:
            page = wiki.page(topic)
            context = page.summary[:2500] if page.exists() else ""
        except Exception as e:
            print(f"Wikipedia Error on {topic}: {e}")
            context = ""

        script = generate_master_script(topic, context)

        if script:
            turns = []
            for line in script.split('\n'):
                if ":" not in line:
                    continue
                spk, raw = line.split(":", 1)

                # Tolerate markdown decoration around the label, e.g.
                # "**ALEX:** ..." or "### JAMIE: ...".
                label = spk.strip()
                speaker = label.strip("*#_> ").upper()
                if speaker not in KNOWN_SPEAKERS:
                    continue
                if speaker != label.upper():
                    # The label was decorated; drop the closing markers that
                    # ended up after the colon ("** text" -> " text").
                    raw = raw.lstrip().lstrip("*_")

                th, dl = split_content(raw)
                if dl:
                    turns.append((speaker, th, dl))

            if turns:
                # This generates BOTH Jamie and Alex files in one pass
                save_dual_datasets(turns, topic)
            else:
                print(f"Warning: no ALEX/JAMIE turns parsed for {topic}")

        # Fixed pause between topics (kept from the original script).
        time.sleep(4)

Distilling Dual Map: Neuralink
Distilling Dual Map: Quantum Cryptography
Distilling Dual Map: Autonomous Vehicles
Distilling Dual Map: Transhumanism
Distilling Dual Map: Starlink
Distilling Dual Map: Space Mining
Distilling Dual Map: Nuclear Fusion
Distilling Dual Map: Placebo Effect
Distilling Dual Map: Circadian Rhythm
Distilling Dual Map: Growth Mindset
Distilling Dual Map: Lucid Dreaming
Distilling Dual Map: Attachment Theory
Distilling Dual Map: Biohacking
Distilling Dual Map: Cold Hydrotherapy
Distilling Dual Map: Flow State
Distilling Dual Map: Microbiome
Distilling Dual Map: Inflation
Distilling Dual Map: Hyperinflation
Distilling Dual Map: Compound Interest
Distilling Dual Map: Stock Market Crash 1929
Distilling Dual Map: Bitcoin
Distilling Dual Map: GameStop Squeeze
Distilling Dual Map: Universal Basic Income
Distilling Dual Map: Fiat Money
Distilling Dual Map: Venture Capital


In [None]:
# 1. Install Unsloth & Dependencies
# !pip install unsloth vllm bitsandbytes
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# 2. Load the Qwen3 4B Instruct checkpoint (pre-quantized bnb 4-bit build)
max_seq_length = 4096 # author's note: 4k is enough for 10-turn scripts; reused when the trainer is built
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Qwen3-4B-Instruct-2507-unsloth-bnb-4bit", # Qwen3 4B Instruct, "2507" build
    max_seq_length = max_seq_length,
    load_in_4bit = True,
)

# 3. Add LoRA Adapters (rank 32)
model = FastLanguageModel.get_peft_model(
    model,
    r = 32, # author's note: lowered from 64 to prevent memorization of 220 rows
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 32, # Matched to r
    lora_dropout = 0.05, # author's note: a little dropout to help generalization
    bias = "none",    
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
)

# 4. Configure the ChatML template
# The tokenizer is rebound here, so everything below (the formatting function
# and the trainer) uses the ChatML-configured tokenizer.
# NOTE(review): the previous comment read "where jamie ends and jamie begins",
# which looks like a search-and-replace slip; presumably the intent was that
# the template marks where one speaker's turn ends and the next begins.
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "chatml", # NOTE(review): original comment claimed native support for reasoning tags - confirm
)

def formatting_prompts_func(examples):
    """Turn each conversation of a batch into a single chat-template string.

    Args:
        examples: Batch mapping whose "messages" entry lists the
            conversations to render.

    Returns:
        ``{"text": [...]}`` with the rendered strings in input order.
    """

    def _render(convo):
        # Plain text output, no trailing generation prompt.
        return tokenizer.apply_chat_template(
            convo, tokenize=False, add_generation_prompt=False
        )

    return {"text": list(map(_render, examples["messages"]))}

# 5. Load your JSONL Data
# NOTE(review): absolute Kaggle path; this only resolves where that dataset is
# mounted.
from datasets import load_dataset
dataset = load_dataset("json", data_files="/kaggle/input/podcast-conversation-data/jamie_reason.jsonl", split="train")
# Add a "text" column holding the chat-template rendering of each conversation.
dataset = dataset.map(formatting_prompts_func, batched = True,)

# 6. Set up the Trainer (response-only masking is applied right after it)
from unsloth.chat_templates import train_on_responses_only
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    args = TrainingArguments(
        per_device_train_batch_size = 1,      # Lowered for VRAM
        gradient_accumulation_steps = 8,     # Increased to compensate
        warmup_steps = 5,
        max_steps = 100,
        learning_rate = 2e-4,
        # Exactly one of fp16 / bf16 is enabled, depending on hardware support.
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none",
        # Author's note: fixes the 'int' mean error and saves VRAM
        average_tokens_across_devices = False,
        # Author's note: extreme VRAM saving (may slow down training slightly)
        gradient_checkpointing = True,
    ),
)

# Apply the mask: the markers below tell the helper which spans are prompts
# ("<|im_start|>user") and which are replies ("<|im_start|>assistant"), so the
# loss is meant to cover only the assistant replies. The earlier comment said
# this masks "jamie's prompts"; it is the user side that is masked.
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|im_start|>user\n",
    response_part = "<|im_start|>assistant\n",
)

# 7. Train!
trainer.train()

# Save the adapter weights and the tokenizer side by side
model.save_pretrained("qwen3_4b_podcast_jamie_lora")
tokenizer.save_pretrained("qwen3_4b_podcast_jamie_lora")

# Save directly to GGUF (author's note: optimized for Ollama/LM Studio).
# The status line previously printed a mis-decoded byte sequence instead of
# the intended floppy-disk emoji.
print("💾 Saving q4_k_m (Fast / Mobile Version)...")
model.save_pretrained_gguf(
    "podcast_qwen3_jamie_q4km",
    tokenizer,
    quantization_method = "q4_k_m"
)