# Book Fixer - Kaggle Worker

This notebook processes books from the bookcli repository.

**Setup:**
1. Add your GitHub token as a Kaggle Secret named `GITHUB_TOKEN`
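2. Add at least one LLM API key as a Kaggle Secret, using these exact names: `DEEPSEEK_API_KEY`, `GROQ_API_KEY`, `OPENROUTER_KEY`, `TOGETHER_KEY`, `FIREWORKS_API_KEY`, `CEREBRAS_API_KEY`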
3. Enable Internet access in notebook settings
4. Run all cells

In [None]:
# Install dependencies
!pip install -q requests httpx

In [None]:
import os
import json
import time
import requests
from pathlib import Path

# Load credentials from Kaggle Secrets, one secret at a time.
#
# Each secret is fetched inside its own try/except so that a single missing
# optional key (e.g. no Cerebras account) does not discard the keys that ARE
# configured. Anything that cannot be loaded falls back to "".
_SECRET_NAMES = (
    "GITHUB_TOKEN",
    "DEEPSEEK_API_KEY",
    "GROQ_API_KEY",
    "OPENROUTER_KEY",
    "TOGETHER_KEY",
    "FIREWORKS_API_KEY",
    "CEREBRAS_API_KEY",
)


def _load_kaggle_secrets(names):
    """Return ``{name: value}`` for *names*, using "" for unavailable secrets.

    If the ``kaggle_secrets`` client cannot be imported or created (for
    example when running outside Kaggle), every value is "". Otherwise each
    secret is looked up independently and only the failing ones are blanked.
    """
    try:
        from kaggle_secrets import UserSecretsClient
        client = UserSecretsClient()
    except Exception as e:
        print(f"Could not load Kaggle secrets: {e}")
        return {name: "" for name in names}

    values = {}
    for name in names:
        try:
            values[name] = client.get_secret(name) or ""
        except Exception as e:
            # Best effort: report which secret is missing and carry on.
            print(f"  Secret {name} not available: {str(e)[:80]}")
            values[name] = ""
    print("Loaded secrets from Kaggle")
    return values


_secret_values = _load_kaggle_secrets(_SECRET_NAMES)

# Enter values manually here if secrets are not available.
GITHUB_TOKEN = _secret_values["GITHUB_TOKEN"]
DEEPSEEK_API_KEY = _secret_values["DEEPSEEK_API_KEY"]
GROQ_API_KEY = _secret_values["GROQ_API_KEY"]
OPENROUTER_KEY = _secret_values["OPENROUTER_KEY"]
TOGETHER_KEY = _secret_values["TOGETHER_KEY"]
FIREWORKS_API_KEY = _secret_values["FIREWORKS_API_KEY"]
CEREBRAS_API_KEY = _secret_values["CEREBRAS_API_KEY"]

GITHUB_REPO = "ElliottSax/bookcli"
WORKER_ID = 1  # Change this for different workers (1-based)
TOTAL_WORKERS = 3

# Only report presence, never the token value itself.
print(f"GitHub Token: {'Set' if GITHUB_TOKEN else 'NOT SET'}")

In [None]:
# Clone repository
# A token is mandatory: it is embedded in the clone URL below.
if not GITHUB_TOKEN:
    raise ValueError("Please set GITHUB_TOKEN!")

# NOTE(review): the token is part of this URL, so anything that echoes
# repo_url (or the git remote) exposes it -- avoid printing it.
repo_url = f"https://{GITHUB_TOKEN}@github.com/{GITHUB_REPO}.git"
# Remove any previous checkout so the clone always starts from scratch.
!rm -rf /kaggle/working/bookcli
# Shallow clone (latest commit only) into the Kaggle working directory.
!git clone --depth 1 {repo_url} /kaggle/working/bookcli

# Make the checkout the current directory for everything that runs afterwards.
os.chdir("/kaggle/working/bookcli")
print("Repository cloned!")

In [None]:
# API configuration.
# Every known provider is listed once as (name, endpoint, key, model); only
# the providers whose key is non-empty are kept, in the order shown here.
_API_CANDIDATES = (
    ("Groq", "https://api.groq.com/openai/v1/chat/completions", GROQ_API_KEY, "llama-3.3-70b-versatile"),
    ("Cerebras", "https://api.cerebras.ai/v1/chat/completions", CEREBRAS_API_KEY, "llama3.1-8b"),
    ("Together", "https://api.together.xyz/v1/chat/completions", TOGETHER_KEY, "meta-llama/Llama-3.3-70B-Instruct-Turbo"),
    ("Fireworks", "https://api.fireworks.ai/inference/v1/chat/completions", FIREWORKS_API_KEY, "accounts/fireworks/models/llama-v3p3-70b-instruct"),
    ("OpenRouter", "https://openrouter.ai/api/v1/chat/completions", OPENROUTER_KEY, "meta-llama/llama-3.2-3b-instruct:free"),
    ("DeepSeek", "https://api.deepseek.com/v1/chat/completions", DEEPSEEK_API_KEY, "deepseek-chat"),
)
APIS = [candidate for candidate in _API_CANDIDATES if candidate[2]]

# Nothing can be processed without at least one usable provider.
if len(APIS) == 0:
    raise ValueError("Please set at least one API key in Kaggle Secrets!")

print(f"Configured {len(APIS)} APIs: {[provider[0] for provider in APIS]}")

# Rotation cursor into APIS; each worker starts at a different offset.
_api_idx = WORKER_ID - 1

def call_api(prompt, max_tokens=4000):
    """Send *prompt* to the configured chat APIs and return the first reply.

    Providers in ``APIS`` are tried in round-robin order starting at the
    module-level ``_api_idx`` cursor. On success the cursor is moved to the
    provider after the one that answered, so consecutive calls spread load.

    Args:
        prompt: User message sent as the only chat message.
        max_tokens: Upper bound on generated tokens passed to the API.

    Returns:
        The reply text of the first provider that answers with HTTP 200 and a
        parseable body, or ``None`` if every provider fails.
    """
    global _api_idx
    if not APIS:
        return None

    for offset in range(len(APIS)):
        idx = (_api_idx + offset) % len(APIS)
        name, url, key, model = APIS[idx]
        try:
            resp = requests.post(url,
                headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"},
                json={"model": model, "messages": [{"role": "user", "content": prompt}],
                      "max_tokens": max_tokens, "temperature": 0.7},
                timeout=120)
            if resp.status_code == 200:
                # Parse before reporting success so a malformed body is
                # treated as a failure of this provider, not a success.
                content = resp.json()["choices"][0]["message"]["content"]
                print(f"  ✓ {name}")
                _api_idx = (idx + 1) % len(APIS)
                return content
            if resp.status_code == 429:
                print(f"  ⚠ {name} rate limited")
            else:
                # Surface auth errors, server errors, etc. instead of
                # silently moving on to the next provider.
                print(f"  ✗ {name}: HTTP {resp.status_code}")
        except Exception as e:
            # Best effort: any network/parse error just means "try the next one".
            print(f"  ✗ {name}: {str(e)[:50]}")
    return None

# Test
# Smoke test: one tiny prompt through call_api. `result` is None when no
# configured API produced a reply; the notebook does not stop in that case.
print("Testing APIs...")
result = call_api("Say 'working' in one word")
print(f"Result: {result}")

In [None]:
def expand_chapter(content, title="", chapter_num=1):
    """Ask the LLM to lengthen a chapter that is under 2500 words.

    Args:
        content: Full chapter text.
        title: Book title; accepted but not used in the prompt.
        chapter_num: Chapter number shown in the prompt header.

    Returns:
        ``(text, changed)``. ``changed`` is True only when the API returned a
        reply with more words than the original; otherwise the original
        ``content`` is returned unchanged with ``changed`` False.
    """
    original_count = len(content.split())
    if original_count >= 2500:
        return content, False

    # Only the first 10000 characters of the chapter are sent to the model.
    excerpt = content[:10000]
    prompt = f"""Expand this chapter to at least 2500 words. Keep the same style.

CHAPTER {chapter_num}:
{excerpt}

Write the COMPLETE expanded chapter:"""

    expanded = call_api(prompt, max_tokens=5000)
    if not expanded:
        return content, False
    if len(expanded.split()) <= original_count:
        return content, False
    return expanded, True


def fix_book(book_dir):
    """Expand every short chapter of one book and record the result.

    A book is a directory containing ``story_bible.json`` and
    ``chapter_*.md`` files. Chapters under 2500 words are passed to
    ``expand_chapter`` and overwritten when a longer version comes back.

    The book is flagged ``quality_fixed`` in its story bible only when no
    expansion failed, so a book whose API calls failed is picked up again on
    a later run instead of being marked as done.

    Args:
        book_dir: ``pathlib.Path`` of the book directory.

    Returns:
        True if the book is (now or already) marked as fixed; False if it has
        no story bible, no chapters, or at least one chapter could not be
        expanded.
    """
    bible_path = book_dir / "story_bible.json"
    if not bible_path.exists():
        return False

    with open(bible_path, encoding="utf-8") as f:
        bible = json.load(f)

    if bible.get("quality_fixed"):
        return True

    chapters = sorted(book_dir.glob("chapter_*.md"))
    if not chapters:
        return False

    print(f"\nFixing: {book_dir.name}")
    fixes = 0
    failures = 0

    for position, ch_path in enumerate(chapters, start=1):
        content = ch_path.read_text(encoding="utf-8")
        words = len(content.split())
        if words >= 2500:
            continue

        print(f"  Expanding {ch_path.name} ({words}w)")
        try:
            ch_num = int(ch_path.stem.split('_')[1])
        except (IndexError, ValueError):
            # Non-numeric suffix (e.g. "chapter_final"): fall back to the
            # chapter's position rather than aborting the whole book.
            ch_num = position

        expanded, changed = expand_chapter(content, bible.get('title', ''), ch_num)
        if changed:
            ch_path.write_text(expanded, encoding="utf-8")
            print(f"    → {len(expanded.split())}w")
            fixes += 1
        else:
            failures += 1
            print(f"    ✗ could not expand {ch_path.name}")

    if failures:
        # Keep the chapters that were expanded, but leave the book unflagged
        # so the remaining short chapters are retried later.
        print(f"  ⚠ Fixed {fixes} chapters, {failures} still short; not marking as fixed")
        return False

    bible["quality_fixed"] = True
    bible["fixed_by"] = f"kaggle_worker_{WORKER_ID}"
    with open(bible_path, "w", encoding="utf-8") as f:
        json.dump(bible, f, indent=2)

    print(f"  ✓ Fixed {fixes} chapters")
    return True

In [None]:
# Find and distribute books
# Every sub-directory of the fiction output folder is treated as one book.
fiction_dir = Path("/kaggle/working/bookcli/output/fiction")
all_books = sorted(d for d in fiction_dir.iterdir() if d.is_dir())

# A book is "unfixed" when it has a story bible that is not flagged
# quality_fixed, or whose story bible cannot be read/parsed.
unfixed = []
for book in all_books:
    bible_path = book / "story_bible.json"
    if not bible_path.exists():
        continue
    try:
        with open(bible_path, encoding="utf-8") as f:
            bible = json.load(f)
        already_fixed = bool(bible.get("quality_fixed"))
    except (OSError, ValueError, AttributeError) as e:
        # ValueError covers JSON and Unicode decode errors; AttributeError
        # covers a bible whose top level is not a JSON object. Unlike a bare
        # `except:`, this lets KeyboardInterrupt/SystemExit propagate.
        print(f"  Unreadable story bible in {book.name}: {str(e)[:50]}")
        already_fixed = False
    if not already_fixed:
        unfixed.append(book)

# Round-robin split of the unfixed list; WORKER_ID is 1-based.
my_books = [b for i, b in enumerate(unfixed) if i % TOTAL_WORKERS == (WORKER_ID - 1)]

print(f"Total: {len(all_books)} | Unfixed: {len(unfixed)} | This worker: {len(my_books)}")

In [None]:
# Process books
# Run fix_book over this worker's share, counting the books it reports as
# fixed. One failing book is logged and does not stop the run.
fixed = 0
for position, book in enumerate(my_books, start=1):
    print(f"[{position}/{len(my_books)}] {book.name}")
    try:
        succeeded = fix_book(book)
    except Exception as e:
        print(f"  Error: {e}")
    else:
        if succeeded:
            fixed += 1
    # Short pause between books.
    time.sleep(2)

print(f"\nFixed {fixed} books")

In [None]:
# Push changes
# Set a commit identity for this worker.
!git config user.name "Kaggle Worker"
!git config user.email "kaggle@worker.local"
# Stage only the book output directory, then show a summary of what is staged.
!git add output/fiction/
!git diff --staged --stat

import subprocess
# `git diff --staged --quiet` exits non-zero when something is staged, so a
# commit is only attempted when there is something to commit.
if subprocess.run(["git", "diff", "--staged", "--quiet"]).returncode != 0:
    !git commit -m "Fix books (Kaggle Worker {WORKER_ID})"
    # NOTE(review): the outcome of the push is not checked here, so "Pushed!"
    # is printed even if the push was rejected -- confirm in the cell output.
    !git push
    print("Pushed!")
else:
    print("No changes")