# Book Fixer - Google Colab Worker

This notebook processes books from the bookcli repository.

**Instructions:**
1. Get API keys from the providers listed in the configuration cell (at least one key is required)
2. Enter your GitHub token and API keys in the configuration cell
3. Run all cells in order

In [None]:
# Install dependencies
!pip install -q requests httpx gitpython

In [None]:
# Configuration - ENTER YOUR KEYS HERE
# The trailing "# @param {...}" annotations are Colab form markers; keep each
# one on the same line as its assignment.
# Do not save or share this notebook with real tokens or keys filled in.

# GITHUB_TOKEN is embedded in the clone URL; GITHUB_REPO is "<owner>/<name>".
GITHUB_TOKEN = ""  # @param {type:"string"}
GITHUB_REPO = "ElliottSax/bookcli"  # @param {type:"string"}
# WORKER_ID is 1-based: later cells use (WORKER_ID - 1) as a zero-based index
# when choosing the first API and when splitting books across TOTAL_WORKERS.
WORKER_ID = 1  # @param {type:"integer"}
TOTAL_WORKERS = 3  # @param {type:"integer"}

# API Keys - Get these from the providers:
# - DeepSeek: https://platform.deepseek.com/api_keys
# - Groq: https://console.groq.com/keys
# - OpenRouter: https://openrouter.ai/keys
# - Together: https://api.together.xyz/settings/api-keys
# - Fireworks: https://fireworks.ai/api-keys
# - Cerebras: https://cloud.cerebras.ai/
# A provider whose key is left empty is skipped; at least one key must be set.
DEEPSEEK_API_KEY = ""  # @param {type:"string"}
GROQ_API_KEY = ""  # @param {type:"string"}
OPENROUTER_KEY = ""  # @param {type:"string"}
TOGETHER_KEY = ""  # @param {type:"string"}
FIREWORKS_API_KEY = ""  # @param {type:"string"}
CEREBRAS_API_KEY = ""  # @param {type:"string"}

In [None]:
import os
import json
import time
import requests
from pathlib import Path

# Clone repository
if not GITHUB_TOKEN:
    raise ValueError("Please set GITHUB_TOKEN in the cell above!")

repo_url = f"https://{GITHUB_TOKEN}@github.com/{GITHUB_REPO}.git"
!rm -rf /content/bookcli
!git clone --depth 1 {repo_url} /content/bookcli

os.chdir("/content/bookcli")
print(f"Cloned to /content/bookcli")

In [None]:
# API Configuration - only use APIs with keys
# Every known provider as (name, endpoint URL, key, model), in preference order.
_PROVIDERS = [
    ("Groq", "https://api.groq.com/openai/v1/chat/completions", GROQ_API_KEY, "llama-3.3-70b-versatile"),
    ("Cerebras", "https://api.cerebras.ai/v1/chat/completions", CEREBRAS_API_KEY, "llama3.1-8b"),
    ("Together", "https://api.together.xyz/v1/chat/completions", TOGETHER_KEY, "meta-llama/Llama-3.3-70B-Instruct-Turbo"),
    ("Fireworks", "https://api.fireworks.ai/inference/v1/chat/completions", FIREWORKS_API_KEY, "accounts/fireworks/models/llama-v3p3-70b-instruct"),
    ("OpenRouter", "https://openrouter.ai/api/v1/chat/completions", OPENROUTER_KEY, "meta-llama/llama-3.2-3b-instruct:free"),
    ("DeepSeek", "https://api.deepseek.com/v1/chat/completions", DEEPSEEK_API_KEY, "deepseek-chat"),
]

# Keep only the providers whose key (third tuple element) was filled in.
APIS = [provider for provider in _PROVIDERS if provider[2]]

if len(APIS) == 0:
    raise ValueError("Please set at least one API key!")

configured_names = [provider[0] for provider in APIS]
print(f"Configured {len(APIS)} APIs: {configured_names}")

# Index of the provider call_api() tries first; offset by worker so that
# different workers start on different providers.
_api_idx = WORKER_ID - 1

def call_api(prompt, max_tokens=4000):
    """Send ``prompt`` to the configured chat-completion APIs until one answers.

    Providers in ``APIS`` are tried once each, starting at the module-level
    ``_api_idx`` and wrapping around. After a success ``_api_idx`` is advanced
    to the provider following the one that answered, so consecutive calls
    rotate through the providers.

    Args:
        prompt: Text sent as the single user message.
        max_tokens: Value of the ``max_tokens`` request field.

    Returns:
        The message content of the first successful response, or ``None`` when
        every provider failed (HTTP error, rate limit, exception, or an empty
        response).
    """
    global _api_idx
    total = len(APIS)
    for offset in range(total):
        idx = (_api_idx + offset) % total
        name, url, key, model = APIS[idx]
        try:
            resp = requests.post(url,
                headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"},
                json={"model": model, "messages": [{"role": "user", "content": prompt}],
                      "max_tokens": max_tokens, "temperature": 0.7},
                timeout=120)
            if resp.status_code == 429:
                print(f"  ⚠ {name} rate limited")
                continue
            if resp.status_code != 200:
                # Report other failures (e.g. a bad key) instead of skipping silently.
                print(f"  ✗ {name}: HTTP {resp.status_code} {resp.text[:50]}")
                continue
            # Parse before announcing success so a malformed body counts as a failure.
            content = resp.json()["choices"][0]["message"]["content"]
        except Exception as e:
            # Best-effort fallback: any error just moves on to the next provider.
            print(f"  ✗ {name}: {str(e)[:50]}")
            continue

        if not content:
            print(f"  ✗ {name}: empty response")
            continue

        print(f"  ✓ {name}")
        _api_idx = (idx + 1) % total
        return content
    return None

# Test API
print("Testing APIs...")
result = call_api("Say 'API working' in 3 words")
print(f"Result: {result}")

# Later cells rewrite and push repository files based on API output, so stop
# here when no provider is able to answer.
if result is None:
    raise RuntimeError(
        "No configured API returned a response - check your API keys before processing books"
    )

In [None]:
def expand_chapter(content, title="", chapter_num=1, min_words=2500):
    """Ask the LLM APIs to expand a short chapter to at least ``min_words`` words.

    Args:
        content: Full text of the chapter.
        title: Book title. When non-empty it is added to the prompt as context;
            when empty the prompt contains no book line.
        chapter_num: Chapter number shown in the prompt heading.
        min_words: Target length in whitespace-separated words (default 2500).

    Returns:
        A ``(text, changed)`` tuple. ``changed`` is True only when the API
        returned text with more words than ``content``; in every other case
        (already long enough, API failure, shorter response) the original
        ``content`` is returned with ``changed`` set to False.
    """
    words = len(content.split())
    if words >= min_words:
        return content, False

    # Optional context line; empty when no title was supplied.
    book_line = f"BOOK: {title}\n" if title else ""

    # NOTE(review): only the first 10,000 characters of the chapter are sent.
    # If a caller replaces the whole chapter with the response, text beyond
    # that point may be lost - confirm this limit is acceptable.
    prompt = f"""Expand this chapter to at least {min_words} words. Keep the same style, characters, and plot.
Do not add author notes, commentary, or meta-text. Just write the expanded chapter.

{book_line}CHAPTER {chapter_num}:
{content[:10000]}

Write the COMPLETE expanded chapter (at least {min_words} words):"""

    result = call_api(prompt, max_tokens=5000)
    # Accept the response only if it is actually longer than the input.
    if result and len(result.split()) > words:
        return result, True
    return content, False


def fix_book(book_dir):
    """Expand the short chapters of one book and record the outcome.

    Reads ``story_bible.json`` in ``book_dir``, expands every ``chapter_*.md``
    file that has fewer than 2500 words, and writes expanded text back to the
    chapter file. The book is marked ``quality_fixed`` in its story bible only
    when no expansion attempt failed, so a book with failures is picked up
    again on a later run.

    Args:
        book_dir: ``pathlib.Path`` of the book directory.

    Returns:
        True when the book is already, or has just been, marked
        ``quality_fixed``. False when the story bible or chapter files are
        missing, or when at least one short chapter could not be expanded.

    Raises:
        json.JSONDecodeError: If ``story_bible.json`` is not valid JSON.
    """
    min_words = 2500  # same threshold expand_chapter uses by default

    bible_path = book_dir / "story_bible.json"
    if not bible_path.exists():
        return False

    with open(bible_path, encoding="utf-8") as f:
        bible = json.load(f)

    if bible.get("quality_fixed"):
        return True  # Already done

    chapters = sorted(book_dir.glob("chapter_*.md"))
    if not chapters:
        return False

    print(f"\nFixing: {book_dir.name}")
    fixes = 0
    failures = 0

    for position, ch_path in enumerate(chapters, start=1):
        content = ch_path.read_text(encoding="utf-8")
        words = len(content.split())
        if words >= min_words:
            continue

        print(f"  Expanding {ch_path.name} ({words} words)")
        try:
            ch_num = int(ch_path.stem.split('_')[1])
        except ValueError:
            # Non-numeric suffix: fall back to the position in the sorted
            # listing instead of aborting the whole book.
            ch_num = position

        expanded, changed = expand_chapter(content, bible.get('title', ''), ch_num)
        if changed:
            ch_path.write_text(expanded, encoding="utf-8")
            new_words = len(expanded.split())
            print(f"    Expanded to {new_words} words")
            fixes += 1
        else:
            failures += 1
            print("    Expansion failed")

    if failures:
        # Leave the book unmarked so a later run retries the failed chapters.
        print(f"  ⚠ Fixed {fixes} chapters, {failures} failed - not marking as fixed")
        return False

    # Mark as fixed
    bible["quality_fixed"] = True
    bible["fixed_by"] = f"colab_worker_{WORKER_ID}"
    with open(bible_path, "w", encoding="utf-8") as f:
        json.dump(bible, f, indent=2)

    print(f"  ✓ Fixed {fixes} chapters")
    return True

In [None]:
# Find books to fix
# Validate the worker settings first: an out-of-range WORKER_ID would silently
# be assigned no books, and TOTAL_WORKERS = 0 would divide by zero below.
if TOTAL_WORKERS < 1 or not 1 <= WORKER_ID <= TOTAL_WORKERS:
    raise ValueError(
        f"WORKER_ID must be between 1 and TOTAL_WORKERS ({TOTAL_WORKERS}), got {WORKER_ID}"
    )

fiction_dir = Path("/content/bookcli/output/fiction")
all_books = sorted([d for d in fiction_dir.iterdir() if d.is_dir()])

# Filter to unfixed books (directories without a story bible are ignored)
unfixed = []
for book in all_books:
    bible_path = book / "story_bible.json"
    if bible_path.exists():
        try:
            with open(bible_path, encoding="utf-8") as f:
                bible = json.load(f)
            if not bible.get("quality_fixed"):
                unfixed.append(book)
        except json.JSONDecodeError:
            # An unreadable story bible counts as "not fixed yet".
            unfixed.append(book)

# Distribute across workers: round-robin over the unfixed list by index.
# NOTE(review): the split depends on the contents of `unfixed` at clone time,
# so workers that cloned at different times may get overlapping books - confirm.
my_books = [b for i, b in enumerate(unfixed) if i % TOTAL_WORKERS == (WORKER_ID - 1)]

print(f"Total books: {len(all_books)}")
print(f"Unfixed: {len(unfixed)}")
print(f"This worker ({WORKER_ID}/{TOTAL_WORKERS}): {len(my_books)} books")

In [None]:
# Process books
# Run fix_book over this worker's share, counting the books it reports as fixed.
fixed_count = 0
total_books = len(my_books)
for position, book in enumerate(my_books, start=1):
    print(f"\n[{position}/{total_books}] {book.name}")
    try:
        succeeded = fix_book(book)
    except Exception as e:
        # Best effort: report the failure and carry on with the next book.
        print(f"  Error: {e}")
    else:
        if succeeded:
            fixed_count += 1

    # Rate limiting
    time.sleep(2)

print(f"\n\nFixed {fixed_count} books")

In [None]:
# Commit and push changes
import subprocess


def _git(*args):
    """Run ``git <args>`` in the current directory, print its output, return its exit code."""
    proc = subprocess.run(["git", *args], capture_output=True, text=True)
    output = (proc.stdout + proc.stderr).strip()
    if output:
        print(output)
    return proc.returncode


_git("config", "user.name", f"Colab Worker {WORKER_ID}")
_git("config", "user.email", "colab@worker.local")
_git("add", "output/fiction/")
_git("diff", "--staged", "--stat")

# Commit if there are changes: `git diff --staged --quiet` exits non-zero
# when something is staged.
if _git("diff", "--staged", "--quiet") != 0:
    if _git("commit", "-m", f"Fix books (Colab Worker {WORKER_ID})") != 0:
        raise RuntimeError("git commit failed - see the output above")
    # Report success only when the push really succeeded.
    if _git("push") != 0:
        raise RuntimeError(
            "git push failed - see the output above (the remote may have new commits)"
        )
    print("Changes pushed!")
else:
    print("No changes to commit")