# Cofone — Complete Feature Reference
Every single feature of the library, in one notebook.

In [None]:
from pathlib import Path
from dotenv import load_dotenv
from cofone import RAG

# Resolve the working directory once. Sample files are addressed relative to
# it, and the .env file is read from the directory one level above.
BASE = Path().resolve()
load_dotenv(BASE.parent / '.env')

---
## 1 · Basic usage
Load a file and ask a question. That's it.

In [None]:
# Default pipeline: load one text file, ask one question, print the reply.
answer = (
    RAG()
    .add_source(BASE / 'note_ex.txt')
    .run('Summarize')
)
print(answer)

---
## 2 · Debug mode
Shows provider, model, loaded docs, chunk count and text preview.

In [None]:
# .debug() is switched on before the source is loaded and the query is run.
answer = RAG().debug().add_source(BASE / 'note_ex.txt').run('Summarize')
print(answer)

---
## 3 · Chunking modes
Control how the text is split before indexing.

In [None]:
# 'smart' (the default) splits on paragraphs first, then breaks any paragraph
# longer than 600 characters into sentences.
(
    RAG(chunk_mode='smart')
    .add_source(BASE / 'note_ex.txt')
    .run('What is this about?')
)

In [None]:
# 'paragraphs' splits the text on blank lines only.
(
    RAG(chunk_mode='paragraphs')
    .add_source(BASE / 'note_ex.txt')
    .run('What is this about?')
)

In [None]:
# 'sentences' splits the text at '.', '!' and '?'.
(
    RAG(chunk_mode='sentences')
    .add_source(BASE / 'note_ex.txt')
    .run('What is this about?')
)

In [None]:
# 'fixed' cuts fixed-length slices: 500 characters each, with a 50-character overlap.
(
    RAG(chunk_mode='fixed')
    .add_source(BASE / 'note_ex.txt')
    .run('What is this about?')
)

In [None]:
# Index the same file under every chunking mode and report how many chunks
# each mode produces.
for mode in ('smart', 'paragraphs', 'sentences', 'fixed'):
    rag = RAG(chunk_mode=mode).add_source(BASE / 'note_ex.txt')
    # NOTE: _retriever is underscore-prefixed, i.e. not part of the public API.
    chunk_total = len(rag._retriever.chunks)
    print(f'[{mode:12s}] → {chunk_total} chunks')

---
## 4 · Retrieval — BM25 vs FAISS

In [None]:
# BM25 is the default retriever: keyword matching, no extra dependencies.
(
    RAG()
    .debug()
    .add_source(BASE / 'docs_ex/')
    .run('Who is Leonardo?')
)

In [None]:
# FAISS retrieval: semantic search over sentence-transformers embeddings.
(
    RAG(faiss=True)
    .debug()
    .add_source(BASE / 'docs_ex/')
    .run('Who is Leonardo?')
)

In [None]:
# FAISS with a multilingual embedding model, which suits Italian or
# mixed-language documents better.
(
    RAG(faiss=True, embedding_model='paraphrase-multilingual-MiniLM-L12-v2')
    .debug()
    .add_source(BASE / 'docs_ex/')
    .run('Describe the paintings')
)

---
## 5 · FAISS persistence (cache to disk)
First run: builds & saves index. Second run: loads instantly, no recompute.

In [None]:
# Folder in which the FAISS index is saved and later reloaded.
db = BASE / '.cofone_cache'

# First run is slow: the embeddings have to be computed.
(
    RAG(faiss=True, persist_path=db)
    .add_source(BASE / 'docs_ex/')
    .run('Who is Leonardo?')
)

In [None]:
# Second run is instant: the index is loaded from disk.
answer = (
    RAG(faiss=True, persist_path=db)
    .add_source(BASE / 'docs_ex/')
    .run('What did Leonardo invent?')
)
print(answer)

---
## 6 · Providers & models
OpenRouter, OpenAI, Gemini, Ollama — all via the same interface.

In [None]:
# OpenRouter is the default provider: one key gives access to 200+ models.
(
    RAG(model='arcee-ai/trinity-large-preview:free')
    .add_source(BASE / 'note_ex.txt')
    .run('Summarize')
)

In [None]:
# Select a specific model by its id string.
(
    RAG(model='meta-llama/llama-3.3-70b-instruct:free')
    .add_source(BASE / 'note_ex.txt')
    .run('Summarize')
)

In [None]:
# Select a specific model by its id string.
(
    RAG(model='google/gemini-2.0-flash-exp:free')
    .add_source(BASE / 'note_ex.txt')
    .run('Summarize')
)

In [None]:
# ── OpenAI direct — requires OPENAI_API_KEY ───────────────────────────────
# RAG(provider='openai', model='gpt-4o-mini').add_source(BASE / 'note_ex.txt').run('Summarize')
# RAG(provider='openai', model='gpt-4o').add_source(BASE / 'note_ex.txt').run('Summarize')
# RAG(model='gpt-4o-mini').add_source(BASE / 'note_ex.txt').run('Summarize')  # auto-detected

In [None]:
# ── Gemini direct — requires GEMINI_API_KEY ───────────────────────────────
# RAG(provider='gemini', model='gemini-2.0-flash').add_source(BASE / 'note_ex.txt').run('Summarize')
# RAG(model='gemini-2.0-flash').add_source(BASE / 'note_ex.txt').run('Summarize')  # auto-detected

In [None]:
# ── Ollama local — no key, must be running on localhost ───────────────────
# RAG(provider='ollama', model='llama3').add_source(BASE / 'note_ex.txt').run('Summarize')
# RAG(provider='ollama', model='mistral').add_source(BASE / 'note_ex.txt').run('Summarize')
# RAG(provider='ollama', model='phi3').add_source(BASE / 'note_ex.txt').run('Summarize')

In [None]:
# The provider is detected from the model name, so there is no need to set it
# manually. Check a few model names against the provider each should map to.
from cofone.llm import _detect_provider

expected_providers = {
    'arcee-ai/trinity-large-preview:free': 'openrouter',
    'gpt-4o-mini': 'openai',
    'gemini-2.0-flash': 'gemini',
}
for model, expected in expected_providers.items():
    detected = _detect_provider(None, model, None)
    mark = '✓' if detected == expected else '✗'
    print(f'{mark} {model!r:45s} → {detected}')

---
## 7 · Sources — files, folders, PDF, URL, Wikipedia, YouTube

In [None]:
# A single .txt file as the source.
(
    RAG()
    .add_source(BASE / 'note_ex.txt')
    .run('Summarize')
)

In [None]:
# A folder as the source; .txt, .md and .pdf files are loaded recursively.
(
    RAG()
    .add_source(BASE / 'docs_ex/')
    .run('Who is Leonardo?')
)

In [None]:
# A Wikipedia page as the source.
(
    RAG()
    .debug()
    .add_source('https://en.wikipedia.org/wiki/Artificial_intelligence')
    .run('What is AI?')
)

In [None]:
# Any web URL
# RAG().add_source('https://example.com/article').run('Summarize')

In [None]:
# A YouTube video as the source (its transcript is used); this is the first
# YouTube video ever.
(
    RAG()
    .debug()
    .add_source('https://www.youtube.com/watch?v=jNQXAC9IVRw')
    .run('What is this video about?')
)

In [None]:
# PDF — requires: pip install pypdf
# RAG().add_source('report.pdf').run('Summarize')

---
## 8 · Multiple sources
Chain as many `.add_source()` calls as you want.

In [None]:
# Two sources are chained onto one pipeline before the question is asked.
question = 'What do you know about Cofone and Leonardo?'
answer = RAG().add_source(BASE / 'note_ex.txt').add_source(BASE / 'docs_ex/').run(question)
print(answer)

---
## 9 · Chat memory
`.chat()` keeps conversation history. Follow-up questions have full context.

In [None]:
# One pipeline is reused for the whole conversation, so each .chat() call
# sees the questions and answers that came before it.
bot = RAG().add_source(BASE / 'docs_ex/')

print('Q1:', bot.chat('Who is Leonardo da Vinci?'))

# The follow-up relies on the history: 'he' refers to Leonardo.
print('Q2:', bot.chat('When was he born?'))

print('Q3:', bot.chat('What are his most famous paintings?'))

In [None]:
# Clear the stored conversation; the next question is asked with no context.
bot.reset_memory()
reply = bot.chat('What are we talking about?')
print(reply)

In [None]:
# memory=True is the explicit form of what .chat() does: history is kept
# across successive .run() calls.
bot2 = RAG(memory=True).add_source(BASE / 'docs_ex/')
for question in ('Who is Leonardo?', 'What did he paint?'):
    print(bot2.run(question))

---
## 10 · Structured output (Pydantic)
Get back a validated Python object instead of a string.

In [None]:
from pydantic import BaseModel

# Schema for structured extraction: passed as `schema=Person` to `.run()`,
# which then returns a Person instance instead of a plain string.
class Person(BaseModel):
    name: str              # read back below as data.name
    birth_year: int        # declared as int, unlike the other (str) fields
    nationality: str
    most_famous_work: str

# Ask for the answer as a Person instance instead of free text.
question = 'Extract data about Leonardo'
data = RAG().add_source(BASE / 'docs_ex/').run(question, schema=Person)

# Print every field with its label padded to a common 18-character column.
for field in ('name', 'birth_year', 'nationality', 'most_famous_work'):
    label = f'{field}:'
    print(f'{label:<18}{getattr(data, field)}')

In [None]:
# More complex schema example
from typing import List

# Schema with a list-valued field: passed as `schema=AISummary` to `.run()`.
class AISummary(BaseModel):
    definition: str
    main_subfields: List[str]  # a list of strings rather than a single value
    biggest_challenge: str

# Extract an AISummary from a Wikipedia page and show it as indented JSON.
wiki_url = 'https://en.wikipedia.org/wiki/Artificial_intelligence'
result = RAG().add_source(wiki_url).run('Extract key info about AI', schema=AISummary)
print(result.model_dump_json(indent=2))

---
## 11 · Streaming
Tokens arrive and print one by one — no waiting for the full response.

In [None]:
rag = RAG().add_source(BASE / 'docs_ex/')
question = "Tell me about Leonardo's inventions"
for token in rag.stream(question):
    # Print each token as it arrives: no newline in between, flushed right away.
    print(token, end='', flush=True)
print()  # end the line once the stream is finished

In [None]:
# Streaming also works with debug output enabled.
rag2 = RAG().debug().add_source(BASE / 'note_ex.txt')
token_stream = rag2.stream('Describe the chunking system')
for token in token_stream:
    print(token, end='', flush=True)
print()

---
## 12 · Custom tools
Attach functions the agent can use alongside the retrieved context.

In [None]:
def calculate(expression: str) -> str:
    """Evaluate a plain arithmetic expression and report the outcome as text.

    This function is registered as a tool, so the expression text is expected
    to come from the language model and is treated as untrusted. It is
    therefore NOT passed to ``eval`` (which would execute arbitrary Python).
    Instead it is parsed with ``ast`` and only numeric literals combined with
    ``+ - * / // % **``, unary ``+``/``-`` and parentheses are evaluated.

    Args:
        expression: Arithmetic expression, e.g. ``'144 / 12'``.

    Returns:
        ``'Result: <value>'`` on success, otherwise ``'Error: <reason>'``.
        Failures are reported in the returned text and never raised.
    """
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def evaluate(node):
        # Exact type check: bool is a subclass of int and must not slip through.
        if isinstance(node, ast.Constant) and type(node.value) in (int, float):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = evaluate(node.left)
            right = evaluate(node.right)
            # Cap exponents so a short input cannot demand a huge computation.
            if isinstance(node.op, ast.Pow) and abs(right) > 1000:
                raise ValueError('exponent too large')
            return binary_ops[type(node.op)](left, right)
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        # Names, calls, attribute access, etc. are rejected outright.
        raise ValueError(f'unsupported expression element: {type(node).__name__}')

    try:
        # strip(): eval() tolerated leading whitespace, ast.parse() does not.
        tree = ast.parse(expression.strip(), mode='eval')
        value = evaluate(tree.body)
    except Exception as e:
        # Tool boundary: every failure is handed back as text, as before.
        return f'Error: {e}'
    return f'Result: {value}'

def word_count(text: str) -> str:
    """Report how many whitespace-separated words *text* contains."""
    words = text.split()
    total = len(words)
    return 'Word count: ' + str(total)

# Register both tools, load the source, then ask a question that combines a
# summary with a piece of arithmetic.
agent = RAG().add_tool(calculate).add_tool(word_count).add_source(BASE / 'note_ex.txt')
answer = agent.run('Summarize and tell me what is 144 / 12')
print(answer)

---
## 13 · API key — 3 ways

In [None]:
# Way 1: pass the key directly as a parameter (highest priority)
# RAG(api_key='sk-or-...').add_source(BASE / 'note_ex.txt').run('Summarize')

# Way 2: put the keys in a .env file in the project root, one KEY=value per line
# OPENROUTER_API_KEY=sk-or-...
# OPENAI_API_KEY=sk-...
# GEMINI_API_KEY=AI...

# Way 3: set a system environment variable
# $env:OPENROUTER_API_KEY='sk-or-...'    (PowerShell)
# export OPENROUTER_API_KEY='sk-or-...'  (bash/zsh)

# Nothing is validated here; this line only prints a reminder.
print('Keys OK if tests above worked.')

---
## RAG() — Full Parameter Reference

| Parameter | Default | Description |
|---|---|---|
| `model` | provider default | LLM model string |
| `provider` | `openrouter` | `openrouter` / `openai` / `gemini` / `ollama` — auto-detected from the model name when not set (e.g. `gpt-4o-mini` → `openai`) |
| `api_key` | `None` | API key — highest priority; overrides `.env` and environment variables |
| `base_url` | provider default | Custom API endpoint |
| `faiss` | `False` | Use FAISS semantic search |
| `embedding_model` | `all-MiniLM-L6-v2` | sentence-transformers model |
| `chunk_mode` | `smart` | `smart` / `paragraphs` / `sentences` / `fixed` |
| `persist_path` | `None` | Folder path to save/load FAISS index |
| `memory` | `False` | Enable chat memory (keeps history across `.run()` calls) |

## Methods

| Method | Returns | Description |
|---|---|---|
| `.add_source(path_or_url)` | `self` | Load file / folder / URL / YouTube |
| `.add_tool(fn)` | `self` | Attach a custom function |
| `.debug()` | `self` | Enable verbose logging |
| `.run(query, schema=None)` | `str` or Pydantic model | Single query |
| `.chat(query)` | `str` | Query with memory enabled |
| `.stream(query)` | generator of `str` | Streaming query |
| `.reset_memory()` | `self` | Clear conversation history |