Fast, tolerant EPUB parser for Go.
book, err := spine.ParseFile("book.epub")
if err != nil {
// book may still contain partial data + warnings
log.Fatal(err)
}
fmt.Println(book.Metadata.Title)
fmt.Println("Landmarks:", len(book.Landmarks))
fmt.Println("Pages:", len(book.PageList))
chunks, err := book.Chunks(spine.ChunkingOptions{Mode: spine.ChunkByParagraph})
if err != nil {
log.Fatal(err)
}
for _, c := range chunks {
fmt.Println(c.ID, c.Text)
}
chapters, err := book.Chapters(spine.ChapterOptions{TitleSource: spine.TitleAuto})
if err != nil {
log.Fatal(err)
}
for _, ch := range chapters {
fmt.Println(ch.SpineIndex, ch.Title, len(ch.Text))
}
// Stream chapters (memory-friendly for very large books).
err = book.ForEachChapter(spine.ChapterOptions{TitleSource: spine.TitleAuto}, func(ch spine.Chapter) error {
fmt.Println(ch.SpineIndex, ch.Title, len(ch.Text))
return nil
})
if err != nil {
log.Fatal(err)
}
// Resolve anchors (after building chunks).
if ref, ok := book.ResolveAnchor("OEBPS/chapter1.xhtml#c1"); ok {
fmt.Println(ref.ChunkID, ref.Offset)
}
// Cover extraction.
if cover, err := book.Cover(); err == nil {
fmt.Println(cover.ContentType, len(cover.Bytes))
}

cfg := spine.DefaultConfig()
cfg.Strict = false
cfg.Fallbacks.GenerateTOC = true
cfg.Chunking = spine.ChunkingOptions{Mode: spine.ChunkBySize, MaxChars: 2000}
parser := spine.NewParser(cfg)
book, err := parser.ParseFile("book.epub")

For a full API reference, see docs/README.md (index) or docs/API.md.
Error details and examples are in docs/errors.md.
The parser returns sentinel errors you can check with errors.Is, such as ErrMissingContainer, ErrMalformedOPF, and ErrNoSpine.
- The parser is streaming-first: content is parsed on demand. `Parse(io.Reader)` spools to a temp file when needed. Call `Close()` to release resources.
- Anchor keys are normalized as `path#id` inside the EPUB; use `ResolveAnchor` for convenience lookups. `Strict` disables best-effort recovery; fallbacks stay off unless you explicitly set `Fallbacks`.
- Use `OpenCover`/`Cover` to retrieve the cover image (if present).
- TOC targets resolve after chunking; use `TOCWithTargets` if you need chunk offsets.
- Run `go mod tidy` to resolve module dependencies.
Add .epub files to testdata/fixtures (tracked) or testdata/fixtures/external (ignored), then generate golden outputs with the commands below (tracked fixtures by default):
go run ./cmd/spine-golden
go run ./cmd/spine-golden -external

The test TestCompatibilityFixtures will compare against the generated JSON files in testdata/expected.
Fixture provenance and licenses are documented in testdata/fixtures/SOURCES.md.
Benchmarks pick the largest .epub in testdata/fixtures by default.
Override with SPINE_BENCH_FIXTURE=/path/to/book.epub.
go test -run '^$' -bench BenchmarkOpenLargestFixture -benchmem
go test -run '^$' -bench BenchmarkParseBytesLargestFixture -benchmem
go test -run '^$' -bench BenchmarkParseAndChunkLargestFixture -benchmem

Generate CPU/memory profiles:
make bench-cpu
make bench-mem

Generate a small set of synthetic edge-case EPUBs with:
go run ./cmd/spine-fixtures

Download a limited set of Gutenberg EPUBs (respect their Terms of Use). Files go to testdata/fixtures/external by default:
go run ./cmd/spine-fetch-gutenberg -ids 11,84,1342 -variant epub3.images -yes

Download a limited set from W3C epub-tests and EPUBCheck test suites. Files go to testdata/fixtures/external by default:
go run ./cmd/spine-fetch-tests -yes