diff --git a/packages/ingester/src/ingesters/CairoBookIngester.ts b/packages/ingester/src/ingesters/CairoBookIngester.ts
index 188b2d12..ebf14e2f 100644
--- a/packages/ingester/src/ingesters/CairoBookIngester.ts
+++ b/packages/ingester/src/ingesters/CairoBookIngester.ts
@@ -10,11 +10,11 @@ import { VectorStore } from '@cairo-coder/agents/db/postgresVectorStore';
 import { logger } from '@cairo-coder/agents/utils/index';
 import * as fs from 'fs/promises';
 import * as path from 'path';
+import { calculateHash } from '../utils/contentUtils';
 import {
-  addSectionWithSizeLimit,
-  calculateHash,
-  createAnchor,
-} from '../utils/contentUtils';
+  RecursiveMarkdownSplitter,
+  SplitOptions,
+} from '../utils/RecursiveMarkdownSplitter';
 
 /**
  * Ingester for the Cairo Book documentation
@@ -63,109 +63,50 @@ export class CairoBookIngester extends MarkdownIngester {
   }
 
   /**
-   * Chunk the core library summary file by H1 headers
+   * Chunk the core library summary file using RecursiveMarkdownSplitter
    *
-   * This function takes the markdown content and splits it into sections
-   * based on H1 headers (# Header). Each section becomes a separate chunk
-   * with its content hashed for uniqueness.
+   * This function takes the markdown content and splits it using a recursive
+   * strategy that respects headers, code blocks, and maintains overlap between chunks.
    *
    * @param text - The markdown content to chunk
-   * @returns Promise<Document<BookChunk>[]> - Array of document chunks, one per H1 section
+   * @returns Promise<Document<BookChunk>[]> - Array of document chunks
    */
   async chunkSummaryFile(text: string): Promise<Document<BookChunk>[]> {
-    const content = text;
-    const sections: ParsedSection[] = [];
-
-    // We can't use a simple global regex, as it will incorrectly match commented
-    // lines inside code blocks. Instead, we'll parse line-by-line to find
-    // "real" headers, while keeping track of whether we're inside a code block.
-
-    const realHeaders: { title: string; startIndex: number }[] = [];
-    const lines = content.split('\n');
-    let inCodeBlock = false;
-    let charIndex = 0;
-
-    for (const line of lines) {
-      // Toggle the state if we encounter a code block fence
-      if (line.trim().startsWith('```')) {
-        inCodeBlock = !inCodeBlock;
-      }
-
-      // A real H1 header is a line that starts with '# ' and is NOT in a code block.
-      // We use a specific regex to ensure it's a proper H1.
-      const h1Match = line.match(/^#{1,2}\s+(.+)$/);
-      if (!inCodeBlock && h1Match) {
-        realHeaders.push({
-          title: h1Match[1].trim(),
-          startIndex: charIndex,
-        });
-      }
-
-      // Move the character index forward, accounting for the newline character
-      charIndex += line.length + 1;
-    }
+    // Configure the splitter with appropriate settings
+    const splitOptions: SplitOptions = {
+      maxChars: 2048,
+      minChars: 500,
+      overlap: 256,
+      headerLevels: [1, 2], // Split on H1 and H2 headers
+      preserveCodeBlocks: true,
+      idPrefix: 'cairo-book',
+      trim: true,
+    };
 
-    // If no H1 headers were found, treat the entire content as one section.
-    if (realHeaders.length === 0) {
-      logger.debug(
-        'No H1 headers found, creating single section from entire content',
-      );
-      addSectionWithSizeLimit(
-        sections,
-        'Core Library Documentation',
-        content.trim(),
-        20000,
-        createAnchor('Core Library Documentation'),
-      );
-    } else {
-      // Process each valid H1 header found
-      for (let i = 0; i < realHeaders.length; i++) {
-        const header = realHeaders[i];
-        const headerTitle = header.title;
-        const headerStartIndex = header.startIndex;
-
-        // Determine the end of this section (start of next header or end of content)
-        const nextHeaderIndex =
-          i < realHeaders.length - 1
-            ? realHeaders[i + 1].startIndex
-            : content.length;
-
-        // Extract section content from the start of the header line to before the next header
-        const sectionContent = content
-          .slice(headerStartIndex, nextHeaderIndex)
-          .trim();
-
-        logger.debug(`Adding section: ${headerTitle}`);
-
-        addSectionWithSizeLimit(
-          sections,
-          headerTitle,
-          sectionContent,
-          20000,
-          createAnchor(headerTitle),
-        );
-      }
-    }
+    // Create the splitter and split the content
+    const splitter = new RecursiveMarkdownSplitter(splitOptions);
+    const chunks = splitter.splitMarkdownToChunks(text);
 
-    const localChunks: Document<BookChunk>[] = [];
-
-    // Create a document for each section
-    sections.forEach((section: ParsedSection, index: number) => {
-      const hash: string = calculateHash(section.content);
-      localChunks.push(
-        new Document<BookChunk>({
-          pageContent: section.content,
-          metadata: {
-            name: section.title,
-            title: section.title,
-            chunkNumber: index,
-            contentHash: hash,
-            uniqueId: `${section.title}-${index}`,
-            sourceLink: ``,
-            source: this.source, // Using placeholder for 'this.source'
-          },
-        }),
-      );
+    logger.info(
+      `Created ${chunks.length} chunks using RecursiveMarkdownSplitter`,
+    );
+
+    // Convert chunks to Document<BookChunk> format
+    const localChunks: Document<BookChunk>[] = chunks.map((chunk) => {
+      const contentHash = calculateHash(chunk.content);
+
+      return new Document<BookChunk>({
+        pageContent: chunk.content,
+        metadata: {
+          name: chunk.meta.title,
+          title: chunk.meta.title,
+          chunkNumber: chunk.meta.chunkNumber, // Already 0-based
+          contentHash: contentHash,
+          uniqueId: chunk.meta.uniqueId,
+          sourceLink: '',
+          source: this.source,
+        },
+      });
     });
 
     return localChunks;
diff --git a/packages/ingester/src/ingesters/CoreLibDocsIngester.ts b/packages/ingester/src/ingesters/CoreLibDocsIngester.ts
index 7162acee..0f78c358 100644
--- a/packages/ingester/src/ingesters/CoreLibDocsIngester.ts
+++ b/packages/ingester/src/ingesters/CoreLibDocsIngester.ts
@@ -2,19 +2,15 @@ import * as fs from 'fs/promises';
 import * as path from 'path';
 import { BookConfig } from '../utils/types';
 import { MarkdownIngester } from './MarkdownIngester';
-import {
-  BookChunk,
-  DocumentSource,
-  ParsedSection,
-} from '@cairo-coder/agents/types/index';
+import { BookChunk, DocumentSource } from '@cairo-coder/agents/types/index';
 import { Document } from '@langchain/core/documents';
 import { VectorStore } from '@cairo-coder/agents/db/postgresVectorStore';
 import { logger } from '@cairo-coder/agents/utils/index';
+import { calculateHash } from '../utils/contentUtils';
 import {
-  addSectionWithSizeLimit,
-  calculateHash,
-  createAnchor,
-} from '../utils/contentUtils';
+  RecursiveMarkdownSplitter,
+  SplitOptions,
+} from '../utils/RecursiveMarkdownSplitter';
 
 /**
  * Ingester for the Cairo Core Library documentation
@@ -63,84 +59,54 @@ export class CoreLibDocsIngester extends MarkdownIngester {
   }
 
   /**
-   * Chunk the core library summary file by H1 headers
+   * Chunk the core library summary file using RecursiveMarkdownSplitter
    *
-   * This function takes the markdown content and splits it into sections
-   * based on H1 headers (# Header). Each section becomes a separate chunk
-   * with its content hashed for uniqueness.
+   * This function takes the markdown content and splits it using a recursive
+   * strategy that respects headers, code blocks, and maintains overlap between chunks.
    *
    * @param text - The markdown content to chunk
-   * @returns Promise<Document<BookChunk>[]> - Array of document chunks, one per H1 section
+   * @returns Promise<Document<BookChunk>[]> - Array of document chunks
    */
   async chunkCorelibSummaryFile(text: string): Promise<Document<BookChunk>[]> {
-    const content = text;
-    const sections: ParsedSection[] = [];
-
-    // Regex to match H1 headers (# Header)
-    const headerRegex = /^(#{1})\s+(.+)$/gm;
-    const matches = Array.from(content.matchAll(headerRegex));
-
-    let lastSectionEndIndex = 0;
-
-    // Process each H1 header found
-    for (let i = 0; i < matches.length; i++) {
-      const match = matches[i];
-      const headerTitle = match[2].trim();
-      const headerStartIndex = match.index!;
-
-      // Determine the end of this section (start of next header or end of content)
-      const nextHeaderIndex =
-        i < matches.length - 1 ? matches[i + 1].index! : content.length;
-
-      // Extract section content from after the header to before the next header
-      const sectionContent = content
-        .slice(headerStartIndex, nextHeaderIndex)
-        .trim();
-
-      logger.debug(`Adding section: ${headerTitle}`);
-
-      addSectionWithSizeLimit(
-        sections,
-        headerTitle,
-        sectionContent,
-        20000,
-        createAnchor(headerTitle),
-      );
-    }
+    logger.info(
+      'Using RecursiveMarkdownSplitter to chunk Core Library documentation',
+    );
 
-    // If no H1 headers found, treat the entire content as one section
-    if (sections.length === 0) {
-      logger.debug(
-        'No H1 headers found, creating single section from entire content',
-      );
-      addSectionWithSizeLimit(
-        sections,
-        'Core Library Documentation',
-        content,
-        20000,
-        createAnchor('Core Library Documentation'),
-      );
-    }
+    // Configure the splitter with appropriate settings
+    const splitOptions: SplitOptions = {
+      maxChars: 2048,
+      minChars: 500,
+      overlap: 256,
+      headerLevels: [1, 2], // Split on H1 and H2 headers
+      preserveCodeBlocks: true,
+      idPrefix: 'corelib',
+      trim: true,
+    };
 
-    const localChunks: Document<BookChunk>[] = [];
-
-    // Create a document for each section
-    sections.forEach((section: ParsedSection, index: number) => {
-      const hash: string = calculateHash(section.content);
-      localChunks.push(
-        new Document<BookChunk>({
-          pageContent: section.content,
-          metadata: {
-            name: section.title,
-            title: section.title,
-            chunkNumber: index,
-            contentHash: hash,
-            uniqueId: `${section.title}-${index}`,
-            sourceLink: ``,
-            source: this.source,
-          },
-        }),
-      );
+    // Create the splitter and split the content
+    const splitter = new RecursiveMarkdownSplitter(splitOptions);
+    const chunks = splitter.splitMarkdownToChunks(text);
+
+    logger.info(
+      `Created ${chunks.length} chunks using RecursiveMarkdownSplitter`,
+    );
+
+    // Convert chunks to Document<BookChunk> format
+    const localChunks: Document<BookChunk>[] = chunks.map((chunk) => {
+      const contentHash = calculateHash(chunk.content);
+
+      return new Document<BookChunk>({
+        pageContent: chunk.content,
+        metadata: {
+          name: chunk.meta.title,
+          title: chunk.meta.title,
+          chunkNumber: chunk.meta.chunkNumber, // Already 0-based
+          contentHash: contentHash,
+          uniqueId: chunk.meta.uniqueId,
+          sourceLink: '',
+          source: this.source,
+        },
+      });
     });
 
     return localChunks;
diff --git a/packages/ingester/src/utils/RecursiveMarkdownSplitter.ts b/packages/ingester/src/utils/RecursiveMarkdownSplitter.ts
new file mode 100644
index 00000000..56856f59
--- /dev/null
+++ b/packages/ingester/src/utils/RecursiveMarkdownSplitter.ts
@@ -0,0 +1,749 @@
+import { logger } from '@cairo-coder/agents/utils/index';
+
+// Public API interfaces
+export interface SplitOptions {
+  /** Maximum characters per chunk (UTF-16 .length), not counting overlap. Default: 2048 */
+  maxChars?: number;
+  /** Minimum characters per chunk. Chunks smaller than this will be merged with adjacent chunks. Default: 500 */
+  minChars?: number;
+  /** Characters of backward overlap between consecutive chunks. Default: 256 */
+  overlap?: number;
+  /** Which header levels are allowed as primary split points. Default: [1, 2] */
+  headerLevels?: (1 | 2)[];
+  /** If true, do not split inside fenced code blocks. Default: true */
+  preserveCodeBlocks?: boolean;
+  /** Optional prefix for generated unique IDs */
+  idPrefix?: string;
+  /** Whether to trim whitespace around chunks. Default: true */
+  trim?: boolean;
+}
+
+export interface ChunkMeta {
+  /** Title derived from the last seen header among the configured levels */
+  title: string;
+  /** Index of this chunk for the given title (0-based) */
+  chunkNumber: number;
+  /** Globally unique ID: `${slug(title)}-${chunkNumber}` (plus idPrefix if provided) */
+  uniqueId: string;
+  /** Inclusive start & exclusive end character offsets in the original string */
+  startChar: number;
+  endChar: number;
+  /** Full header path stack (e.g., ["Intro", "Goals"]) */
+  headerPath: string[];
+}
+
+export interface Chunk {
+  content: string;
+  meta: ChunkMeta;
+}
+
+// Internal data structures
+interface HeaderToken {
+  level: number; // 1..6
+  text: string;
+  start: number; // index in original string
+  end: number;
+}
+
+interface CodeBlockToken {
+  start: number;
+  end: number;
+  fence: '```' | '~~~';
+  infoString?: string; // e.g. "ts", "python"
+}
+
+interface Segment {
+  start: number;
+  end: number;
+}
+
+interface Tokens {
+  headers: HeaderToken[];
+  codeBlocks: CodeBlockToken[];
+}
+
+export class RecursiveMarkdownSplitter {
+  private readonly options: Required<SplitOptions>;
+
+  constructor(options: SplitOptions = {}) {
+    this.options = {
+      maxChars: options.maxChars ?? 2048,
+      minChars: options.minChars ?? 500,
+      overlap: options.overlap ?? 256,
+      headerLevels: options.headerLevels ?? [1, 2],
+      preserveCodeBlocks: options.preserveCodeBlocks ?? true,
+      idPrefix: options.idPrefix ?? '',
+      trim: options.trim ?? true,
+    };
+
+    // Validate options
+    if (this.options.maxChars <= 0) {
+      throw new Error(
+        `maxChars must be positive, got ${this.options.maxChars}`,
+      );
+    }
+    if (this.options.minChars < 0) {
+      throw new Error(
+        `minChars must be non-negative, got ${this.options.minChars}`,
+      );
+    }
+    if (this.options.overlap < 0) {
+      throw new Error(
+        `overlap must be non-negative, got ${this.options.overlap}`,
+      );
+    }
+    if (this.options.overlap >= this.options.maxChars) {
+      throw new Error(
+        `Overlap (${this.options.overlap}) must be less than maxChars (${this.options.maxChars})`,
+      );
+    }
+    if (this.options.minChars >= this.options.maxChars) {
+      throw new Error(
+        `minChars (${this.options.minChars}) must be less than maxChars (${this.options.maxChars})`,
+      );
+    }
+    if (this.options.headerLevels.length === 0) {
+      throw new Error('headerLevels must contain at least one level');
+    }
+    if (this.options.headerLevels.some((level) => level < 1 || level > 6)) {
+      throw new Error('headerLevels must contain values between 1 and 6');
+    }
+  }
+
+  /**
+   * Main entry point to split markdown into chunks
+   */
+  public splitMarkdownToChunks(markdown: string): Chunk[] {
+    // Handle empty input
+    if (!markdown || markdown.trim().length === 0) {
+      return [];
+    }
+
+    // Normalize line endings
+    const normalizedMarkdown = markdown.replace(/\r\n/g, '\n');
+
+    // Tokenize the markdown
+    const tokens = this.tokenize(normalizedMarkdown);
+
+    // Recursively split into segments
+    const rootSegment: Segment = { start: 0, end: normalizedMarkdown.length };
+    const segments = this.recursivelySplit(
+      rootSegment,
+      normalizedMarkdown,
+      tokens,
+    );
+
+    // Merge small segments to avoid tiny chunks
+    const mergedSegments = this.mergeSmallSegments(
+      segments,
+      normalizedMarkdown,
+      tokens.codeBlocks,
+    );
+
+    // Apply overlap and assemble chunks
+    const rawChunks = this.assembleChunksWithOverlap(
+      mergedSegments,
+      normalizedMarkdown,
+      tokens.codeBlocks,
+    );
+
+    // Attach metadata
+    return this.attachMetadata(rawChunks, normalizedMarkdown, tokens.headers);
+  }
+
+  /**
+   * Tokenize markdown to extract headers and code blocks
+   */
+  private tokenize(markdown: string): Tokens {
+    const headers: HeaderToken[] = [];
+    const codeBlocks: CodeBlockToken[] = [];
+
+    // Find all headers
+    const headerRegex = /^(#{1,6})\s+(.+?)(?:\s*#*)?$/gm;
+    let match: RegExpExecArray | null;
+
+    while ((match = headerRegex.exec(markdown)) !== null) {
+      const level = match[1].length;
+      const text = match[2].trim();
+      const start = match.index;
+      const end = match.index + match[0].length;
+
+      headers.push({ level, text, start, end });
+    }
+
+    // Find all code blocks
+    this.findCodeBlocks(markdown, codeBlocks);
+
+    // Filter out headers that are inside code blocks
+    const filteredHeaders = headers.filter((header) => {
+      return !codeBlocks.some(
+        (block) => header.start >= block.start && header.end <= block.end,
+      );
+    });
+
+    return { headers: filteredHeaders, codeBlocks };
+  }
+
+  /**
+   * Find all fenced code blocks in the markdown
+   */
+  private findCodeBlocks(markdown: string, codeBlocks: CodeBlockToken[]): void {
+    const lines = markdown.split('\n');
+    let inCodeBlock = false;
+    let currentBlock: Partial<CodeBlockToken> | null = null;
+    let charIndex = 0;
+
+    for (let i = 0; i < lines.length; i++) {
+      const line = lines[i];
+      const fenceMatch = line.match(/^(```+|~~~+)(.*)$/);
+
+      if (fenceMatch) {
+        const fence = fenceMatch[1].substring(0, 3) as '```' | '~~~';
+
+        if (!inCodeBlock) {
+          // Starting a code block
+          inCodeBlock = true;
+          currentBlock = {
+            start: charIndex,
+            fence,
+            infoString: fenceMatch[2].trim() || undefined,
+          };
+        } else if (currentBlock && line.startsWith(currentBlock.fence)) {
+          // Ending a code block
+          currentBlock.end = charIndex + line.length;
+          codeBlocks.push(currentBlock as CodeBlockToken);
+          inCodeBlock = false;
+          currentBlock = null;
+        }
+      }
+
+      charIndex += line.length + 1; // +1 for newline
+    }
+
+    // Handle unclosed code block
+    if (currentBlock && inCodeBlock) {
+      logger.warn(
+        'Unclosed code block detected, treating remaining content as plain text',
+      );
+    }
+  }
+
+  /**
+   * Recursively split a segment into smaller segments
+   */
+  private recursivelySplit(
+    segment: Segment,
+    markdown: string,
+    tokens: Tokens,
+  ): Segment[] {
+    const segmentText = markdown.slice(segment.start, segment.end);
+
+    // Base case: segment is within size limit
+    if (segmentText.length <= this.options.maxChars) {
+      return [segment];
+    }
+
+    // Try to split by headers
+    const headerSplits = this.splitByHeaders(segment, markdown, tokens);
+    if (headerSplits.length > 1) {
+      return headerSplits.flatMap((s) =>
+        this.recursivelySplit(s, markdown, tokens),
+      );
+    }
+
+    // Try to split by paragraphs
+    const paragraphSplits = this.splitByParagraphs(
+      segment,
+      markdown,
+      tokens.codeBlocks,
+    );
+    if (paragraphSplits.length > 1) {
+      return paragraphSplits.flatMap((s) =>
+        this.recursivelySplit(s, markdown, tokens),
+      );
+    }
+
+    // Try to split by lines
+    const lineSplits = this.splitByLines(segment, markdown, tokens.codeBlocks);
+    if (lineSplits.length > 1) {
+      return lineSplits.flatMap((s) =>
+        this.recursivelySplit(s, markdown, tokens),
+      );
+    }
+
+    // Cannot split further - return as is (may exceed maxChars)
+    if (segmentText.length > this.options.maxChars) {
+      // Check if it's a single code block
+      const isCodeBlock = tokens.codeBlocks.some(
+        (block) => block.start <= segment.start && block.end >= segment.end,
+      );
+      if (isCodeBlock) {
+        logger.warn(
+          `Code block exceeds maxChars (${segmentText.length} > ${this.options.maxChars})`,
+        );
+      } else {
+        logger.warn(
+          `Segment exceeds maxChars and cannot be split further (${segmentText.length} > ${this.options.maxChars})`,
+        );
+      }
+    }
+
+    return [segment];
+  }
+
+  /**
+   * Try to split segment by headers
+   */
+  private splitByHeaders(
+    segment: Segment,
+    markdown: string,
+    tokens: Tokens,
+  ): Segment[] {
+    // Find headers within this segment that are configured split levels
+    const segmentHeaders = tokens.headers.filter(
+      (h) =>
+        h.start >= segment.start &&
+        h.end <= segment.end &&
+        this.options.headerLevels.includes(h.level as 1 | 2),
+    );
+
+    if (segmentHeaders.length === 0) {
+      return [segment];
+    }
+
+    // Sort by position
+    segmentHeaders.sort((a, b) => a.start - b.start);
+
+    const segments: Segment[] = [];
+
+    // Handle content before first header
+    if (segmentHeaders[0].start > segment.start) {
+      segments.push({ start: segment.start, end: segmentHeaders[0].start });
+    }
+
+    // Process each header
+    for (let i = 0; i < segmentHeaders.length; i++) {
+      const header = segmentHeaders[i];
+      const nextHeader =
+        i + 1 < segmentHeaders.length ? segmentHeaders[i + 1] : null;
+
+      // Determine where this header's section ends
+      const sectionEnd = nextHeader ? nextHeader.start : segment.end;
+
+      // Create segment starting from this header
+      segments.push({ start: header.start, end: sectionEnd });
+    }
+
+    // Validate: ensure complete coverage with no gaps or overlaps
+    if (segments.length > 0) {
+      // Check first segment starts at segment beginning
+      if (segments[0].start !== segment.start) {
+        logger.error(
+          `First segment doesn't start at segment beginning: ${segments[0].start} vs ${segment.start}`,
+        );
+      }
+
+      // Check last segment ends at segment end
+      if (segments[segments.length - 1].end !== segment.end) {
+        logger.error(
+          `Last segment doesn't end at segment end: ${segments[segments.length - 1].end} vs ${segment.end}`,
+        );
+      }
+
+      // Check for gaps or overlaps between consecutive segments
+      for (let i = 1; i < segments.length; i++) {
+        if (segments[i].start !== segments[i - 1].end) {
+          logger.error(
+            `Gap or overlap detected between segments: ${segments[i - 1].end} to ${segments[i].start}`,
+          );
+        }
+      }
+    }
+
+    return segments.length > 1 ? segments : [segment];
+  }
+
+  /**
+   * Try to split segment by paragraphs (double newlines)
+   */
+  private splitByParagraphs(
+    segment: Segment,
+    markdown: string,
+    codeBlocks: CodeBlockToken[],
+  ): Segment[] {
+    const segmentText = markdown.slice(segment.start, segment.end);
+    const segments: Segment[] = [];
+
+    // Find paragraph boundaries (double newlines)
+    const paragraphRegex = /\n\n+/g;
+    let currentStart = 0;
+    let match: RegExpExecArray | null;
+    const splitPoints: number[] = [];
+
+    // Collect all valid split points
+    while ((match = paragraphRegex.exec(segmentText)) !== null) {
+      const splitPoint = segment.start + match.index + match[0].length;
+      // Check if split point is inside a code block
+      if (!this.isInsideCodeBlock(splitPoint, codeBlocks)) {
+        splitPoints.push(match.index + match[0].length);
+      }
+    }
+
+    // Create segments based on split points
+    for (const splitPoint of splitPoints) {
+      segments.push({
+        start: segment.start + currentStart,
+        end: segment.start + splitPoint,
+      });
+      currentStart = splitPoint;
+    }
+
+    // Add final segment if there's remaining content
+    if (currentStart < segmentText.length) {
+      segments.push({
+        start: segment.start + currentStart,
+        end: segment.end,
+      });
+    }
+
+    return segments.length > 1 ? segments : [segment];
+  }
+
+  /**
+   * Try to split segment by lines
+   */
+  private splitByLines(
+    segment: Segment,
+    markdown: string,
+    codeBlocks: CodeBlockToken[],
+  ): Segment[] {
+    const segmentText = markdown.slice(segment.start, segment.end);
+    const lines = segmentText.split('\n');
+    const segments: Segment[] = [];
+
+    let currentStart = segment.start;
+    let currentLength = 0;
+    let lineStart = segment.start;
+
+    for (let i = 0; i < lines.length; i++) {
+      const lineLength = lines[i].length + 1; // +1 for newline
+
+      if (
+        currentLength + lineLength > this.options.maxChars &&
+        currentLength > 0
+      ) {
+        // Check if we can split here
+        if (!this.isInsideCodeBlock(lineStart, codeBlocks)) {
+          segments.push({
+            start: currentStart,
+            end: lineStart,
+          });
+          currentStart = lineStart;
+          currentLength = lineLength;
+        } else {
+          currentLength += lineLength;
+        }
+      } else {
+        currentLength += lineLength;
+      }
+
+      lineStart += lineLength;
+    }
+
+    // Add final segment
+    if (currentStart < segment.end) {
+      segments.push({
+        start: currentStart,
+        end: segment.end,
+      });
+    }
+
+    return segments.length > 1 ? segments : [segment];
+  }
+
+  /**
+   * Check if a position is inside a code block
+   */
+  private isInsideCodeBlock(
+    position: number,
+    codeBlocks: CodeBlockToken[],
+  ): boolean {
+    return codeBlocks.some(
+      (block) => position >= block.start && position < block.end,
+    );
+  }
+
+  /**
+   * Merge segments that are too small with adjacent segments
+   */
+  private mergeSmallSegments(
+    segments: Segment[],
+    markdown: string,
+    codeBlocks: CodeBlockToken[],
+  ): Segment[] {
+    if (segments.length <= 1) return segments;
+
+    const mergedSegments: Segment[] = [];
+    let currentSegment: Segment | null = null;
+
+    for (let i = 0; i < segments.length; i++) {
+      const segment = segments[i];
+      const segmentLength = segment.end - segment.start;
+      const isLastSegment = i === segments.length - 1;
+
+      if (currentSegment === null) {
+        currentSegment = { ...segment };
+      } else {
+        const currentLength = currentSegment.end - currentSegment.start;
+        const combinedLength =
+          currentSegment.end - currentSegment.start + segmentLength;
+
+        // Determine if we should merge
+        const shouldMerge =
+          // Either segment is too small
+          ((segmentLength < this.options.minChars ||
+            currentLength < this.options.minChars) &&
+            // And merging won't exceed maxChars
+            combinedLength <= this.options.maxChars) ||
+          // OR this is the last segment and it's too small
+          (isLastSegment && segmentLength < this.options.minChars);
+
+        if (shouldMerge) {
+          // Merge by extending current segment
+          currentSegment.end = segment.end;
+        } else {
+          // Don't merge - push current and start new
+          mergedSegments.push(currentSegment);
+          currentSegment = { ...segment };
+        }
+      }
+    }
+
+    // Don't forget the last segment
+    if (currentSegment !== null) {
+      // Special handling for final segment if it's still too small
+      const currentLength = currentSegment.end - currentSegment.start;
+      if (currentLength < this.options.minChars && mergedSegments.length > 0) {
+        // Try to merge with previous segment
+        const lastMerged = mergedSegments[mergedSegments.length - 1];
+        const combinedLength =
+          lastMerged.end - lastMerged.start + currentLength;
+
+        if (combinedLength <= this.options.maxChars * 1.5) {
+          // Allow some flexibility for the final merge to avoid tiny final chunks
+          lastMerged.end = currentSegment.end;
+        } else {
+          // Can't merge without significantly exceeding limits
+          mergedSegments.push(currentSegment);
+        }
+      } else {
+        mergedSegments.push(currentSegment);
+      }
+    }
+
+    // Final pass: ensure no segment ends in the middle of a code block
+    const finalSegments: Segment[] = [];
+    for (const segment of mergedSegments) {
+      let adjustedEnd = segment.end;
+
+      // Check if segment end is inside a code block
+      for (const block of codeBlocks) {
+        if (segment.end > block.start && segment.end < block.end) {
+          // Extend to include the entire code block
+          adjustedEnd = block.end;
+          break;
+        }
+      }
+
+      finalSegments.push({
+        start: segment.start,
+        end: adjustedEnd,
+      });
+    }
+
+    return finalSegments;
+  }
+
+  /**
+   * Assemble chunks with overlap handling
+   */
+  private assembleChunksWithOverlap(
+    segments: Segment[],
+    markdown: string,
+    codeBlocks: CodeBlockToken[],
+  ): Array<{
+    content: string;
+    start: number;
+    end: number;
+    overlapStart?: number;
+  }> {
+    if (segments.length === 0) return [];
+
+    const chunks: Array<{
+      content: string;
+      start: number;
+      end: number;
+      overlapStart?: number;
+    }> = [];
+
+    for (let i = 0; i < segments.length; i++) {
+      const segment = segments[i];
+      let content = markdown.slice(segment.start, segment.end);
+      let chunkStart = segment.start;
+
+      // For chunks after the first, prepend overlap from previous segment
+      if (i > 0 && this.options.overlap > 0) {
+        const prevSegment = segments[i - 1];
+        const prevContent = markdown.slice(prevSegment.start, prevSegment.end);
+
+        // Calculate how much overlap to take from the previous segment
+        const overlapLength = Math.min(
+          this.options.overlap,
+          prevContent.length,
+        );
+        let overlapStart = prevContent.length - overlapLength;
+
+        // Check if the overlap would start in the middle of a code block
+        const overlapAbsoluteStart = prevSegment.start + overlapStart;
+        for (const block of codeBlocks) {
+          if (
+            overlapAbsoluteStart > block.start &&
+            overlapAbsoluteStart < block.end
+          ) {
+            // Overlap would start inside a code block
+            if (block.end <= prevSegment.end) {
+              // The code block ends within the previous segment
+              // Start overlap after the code block to avoid duplication
+              const blockEndInSegment = block.end - prevSegment.start;
+              if (blockEndInSegment < prevContent.length) {
+                overlapStart = blockEndInSegment;
+              }
+            }
+            break;
+          }
+        }
+
+        // Extract overlap text from the adjusted position
+        const overlapText = prevContent.slice(overlapStart);
+
+        // Prepend overlap to current content
+        content = overlapText + content;
+
+        // Track where the actual content starts (including overlap)
+        chunkStart = prevSegment.start + overlapStart;
+      }
+
+      chunks.push({
+        content: this.options.trim ? content.trim() : content,
+        start: chunkStart, // Now reflects the actual start including overlap
+        end: segment.end,
+        overlapStart: i > 0 ? segment.start : undefined, // Original segment start for reference
+      });
+    }
+
+    return chunks;
+  }
+
+  /**
+   * Attach metadata to chunks
+   */
+  private attachMetadata(
+    rawChunks: Array<{ content: string; start: number; end: number }>,
+    markdown: string,
+    headers: HeaderToken[],
+  ): Chunk[] {
+    const chunks: Chunk[] = [];
+    const titleCounts = new Map<string, number>();
+
+    for (const rawChunk of rawChunks) {
+      // Find the last header before or within this chunk that's in our configured levels
+      let title = 'ROOT';
+      let headerPath: string[] = [];
+
+      // Build full header path from all headers up to the end of this chunk
+      const allHeadersBeforeEnd = headers.filter((h) => h.start < rawChunk.end);
+      const headerStack: { level: number; text: string }[] = [];
+
+      for (const header of allHeadersBeforeEnd) {
+        // Pop headers from stack that are same or lower level
+        while (
+          headerStack.length > 0 &&
+          headerStack[headerStack.length - 1].level >= header.level
+        ) {
+          headerStack.pop();
+        }
+        headerStack.push({ level: header.level, text: header.text });
+      }
+
+      headerPath = headerStack.map((h) => h.text);
+
+      // Find title from configured levels - check headers within the chunk first
+      const headersInChunk = headers.filter(
+        (h) =>
+          h.start >= rawChunk.start &&
+          h.start < rawChunk.end &&
+          this.options.headerLevels.includes(h.level as 1 | 2),
+      );
+
+      if (headersInChunk.length > 0) {
+        // Use the first configured header within the chunk
+        title = headersInChunk[0].text;
+      } else {
+        // Otherwise, use the last configured header before the chunk
+        for (let i = headerStack.length - 1; i >= 0; i--) {
+          if (
+            this.options.headerLevels.includes(headerStack[i].level as 1 | 2)
+          ) {
+            title = headerStack[i].text;
+            break;
+          }
+        }
+      }
+
+      // Track chunk numbers per title (0-based)
+      const count = titleCounts.get(title) || 0;
+      titleCounts.set(title, count + 1);
+
+      // Generate unique ID using 0-based numbering
+      const slug = this.slugify(title);
+      const uniqueId = this.options.idPrefix
+        ? `${this.options.idPrefix}-${slug}-${count}`
+        : `${slug}-${count}`;
+
+      chunks.push({
+        content: rawChunk.content,
+        meta: {
+          title,
+          chunkNumber: count,
+          uniqueId,
+          startChar: rawChunk.start,
+          endChar: rawChunk.end,
+          headerPath,
+        },
+      });
+    }
+
+    return chunks;
+  }
+
+  /**
+   * Convert a string to a slug
+   */
+  private slugify(text: string): string {
+    return text
+      .toLowerCase()
+      .replace(/[^\w\s-]/g, '') // Remove non-word characters
+      .replace(/\s+/g, '-') // Replace spaces with hyphens
+      .replace(/-+/g, '-') // Replace multiple hyphens with single
+      .replace(/^-+|-+$/g, ''); // Remove leading/trailing hyphens
+  }
+}
+
+// Export the main function as well for convenience
+export function splitMarkdownToChunks(
+  markdown: string,
+  opts?: SplitOptions,
+): Chunk[] {
+  const splitter = new RecursiveMarkdownSplitter(opts);
+  return splitter.splitMarkdownToChunks(markdown);
+}
diff --git a/packages/ingester/src/utils/__tests__/RecursiveMarkdownSplitter.finalChunk.test.ts b/packages/ingester/src/utils/__tests__/RecursiveMarkdownSplitter.finalChunk.test.ts
new file mode 100644
index 00000000..c4249322
--- /dev/null
+++ b/packages/ingester/src/utils/__tests__/RecursiveMarkdownSplitter.finalChunk.test.ts
@@ -0,0 +1,169 @@
+import { RecursiveMarkdownSplitter } from '../RecursiveMarkdownSplitter';
+
+describe('RecursiveMarkdownSplitter - Final chunk handling', () => {
+  it('should deterministically handle final tiny chunks', () => {
+    const splitter = new RecursiveMarkdownSplitter({
+      maxChars: 100,
+      minChars: 50,
+      overlap: 10,
+      headerLevels: [1, 2],
+      trim: true,
+    });
+
+    // Text that will create a tiny final chunk
+    const text = `# Section One
+This is the first section with enough content to meet the minimum character requirement.
+
+# Section Two
+This is the second section with enough content to meet the minimum character requirement.
+
+# Section Three
+Tiny bit.`;
+
+    const chunks = splitter.splitMarkdownToChunks(text);
+
+    // Debug output
+    console.log(
+      'Chunks:',
+      chunks.map((c) => ({
+        title: c.meta.title,
+        length: c.content.length,
+        preview: c.content.substring(0, 30).replace(/\n/g, '\\n'),
+      })),
+    );
+
+    // The final tiny chunk should be merged with the previous one
+    const lastChunk = chunks[chunks.length - 1];
+
+    // Verify the tiny content was handled appropriately
+    const hasTinyContent = chunks.some((c) => c.content.includes('Tiny bit'));
+    expect(hasTinyContent).toBe(true);
+
+    // The tiny section should not be on its own
+    const tinyChunk = chunks.find((c) => c.meta.title === 'Section Three');
+    if (tinyChunk) {
+      expect(tinyChunk.content.length).toBeGreaterThanOrEqual(50); // Should meet minChars
+    }
+  });
+
+  it('should handle multiple tiny segments at the end', () => {
+    const splitter = new RecursiveMarkdownSplitter({
+      maxChars: 100,
+      minChars: 40,
+      overlap: 0,
+      headerLevels: [1],
+      trim: true,
+    });
+
+    const text = `# Main Section
+This is the main section with sufficient content to be a proper chunk.
+
+# Tiny 1
+Small.
+
+# Tiny 2
+Also small.
+
+# Tiny 3
+Very small.`;
+
+    const chunks = splitter.splitMarkdownToChunks(text);
+
+    // All tiny sections should be merged together
+    expect(chunks.length).toBe(2);
+
+    const lastChunk = chunks[chunks.length - 1];
+    expect(lastChunk.content).toContain('Tiny 1');
+    expect(lastChunk.content).toContain('Tiny 2');
+    expect(lastChunk.content).toContain('Tiny 3');
+  });
+
+  it('should not exceed maxChars significantly when merging final chunk', () => {
+    const splitter = new RecursiveMarkdownSplitter({
+      maxChars: 50,
+      minChars: 30,
+      overlap: 0,
+      headerLevels: [1],
+      trim: true,
+    });
+
+    const text = `# Section One
+This section has exactly the right amount of content.
+
+# Section Two
+This section also has exactly the right amount of content.
+
+# Tiny
+End.`;
+
+    const chunks = splitter.splitMarkdownToChunks(text);
+
+    // Check that tiny chunks are handled appropriately
+    const lastChunk = chunks[chunks.length - 1];
+
+    // If there's a tiny chunk, it should either be merged or meet minChars
+    if (lastChunk.meta.title === 'Tiny') {
+      expect(lastChunk.content.length).toBeGreaterThanOrEqual(30);
+    }
+
+    // No chunk should be excessively large
+    chunks.forEach((chunk) => {
+      expect(chunk.content.length).toBeLessThanOrEqual(75); // 1.5x maxChars
+    });
+  });
+
+  it('should handle edge case where all segments are tiny', () => {
+    const splitter = new RecursiveMarkdownSplitter({
+      maxChars: 100,
+      minChars: 50,
+      overlap: 0,
+      headerLevels: [1],
+      trim: true,
+    });
+
+    const text = `# A
+Short.
+
+# B
+Brief.
+
+# C
+Tiny.`;
+
+    const chunks = splitter.splitMarkdownToChunks(text);
+
+    // All should be merged into one chunk
+    expect(chunks.length).toBe(1);
+    expect(chunks[0].content).toContain('# A');
+    expect(chunks[0].content).toContain('# B');
+    expect(chunks[0].content).toContain('# C');
+  });
+
+  it('should preserve code blocks when merging final chunks', () => {
+    const splitter = new RecursiveMarkdownSplitter({
+      maxChars: 100,
+      minChars: 50,
+      overlap: 0,
+      preserveCodeBlocks: true,
+      trim: true,
+    });
+
+    const text = `# Section One
+Content before code block.
+
+\`\`\`python
+def hello():
+    print("Hello")
+\`\`\`
+
+# Tiny Section
+End.`;
+
+    const chunks = splitter.splitMarkdownToChunks(text);
+
+    // Code block should be preserved intact
+    const codeChunk = chunks.find((c) => c.content.includes('def hello()'));
+    expect(codeChunk).toBeDefined();
+    expect(codeChunk!.content).toMatch(/```python[\s\S]*?```/);
+  });
+});
diff --git a/packages/ingester/src/utils/__tests__/RecursiveMarkdownSplitter.minChars.test.ts b/packages/ingester/src/utils/__tests__/RecursiveMarkdownSplitter.minChars.test.ts
new file mode 100644
index 00000000..a5b6578a
--- /dev/null
+++ b/packages/ingester/src/utils/__tests__/RecursiveMarkdownSplitter.minChars.test.ts
@@ -0,0 +1,135 @@
+import { RecursiveMarkdownSplitter } from '../RecursiveMarkdownSplitter';
+
+describe('RecursiveMarkdownSplitter - minChars functionality', () => {
+  it('should merge segments smaller than minChars', () => {
+    const splitter = new RecursiveMarkdownSplitter({
+      maxChars: 200,
+      minChars: 100,
+      overlap: 0,
+      headerLevels: [1, 2],
+    });
+
+    const text = `# Section 1
+Short content.
+
+# Section 2
+Also short.
+
+# Section 3
+This is a bit longer content that might be closer to the minimum.`;
+
+    const chunks = splitter.splitMarkdownToChunks(text);
+
+    // With minChars=100, the short sections should be merged
+    expect(chunks.length).toBeLessThan(3);
+
+    // All chunks should be at least minChars (except possibly the last one)
+    chunks.forEach((chunk, index) => {
+      if (index < chunks.length - 1) {
+        expect(chunk.content.length).toBeGreaterThanOrEqual(100);
+      }
+    });
+  });
+
+  it('should not merge if it would exceed maxChars', () => {
+    const splitter = new RecursiveMarkdownSplitter({
+      maxChars: 100,
+      minChars: 50,
+      overlap: 0,
+      headerLevels: [1, 2],
+    });
+
+    const text = `# Section 1
+This section has exactly enough content to be close to the max limit when combined with another section. It's quite long.
+
+# Section 2
+This section is also substantial with a good amount of content that would exceed limits.`;
+
+    const chunks = splitter.splitMarkdownToChunks(text);
+
+    // Should not merge if combined length would exceed maxChars significantly
+    // With the 1.5x flexibility for final chunks, they might merge if total < 150 chars
+    // Let's verify chunks are reasonably sized
+    chunks.forEach((chunk) => {
+      expect(chunk.content.length).toBeLessThanOrEqual(150); // 1.5x maxChars
+    });
+
+    // If chunks are merged, ensure it's within reasonable bounds
+    if (chunks.length === 1) {
+      expect(chunks[0].content.length).toBeLessThanOrEqual(150);
+    }
+  });
+
+  it('should handle the problematic formatting example', () => {
+    const splitter = new RecursiveMarkdownSplitter({
+      maxChars: 500,
+      minChars: 200,
+      overlap: 0,
+      headerLevels: [1, 2],
+      preserveCodeBlocks: true,
+    });
+
+    const text = `## Formatting and Debugging
+
+The \`core::fmt\` module provides functionality for formatting values.
+
+### Debug Trait
+
+The \`Debug\` trait is used for debug formatting.
+
+\`\`\`cairo
+pub trait Debug<T>
+\`\`\`
+
+#### \`fmt\` Function
+
+The \`fmt\` function within the \`Debug\` trait is responsible for formatting.
+
+### Display Trait
+
+The \`Display\` trait is used for standard formatting.`;
+
+    const chunks = splitter.splitMarkdownToChunks(text);
+
+    // Should create fewer, more substantial chunks
+    expect(chunks.length).toBeLessThanOrEqual(2);
+
+    // Each chunk should be meaningful in size
+    chunks.forEach((chunk) => {
+      expect(chunk.content.length).toBeGreaterThan(100);
+    });
+  });
+
+  it('should respect code block boundaries when merging', () => {
+    const splitter = new RecursiveMarkdownSplitter({
+      maxChars: 300,
+      minChars: 150,
+      overlap: 0,
+      headerLevels: [1, 2],
+      preserveCodeBlocks: true,
+    });
+
+    const text = `# Section 1
+Short intro.
+
+\`\`\`cairo
+// This is a long code block
+fn example() -> felt252 {
+    let x = 42;
+    let y = x * 2;
+    return y;
+}
+\`\`\`
+
+# Section 2
+Another short section.`;
+
+    const chunks = splitter.splitMarkdownToChunks(text);
+
+    // Verify code blocks are not split
+    chunks.forEach((chunk) => {
+      const codeBlockMatches = chunk.content.match(/```/g) || [];
+      expect(codeBlockMatches.length % 2).toBe(0);
+    });
+  });
+});
diff --git a/packages/ingester/src/utils/__tests__/RecursiveMarkdownSplitter.reconstruction.test.ts b/packages/ingester/src/utils/__tests__/RecursiveMarkdownSplitter.reconstruction.test.ts
new file mode 100644
index 00000000..b8be51f8
--- /dev/null
+++ b/packages/ingester/src/utils/__tests__/RecursiveMarkdownSplitter.reconstruction.test.ts
@@ -0,0 +1,433 @@
+import {
+  RecursiveMarkdownSplitter,
+  SplitOptions,
+} from '../RecursiveMarkdownSplitter';
+
+describe('RecursiveMarkdownSplitter - Reconstruction Tests', () => {
+  /**
+   * These tests verify that when we split a document and then concatenate
+   * the chunks (excluding overlaps), we get back the original content.
+   * This ensures our splitting logic doesn't lose or duplicate content.
+   */
+
+  function reconstructFromChunks(
+    chunks: Array<{
+      content: string;
+      start: number;
+      end: number;
+      overlapStart?: number;
+    }>,
+    original: string,
+  ): string {
+    if (chunks.length === 0) return '';
+
+    let result = '';
+    let lastEnd = 0;
+
+    for (let i = 0; i < chunks.length; i++) {
+      const chunk = chunks[i];
+
+      if (i === 0) {
+        // First chunk - use entire content
+        result = original.substring(chunk.start, chunk.end);
+        lastEnd = chunk.end;
+      } else if (chunk.overlapStart !== undefined) {
+        // Subsequent chunks with overlap - append only the non-overlapped portion
+        result += original.substring(chunk.overlapStart, chunk.end);
+        lastEnd = chunk.end;
+      } else {
+        // No overlap tracking - shouldn't happen but handle gracefully
+        result += original.substring(lastEnd, chunk.end);
+        lastEnd = chunk.end;
+      }
+    }
+
+    return result;
+  }
+
+  describe('Header splitting reconstruction', () => {
+    it('should reconstruct document with single header', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 50,
+        minChars: 0,
+        overlap: 10,
+        headerLevels: [1],
+        trim: false, // Important for exact reconstruction
+      });
+
+      const original = `# Header One
+This is the first section with some content.
+
+More content in the first section.`;
+
+      const chunks = splitter.splitMarkdownToChunks(original);
+
+      // Extract the raw chunks before metadata attachment
+      const rawChunks = (splitter as any).assembleChunksWithOverlap(
+        (splitter as any).mergeSmallSegments(
+          (splitter as any).recursivelySplit(
+            { start: 0, end: original.length },
+            original,
+            (splitter as any).tokenize(original),
+          ),
+          original,
+          (splitter as any).tokenize(original).codeBlocks,
+        ),
+        original,
+        (splitter as any).tokenize(original).codeBlocks,
+      );
+
+      const reconstructed = reconstructFromChunks(rawChunks, original);
+      expect(reconstructed).toBe(original);
+    });
+
+    it('should reconstruct document with multiple headers at same level', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 60,
+        minChars: 0,
+        overlap: 15,
+        headerLevels: [1],
+        trim: false,
+      });
+
+      const original = `# First Section
+Content for the first section goes here.
+
+# Second Section
+Content for the second section goes here.
+
+# Third Section
+Content for the third section goes here.`;
+
+      const chunks = splitter.splitMarkdownToChunks(original);
+
+      // Extract raw chunks
+      const rawChunks = (splitter as any).assembleChunksWithOverlap(
+        (splitter as any).mergeSmallSegments(
+          (splitter as any).recursivelySplit(
+            { start: 0, end: original.length },
+            original,
+            (splitter as any).tokenize(original),
+          ),
+          original,
+          (splitter as any).tokenize(original).codeBlocks,
+        ),
+        original,
+        (splitter as any).tokenize(original).codeBlocks,
+      );
+
+      const reconstructed = reconstructFromChunks(rawChunks, original);
+      expect(reconstructed).toBe(original);
+    });
+
+    it('should reconstruct document with nested headers', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 80,
+        minChars: 0,
+        overlap: 20,
+        headerLevels: [1, 2],
+        trim: false,
+      });
+
+      const original = `# Main Section
+Introduction to the main section.
+
+## Subsection 1
+Details about subsection 1.
+
+## Subsection 2
+Details about subsection 2.
+
+# Another Main Section
+Content for another main section.`;
+
+      const chunks = splitter.splitMarkdownToChunks(original);
+
+      const rawChunks = (splitter as any).assembleChunksWithOverlap(
+        (splitter as any).mergeSmallSegments(
+          (splitter as any).recursivelySplit(
+            { start: 0, end: original.length },
+            original,
+            (splitter as any).tokenize(original),
+          ),
+          original,
+          (splitter as any).tokenize(original).codeBlocks,
+        ),
+        original,
+        (splitter as any).tokenize(original).codeBlocks,
+      );
+
+      const reconstructed = reconstructFromChunks(rawChunks, original);
+      expect(reconstructed).toBe(original);
+    });
+
+    it('should reconstruct document with headers at start', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 40,
+        minChars: 0,
+        overlap: 10,
+        headerLevels: [1],
+        trim: false,
+      });
+
+      const original = `# Header at Start
+Content immediately after header.
+
+More content here.`;
+
+      const chunks = splitter.splitMarkdownToChunks(original);
+
+      const rawChunks = (splitter as any).assembleChunksWithOverlap(
+        (splitter as any).mergeSmallSegments(
+          (splitter as any).recursivelySplit(
+            { start: 0, end: original.length },
+            original,
+            (splitter as any).tokenize(original),
+          ),
+          original,
+          (splitter as any).tokenize(original).codeBlocks,
+        ),
+        original,
+        (splitter as any).tokenize(original).codeBlocks,
+      );
+
+      const reconstructed = reconstructFromChunks(rawChunks, original);
+      expect(reconstructed).toBe(original);
+    });
+
+    it('should reconstruct document with content before first header', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 50,
+        minChars: 0,
+        overlap: 10,
+        headerLevels: [1],
+        trim: false,
+      });
+
+      const original = `Some preamble text before any headers.
+
+# First Header
+Content under first header.
+
+# Second Header
+Content under second header.`;
+
+      const chunks = splitter.splitMarkdownToChunks(original);
+
+      const rawChunks = (splitter as any).assembleChunksWithOverlap(
+        (splitter as any).mergeSmallSegments(
+          (splitter as any).recursivelySplit(
+            { start: 0, end: original.length },
+            original,
+            (splitter as any).tokenize(original),
+          ),
+          original,
+          (splitter as any).tokenize(original).codeBlocks,
+        ),
+        original,
+        (splitter as any).tokenize(original).codeBlocks,
+      );
+
+      const reconstructed = reconstructFromChunks(rawChunks, original);
+      expect(reconstructed).toBe(original);
+    });
+
+    it('should reconstruct document with consecutive headers', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 60,
+        minChars: 0,
+        overlap: 10,
+        headerLevels: [1, 2],
+        trim: false,
+      });
+
+      const original = `# Main Header
+## Subheader 1
+## Subheader 2
+Content after headers.
+
+## Subheader 3
+More content.`;
+
+      const chunks = splitter.splitMarkdownToChunks(original);
+
+      const rawChunks = (splitter as any).assembleChunksWithOverlap(
+        (splitter as any).mergeSmallSegments(
+          (splitter as any).recursivelySplit(
+            { start: 0, end: original.length },
+            original,
+            (splitter as any).tokenize(original),
+          ),
+          original,
+          (splitter as any).tokenize(original).codeBlocks,
+        ),
+        original,
+        (splitter as any).tokenize(original).codeBlocks,
+      );
+
+      const reconstructed = reconstructFromChunks(rawChunks, original);
+      expect(reconstructed).toBe(original);
+    });
+  });
+
+  describe('Code block reconstruction', () => {
+    it('should reconstruct document with code blocks', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 60,
+        minChars: 0,
+        overlap: 15,
+        preserveCodeBlocks: true,
+        trim: false,
+      });
+
+      const original = `# Section with Code
+Some text before code.
+
+\`\`\`python
+def hello():
+    print("Hello, World!")
+\`\`\`
+
+Text after code block.`;
+
+      const chunks = splitter.splitMarkdownToChunks(original);
+
+      const rawChunks = (splitter as any).assembleChunksWithOverlap(
+        (splitter as any).mergeSmallSegments(
+          (splitter as any).recursivelySplit(
+            { start: 0, end: original.length },
+            original,
+            (splitter as any).tokenize(original),
+          ),
+          original,
+          (splitter as any).tokenize(original).codeBlocks,
+        ),
+        original,
+        (splitter as any).tokenize(original).codeBlocks,
+      );
+
+      const reconstructed = reconstructFromChunks(rawChunks, original);
+      expect(reconstructed).toBe(original);
+    });
+
+    it('should reconstruct document with large code block', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 50,
+        minChars: 0,
+        overlap: 10,
+        preserveCodeBlocks: true,
+        trim: false,
+      });
+
+      const original = `# Code Example
+Here's a large code block:
+
+\`\`\`javascript
+// This is a large code block that exceeds maxChars
+function complexFunction() {
+    const result = performCalculation();
+    return result;
+}
+\`\`\`
+
+Text after the code.`;
+
+      const chunks = splitter.splitMarkdownToChunks(original);
+
+      const rawChunks = (splitter as any).assembleChunksWithOverlap(
+        (splitter as any).mergeSmallSegments(
+          (splitter as any).recursivelySplit(
+            { start: 0, end: original.length },
+            original,
+            (splitter as any).tokenize(original),
+          ),
+          original,
+          (splitter as any).tokenize(original).codeBlocks,
+        ),
+        original,
+        (splitter as any).tokenize(original).codeBlocks,
+      );
+
+      const reconstructed = reconstructFromChunks(rawChunks, original);
+      expect(reconstructed).toBe(original);
+    });
+  });
+
+  describe('Complex document reconstruction', () => {
+    it('should reconstruct a complex markdown document', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 100,
+        minChars: 20,
+        overlap: 25,
+        headerLevels: [1, 2],
+        preserveCodeBlocks: true,
+        trim: false,
+      });
+
+      const original = `# Cairo Programming Guide
+
+Welcome to the Cairo programming guide. This document covers the basics.
+
+## Getting Started
+
+To get started with Cairo, you need to understand the fundamentals.
+
+### Installation
+
+First, install the Cairo compiler:
+
+\`\`\`bash
+curl -L https://github.com/starkware-libs/cairo/releases/download/v2.0.0/cairo-lang-2.0.0.tar.gz | tar xz
+cd cairo-lang-2.0.0
+./install.sh
+\`\`\`
+
+### Your First Program
+
+Here's a simple Cairo program:
+
+\`\`\`cairo
+fn main() {
+    let x = 1;
+    let y = 2;
+    assert(x + y == 3, 'Math is broken!');
+}
+\`\`\`
+
+## Advanced Topics
+
+Once you understand the basics, you can explore advanced features.
+
+### Memory Management
+
+Cairo uses a unique memory model based on field elements.
+
+### Smart Contracts
+
+You can write smart contracts in Cairo for StarkNet.
+
+## Conclusion
+
+Cairo is a powerful language for writing provable programs.`;
+
+      const chunks = splitter.splitMarkdownToChunks(original);
+
+      const rawChunks = (splitter as any).assembleChunksWithOverlap(
+        (splitter as any).mergeSmallSegments(
+          (splitter as any).recursivelySplit(
+            { start: 0, end: original.length },
+            original,
+            (splitter as any).tokenize(original),
+          ),
+          original,
+          (splitter as any).tokenize(original).codeBlocks,
+        ),
+        original,
+        (splitter as any).tokenize(original).codeBlocks,
+      );
+
+      const reconstructed = reconstructFromChunks(rawChunks, original);
+      expect(reconstructed).toBe(original);
+    });
+  });
+});
diff --git a/packages/ingester/src/utils/__tests__/RecursiveMarkdownSplitter.test.ts b/packages/ingester/src/utils/__tests__/RecursiveMarkdownSplitter.test.ts
new file mode 100644
index 00000000..67d93e68
--- /dev/null
+++ b/packages/ingester/src/utils/__tests__/RecursiveMarkdownSplitter.test.ts
@@ -0,0 +1,544 @@
+import {
+  RecursiveMarkdownSplitter,
+  SplitOptions,
+  Chunk,
+} from '../RecursiveMarkdownSplitter';
+
+describe('RecursiveMarkdownSplitter', () => {
+  describe('Basic functionality', () => {
+    it('should handle empty input', () => {
+      const splitter = new RecursiveMarkdownSplitter();
+      expect(splitter.splitMarkdownToChunks('')).toEqual([]);
+      expect(splitter.splitMarkdownToChunks('   ')).toEqual([]);
+    });
+
+    it('should handle single small chunk', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 100,
+        minChars: 0,
+        overlap: 10,
+      });
+      const text = 'This is a small chunk of text.';
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      expect(chunks).toHaveLength(1);
+      expect(chunks[0].content).toBe(text);
+      expect(chunks[0].meta.title).toBe('ROOT');
+      expect(chunks[0].meta.chunkNumber).toBe(0);
+    });
+
+    it('should throw error when overlap >= maxChars', () => {
+      expect(() => {
+        new RecursiveMarkdownSplitter({
+          maxChars: 100,
+          minChars: 0,
+          overlap: 100,
+        });
+      }).toThrow('Overlap (100) must be less than maxChars (100)');
+    });
+  });
+
+  describe('Header detection and splitting', () => {
+    it('should split on H1 headers', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 50,
+        minChars: 0,
+        overlap: 0,
+        headerLevels: [1],
+      });
+
+      const text = `# First Section
+This is the first section content.
+
+# Second Section
+This is the second section content.`;
+
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      // Headers split the content, so we should have chunks for each section
+      const firstSectionChunk = chunks.find(
+        (c) => c.meta.title === 'First Section',
+      );
+      const secondSectionChunk = chunks.find(
+        (c) => c.meta.title === 'Second Section',
+      );
+
+      expect(firstSectionChunk).toBeDefined();
+      expect(secondSectionChunk).toBeDefined();
+    });
+
+    it('should split on both H1 and H2 headers', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 50,
+        minChars: 0,
+        overlap: 0,
+        headerLevels: [1, 2],
+      });
+
+      const text = `# Main Section
+Some intro text.
+
+## Subsection 1
+First subsection.
+
+## Subsection 2
+Second subsection.`;
+
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      expect(chunks.length).toBeGreaterThanOrEqual(3);
+      expect(chunks[0].meta.title).toBe('Main Section');
+      expect(chunks.find((c) => c.meta.title === 'Subsection 1')).toBeDefined();
+      expect(chunks.find((c) => c.meta.title === 'Subsection 2')).toBeDefined();
+    });
+
+    it('should ignore headers inside code blocks', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 200,
+        minChars: 0,
+        overlap: 0,
+      });
+
+      const text = `# Real Header
+Some content.
+
+\`\`\`markdown
+# This is not a real header
+It's inside a code block
+\`\`\`
+
+More content.`;
+
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      expect(chunks).toHaveLength(1);
+      expect(chunks[0].meta.title).toBe('Real Header');
+      expect(chunks[0].content).toContain('# This is not a real header');
+    });
+
+    it('should handle headers with trailing hashes', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 100,
+        minChars: 0,
+        overlap: 10,
+      });
+      const text = '## Header with trailing hashes ##\nContent here.';
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      expect(chunks[0].meta.title).toBe('Header with trailing hashes');
+    });
+  });
+
+  describe('Code block handling', () => {
+    it('should not split inside code blocks', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 50,
+        minChars: 0,
+        overlap: 0,
+        preserveCodeBlocks: true,
+      });
+
+      const text = `Some text before.
+
+\`\`\`python
+def long_function():
+    # This is a long code block that exceeds maxChars
+    print("This should not be split")
+    return "Even though it's longer than 50 chars"
+\`\`\`
+
+Some text after.`;
+
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      // Verify code block is kept intact
+      const codeBlockChunk = chunks.find((c) =>
+        c.content.includes('def long_function()'),
+      );
+      expect(codeBlockChunk).toBeDefined();
+      expect(codeBlockChunk!.content).toContain('```python');
+      expect(codeBlockChunk!.content).toContain('```');
+    });
+
+    it('should handle tilde code fences', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 200,
+        minChars: 0,
+        overlap: 20,
+      });
+
+      const text = `Text before.
+
+~~~javascript
+const code = "This uses tilde fences";
+~~~
+
+Text after.`;
+
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      expect(chunks).toHaveLength(1);
+      expect(chunks[0].content).toContain('~~~javascript');
+      expect(chunks[0].content).toContain(
+        'const code = "This uses tilde fences"',
+      );
+    });
+
+    it('should handle nested code fences correctly', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 300,
+        minChars: 0,
+        overlap: 30,
+      });
+
+      const text = `\`\`\`markdown
+Example with nested fences:
+\`\`\`python
+print("nested")
+\`\`\`
+End of example
+\`\`\``;
+
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      expect(chunks).toHaveLength(1);
+      expect(chunks[0].content).toContain('Example with nested fences');
+    });
+  });
+
+  describe('Overlap handling', () => {
+    it('should apply backward overlap correctly', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 50,
+        minChars: 0,
+        overlap: 10,
+        headerLevels: [1],
+      });
+
+      const text = `# Section 1
+This is the first section with some content.
+
+# Section 2
+This is the second section with more content.`;
+
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      expect(chunks.length).toBeGreaterThanOrEqual(2);
+
+      // Check that second chunk contains overlap from first
+      if (chunks.length >= 2) {
+        // The overlap should be at the beginning of the second chunk
+        const overlap = 10; // We set overlap to 10
+
+        // Calculate expected overlap position
+        const firstChunkEndIndex = chunks[0].meta.endChar;
+        const secondChunkStartIndex = chunks[1].meta.startChar;
+
+        // The start of second chunk should be overlap chars before the end of first chunk
+        expect(firstChunkEndIndex - secondChunkStartIndex).toBeLessThanOrEqual(
+          overlap,
+        );
+      }
+    });
+
+    it('should extend overlap to include entire code block', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 100,
+        minChars: 0,
+        overlap: 20,
+        preserveCodeBlocks: true,
+      });
+
+      const text = `First part of content here.
+
+\`\`\`
+code block content
+\`\`\`
+
+Second part starts here and continues with more text.`;
+
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      // If there are multiple chunks, verify code block handling
+      if (chunks.length > 1) {
+        const codeBlockInFirst = chunks[0].content.includes('```');
+        const codeBlockInSecond = chunks[1].content.includes('```');
+
+        // Code block should be complete in whichever chunk it appears
+        if (codeBlockInFirst) {
+          expect(chunks[0].content).toMatch(/```[\s\S]*?```/);
+        }
+        if (codeBlockInSecond) {
+          expect(chunks[1].content).toMatch(/```[\s\S]*?```/);
+        }
+      }
+    });
+  });
+
+  describe('Metadata generation', () => {
+    it('should generate correct unique IDs', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 50,
+        minChars: 0,
+        overlap: 5,
+        idPrefix: 'test',
+      });
+
+      const text = `# My Section
+This is content for the first section
+
+# My Section  
+This is content for the second section with the same title`;
+
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      // Find all chunks with title "My Section"
+      const mySectionChunks = chunks.filter(
+        (c) => c.meta.title === 'My Section',
+      );
+
+      // Should have at least 2 chunks with this title
+      expect(mySectionChunks.length).toBeGreaterThanOrEqual(2);
+
+      // Check that they have different unique IDs with incrementing numbers
+      const uniqueIds = mySectionChunks.map((c) => c.meta.uniqueId);
+      expect(uniqueIds).toContain('test-my-section-0');
+      expect(uniqueIds).toContain('test-my-section-1');
+    });
+
+    it('should track header paths correctly', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 50,
+        minChars: 0,
+        overlap: 10,
+      });
+
+      const text = `# Chapter 1
+Intro to chapter one with some text
+
+## Section 1.1
+Content in section one point one
+
+### Subsection 1.1.1
+More content in the subsection
+
+## Section 1.2
+Other content in section one point two`;
+
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      // This should create multiple chunks due to the smaller maxChars
+      expect(chunks.length).toBeGreaterThan(1);
+
+      // Find chunks based on their unique content
+      const section11Chunk = chunks.find((c) =>
+        c.content.includes('section one point one'),
+      );
+      const subsectionChunk = chunks.find((c) =>
+        c.content.includes('More content in the subsection'),
+      );
+      const section12Chunk = chunks.find((c) =>
+        c.content.includes('section one point two'),
+      );
+
+      // Check that chunks have appropriate header paths
+      if (section11Chunk) {
+        expect(section11Chunk.meta.headerPath).toContain('Chapter 1');
+        // Title should be Section 1.1 since that's the header for this content
+        expect(section11Chunk.meta.title).toBe('Section 1.1');
+      }
+
+      if (subsectionChunk) {
+        expect(subsectionChunk.meta.headerPath).toContain('Chapter 1');
+        // The subsection content should have appropriate headers in path
+        expect(
+          subsectionChunk.meta.headerPath.some(
+            (h) => h === 'Section 1.1' || h === 'Subsection 1.1.1',
+          ),
+        ).toBe(true);
+      }
+
+      if (section12Chunk) {
+        expect(section12Chunk.meta.headerPath).toContain('Chapter 1');
+        expect(section12Chunk.meta.title).toBe('Section 1.2');
+      }
+    });
+
+    it('should handle chunk numbering per title', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 30,
+        minChars: 0,
+        overlap: 0,
+      });
+
+      const text = `# Long Section
+This is a very long section that will definitely need to be split into multiple chunks because it exceeds our maximum character limit.`;
+
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      const longSectionChunks = chunks.filter(
+        (c) => c.meta.title === 'Long Section',
+      );
+      expect(longSectionChunks.length).toBeGreaterThan(1);
+
+      // Check sequential numbering
+      longSectionChunks.forEach((chunk, index) => {
+        expect(chunk.meta.chunkNumber).toBe(index);
+      });
+    });
+
+    it('should slugify titles correctly', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 2048,
+        minChars: 0,
+        overlap: 256,
+      });
+
+      const text = `# Title with Special@#$ Characters!!!
+Content`;
+
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      expect(chunks[0].meta.uniqueId).toBe('title-with-special-characters-0');
+    });
+  });
+
+  describe('Splitting strategies', () => {
+    it('should fall back to paragraph splitting', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 50,
+        minChars: 0,
+        overlap: 0,
+      });
+
+      const text = `First paragraph with some content here.
+
+Second paragraph with more content here.
+
+Third paragraph with even more content.`;
+
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      expect(chunks.length).toBeGreaterThanOrEqual(3);
+    });
+
+    it('should fall back to line splitting for very long lines', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 50,
+        minChars: 0,
+        overlap: 0,
+      });
+
+      // Create multiple lines that are each long but don't have paragraph breaks
+      const longLine =
+        'Line one that is quite long and exceeds our limit\n' +
+        'Line two that is also very long and exceeds limit\n' +
+        'Line three with even more text to ensure splitting';
+
+      const chunks = splitter.splitMarkdownToChunks(longLine);
+
+      expect(chunks.length).toBeGreaterThan(1);
+    });
+  });
+
+  describe('Edge cases', () => {
+    it('should handle documents with no headers', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 100,
+        minChars: 0,
+        overlap: 10,
+      });
+
+      const text =
+        'Just plain text without any headers. ' +
+        'This should still be chunked properly.';
+
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      expect(chunks.every((c) => c.meta.title === 'ROOT')).toBe(true);
+    });
+
+    it('should handle consecutive headers with no content', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 100,
+        minChars: 0,
+        overlap: 10,
+      });
+
+      const text = `# Header 1
+# Header 2
+# Header 3
+Some content here.`;
+
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      // Should produce valid chunks even with empty sections
+      expect(chunks.length).toBeGreaterThan(0);
+      chunks.forEach((chunk) => {
+        expect(chunk.content.length).toBeGreaterThan(0);
+      });
+    });
+
+    it('should handle Windows line endings', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 100,
+        minChars: 0,
+        overlap: 10,
+      });
+
+      const text = '# Header\r\nContent with\r\nWindows line endings.';
+
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      expect(chunks).toHaveLength(1);
+      expect(chunks[0].meta.title).toBe('Header');
+      expect(chunks[0].content).not.toContain('\r');
+    });
+
+    it('should handle unclosed code blocks gracefully', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 100,
+        minChars: 0,
+        overlap: 10,
+      });
+
+      const text = `# Section
+Some content.
+
+\`\`\`python
+This code block is never closed
+and continues to the end`;
+
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      expect(chunks.length).toBeGreaterThan(0);
+      // Should still produce valid output
+    });
+  });
+
+  describe('Character offset tracking', () => {
+    it('should track start and end character positions correctly', () => {
+      const splitter = new RecursiveMarkdownSplitter({
+        maxChars: 50,
+        minChars: 0,
+        overlap: 0,
+      });
+
+      const text = `# Section 1
+Short content.
+
+# Section 2
+More content here.`;
+
+      const chunks = splitter.splitMarkdownToChunks(text);
+
+      chunks.forEach((chunk) => {
+        expect(chunk.meta.startChar).toBeGreaterThanOrEqual(0);
+        expect(chunk.meta.endChar).toBeGreaterThan(chunk.meta.startChar);
+        expect(
+          chunk.meta.endChar - chunk.meta.startChar,
+        ).toBeGreaterThanOrEqual(chunk.content.length);
+      });
+    });
+  });
+});
diff --git a/python/src/cairo_coder/optimizers/mcp_optimizer.py b/python/src/cairo_coder/optimizers/mcp_optimizer.py
index bb4ed581..28e9dc57 100644
--- a/python/src/cairo_coder/optimizers/mcp_optimizer.py
+++ b/python/src/cairo_coder/optimizers/mcp_optimizer.py
@@ -158,11 +158,12 @@ def forward(self, example, pred, trace=None):
             result = parallel(batches)
 
             resources_notes = [pred.resource_note for pred in result]
-            [pred.reasoning for pred in result]
+            reasonings = [pred.reasoning for pred in result]
 
             score = sum(resources_notes) / len(resources_notes) if len(resources_notes) != 0 else 0
-            # for (note, reason) in zip(resources_notes, reasonings, strict=False):
-                # print(f"Note: {note}, reason: {reason}")
+            print(example.query)
+            for (note, reason) in zip(resources_notes, reasonings, strict=False):
+                print(f"Note: {note}, reason: {reason}")
             return score if trace is None else score >= self.threshold
 
     return (RetrievalF1,)