# PDF to Markdown Conversion

This notebook converts downloaded PDFs to markdown format using PyMuPDF. It also performs language detection to filter out incorrectly classified articles, keeping only those in the target language.

## Setup

### Imports

In [1]:
import yaml
import pymupdf
import pymupdf4llm
import pandas as pd
from pathlib import Path
from tqdm import tqdm
from lingua import LanguageDetectorBuilder

Consider using the pymupdf_layout package for a greatly improved page layout analysis.


### Configuration and Paths

In [2]:
# Load configuration
with open("../config.yaml") as f:
    config = yaml.safe_load(f)

# Set up project paths
project_root = Path.cwd().parent
PDFS_DIR = project_root / config["PDFS_DIR"]
EXTRACTED_DIR = project_root / config["EXTRACTED_DIR"]

# Progress bar format
PROGRESS_BAR_FORMAT = "{desc:<25}{percentage:3.0f}%|{bar:20}{r_bar}"

## Helper Functions

In [3]:
def detect_language(text: str, detector):
    """Detect the language of the given text"""
    result = detector.detect_language_of(text)
    return result.iso_code_639_1.name.lower() if result else None

## Process PDFs

Convert PDFs to markdown, filter by language, and display processing statistics.

In [4]:
processing_stats = []
detector = LanguageDetectorBuilder.from_all_languages().build()

for lang_code, lang_config in config["LANGUAGES"].items():
    lang_pdf_dir = PDFS_DIR / lang_code
    lang_extracted_dir = EXTRACTED_DIR / lang_code
    lang_extracted_dir.mkdir(parents=True, exist_ok=True)

    if not lang_pdf_dir.exists():
        continue

    pdf_files = list(lang_pdf_dir.glob("*.pdf"))
    kept_count = 0
    wrong_lang_count = 0
    error_count = 0

    for pdf_path in tqdm(
        pdf_files, 
        total=len(pdf_files), 
        desc=f"Processing {lang_config['name']}", 
        bar_format=PROGRESS_BAR_FORMAT
    ):
        try:
            # Convert PDF to markdown
            with pymupdf.open(str(pdf_path)) as doc:
                md_text = pymupdf4llm.to_markdown(doc)

            # Verify language
            detected_code = detect_language(md_text, detector)
            if detected_code == lang_code:
                # Rename PDF and save markdown
                new_pdf_path = lang_pdf_dir / f"{kept_count}.pdf"
                pdf_path.rename(new_pdf_path)

                md_path = lang_extracted_dir / f"{kept_count}.md"
                md_path.write_text(md_text, encoding="utf-8")
                kept_count += 1
            else:
                # Remove PDF if wrong language
                pdf_path.unlink()
                wrong_lang_count += 1

        except Exception as e:
            print(f"Error converting PDF to markdown: {str(e)}")
            pdf_path.unlink(missing_ok=True)
            error_count += 1

    processing_stats.append(
        {
            "Language": lang_config["name"],
            "Total PDFs": len(pdf_files),
            "Kept": kept_count,
            "Wrong Language": wrong_lang_count,
            "Errors": error_count,
            "Success Rate": (
                f"{kept_count / len(pdf_files) * 100:.1f}%" if pdf_files else "0%"
            ),
        }
    )


Processing Tamil           1%|                    | 3/482 [00:06<16:17,  2.04s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\1.-மூன்று-படிகை்-எழுதுதல்-ஆய்வுக்கட்டுமர-எழுதுை்வழிமுமற-குறித்த-வமரவு.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\1.pdf'


Processing Tamil           1%|▏                   | 6/482 [00:14<19:36,  2.47s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\10.46632-ctll-4-1-1.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil           1%|▎                   | 7/482 [00:14<14:21,  1.81s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\10.46632-ctll-4-1-2-1.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil           2%|▎                   | 8/482 [00:15<11:23,  1.44s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\10.46632-ctll-4-1-3.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil           2%|▎                   | 9/482 [00:15<08:36,  1.09s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\10.46632-ctll-4-1-4.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil           2%|▍                   | 10/482 [00:16<07:07,  1.10it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\10.46632-ctll-4-1-5.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil           2%|▍                   | 11/482 [00:16<06:01,  1.30it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\10.46632-ctll-4-1-6.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil           2%|▍                   | 12/482 [00:17<05:52,  1.33it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\10.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil           3%|▌                   | 14/482 [00:19<06:44,  1.16it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\11-7-12-831.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil           3%|▌                   | 15/482 [00:20<06:08,  1.27it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\11-8-63-508.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil           3%|▋                   | 16/482 [00:21<06:37,  1.17it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\11-9-40-979.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil           4%|▋                   | 17/482 [00:21<06:16,  1.23it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\11-9-94-744.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil           4%|▋                   | 18/482 [00:22<05:17,  1.46it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\11.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil           4%|▊                   | 19/482 [00:22<04:34,  1.69it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\1135.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil           5%|▉                   | 24/482 [00:59<34:33,  4.53s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\12.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil           5%|█                   | 25/482 [01:00<25:49,  3.39s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\13.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil           9%|█▋                  | 42/482 [01:07<03:32,  2.07it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\14.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          15%|██▉                 | 71/482 [01:24<03:34,  1.92it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\15.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          15%|███                 | 73/482 [01:26<05:28,  1.24it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\15511.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          15%|███                 | 74/482 [01:27<05:17,  1.29it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\15513.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          16%|███                 | 75/482 [01:28<04:42,  1.44it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\15514.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          16%|███▏                | 76/482 [01:28<04:20,  1.56it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\15515.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          16%|███▏                | 77/482 [01:28<03:34,  1.89it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\15516.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          16%|███▏                | 78/482 [01:29<03:56,  1.71it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\15517.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          16%|███▎                | 79/482 [01:29<03:24,  1.97it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\15518.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          17%|███▎                | 80/482 [01:30<04:08,  1.62it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\15521.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          17%|███▎                | 81/482 [01:31<04:21,  1.53it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\15522.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          17%|███▍                | 82/482 [01:31<03:40,  1.82it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\15524.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          17%|███▍                | 83/482 [01:32<03:19,  2.00it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\15525.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          17%|███▍                | 84/482 [01:32<03:29,  1.90it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\15528.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          18%|███▌                | 85/482 [01:33<03:33,  1.86it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\15529.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          18%|███▌                | 86/482 [01:34<04:18,  1.53it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\15530.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          18%|███▌                | 87/482 [01:34<03:47,  1.73it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\15_ShodhKosh_5475.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          18%|███▋                | 88/482 [01:34<03:10,  2.06it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\16.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          19%|███▊                | 91/482 [01:36<03:33,  1.83it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\17.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          19%|███▊                | 92/482 [01:36<03:11,  2.03it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\18.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          19%|███▊                | 93/482 [01:37<03:51,  1.68it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\19.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\2.pdf'


Processing Tamil          21%|████                | 99/482 [01:48<06:44,  1.06s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\20.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\3.pdf'


Processing Tamil          21%|████▏               | 100/482 [01:48<05:16,  1.21it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\21.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\3.pdf'


Processing Tamil          21%|████▏               | 101/482 [01:49<04:23,  1.45it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\22.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\3.pdf'


Processing Tamil          21%|████▏               | 102/482 [01:49<04:12,  1.51it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\23.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\3.pdf'


Processing Tamil          21%|████▎               | 103/482 [01:50<04:16,  1.48it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\24-சித்தர்+பாடல்களில்+நாட்டுப்புறக்+கூறுகள்.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\3.pdf'


Processing Tamil          22%|████▎               | 104/482 [01:50<04:11,  1.50it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\24.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\3.pdf'


Processing Tamil          22%|████▎               | 105/482 [01:51<04:41,  1.34it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\25.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\3.pdf'


Processing Tamil          22%|████▍               | 106/482 [01:52<03:57,  1.58it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\26.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\3.pdf'


Processing Tamil          22%|████▍               | 107/482 [01:53<04:11,  1.49it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\27.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\3.pdf'


Processing Tamil          22%|████▍               | 108/482 [01:53<04:06,  1.52it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\28.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\3.pdf'


Processing Tamil          23%|████▌               | 109/482 [01:54<05:07,  1.21it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\29.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\3.pdf'


Processing Tamil          23%|████▋               | 112/482 [02:00<08:17,  1.35s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\30.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\4.pdf'


Processing Tamil          23%|████▋               | 113/482 [02:01<07:56,  1.29s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\31.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\4.pdf'


Processing Tamil          24%|████▋               | 114/482 [02:03<07:37,  1.24s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\32.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\4.pdf'


Processing Tamil          24%|████▊               | 116/482 [02:04<06:01,  1.01it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\33.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\4.pdf'


Processing Tamil          25%|████▉               | 119/482 [02:05<03:49,  1.58it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\34.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\4.pdf'


Processing Tamil          25%|█████               | 121/482 [02:06<03:07,  1.92it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\35.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\4.pdf'


Processing Tamil          26%|█████               | 123/482 [02:07<02:57,  2.03it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\36.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\4.pdf'


Processing Tamil          26%|█████▏              | 124/482 [02:08<03:36,  1.65it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\37.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\4.pdf'


Processing Tamil          26%|█████▏              | 126/482 [02:11<07:12,  1.22s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\38.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\4.pdf'


Processing Tamil          26%|█████▎              | 127/482 [02:12<06:41,  1.13s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\39.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\4.pdf'


Processing Tamil          27%|█████▍              | 131/482 [02:14<03:48,  1.54it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\40.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\5.pdf'


Processing Tamil          27%|█████▍              | 132/482 [02:17<07:29,  1.28s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\41.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\5.pdf'


Processing Tamil          28%|█████▌              | 133/482 [02:19<08:37,  1.48s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\42.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\5.pdf'


Processing Tamil          28%|█████▌              | 134/482 [02:20<08:55,  1.54s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\43.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\5.pdf'


Processing Tamil          28%|█████▌              | 135/482 [02:22<09:08,  1.58s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\44.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\5.pdf'


Processing Tamil          28%|█████▋              | 136/482 [02:23<07:52,  1.36s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\45.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\5.pdf'


Processing Tamil          28%|█████▋              | 137/482 [02:23<06:35,  1.15s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\45218.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\5.pdf'


Processing Tamil          29%|█████▋              | 138/482 [02:25<07:23,  1.29s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\46.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\5.pdf'


Processing Tamil          29%|█████▊              | 139/482 [02:26<06:51,  1.20s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\47.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\5.pdf'


Processing Tamil          29%|█████▊              | 140/482 [02:28<07:32,  1.32s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\48.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\5.pdf'


Processing Tamil          29%|█████▊              | 141/482 [02:29<07:15,  1.28s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\49.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\5.pdf'


Processing Tamil          30%|█████▉              | 143/482 [02:30<05:29,  1.03it/s]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\50.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\6.pdf'


Processing Tamil          41%|████████▏           | 198/482 [05:18<09:39,  2.04s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\51.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\6.pdf'


Processing Tamil          42%|████████▎           | 201/482 [05:24<08:40,  1.85s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\51499.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\6.pdf'


Processing Tamil          42%|████████▍           | 202/482 [05:25<07:22,  1.58s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\51712.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\6.pdf'


Processing Tamil          43%|████████▌           | 207/482 [05:38<10:40,  2.33s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\52.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\6.pdf'


Processing Tamil          47%|█████████▍          | 227/482 [06:36<09:17,  2.18s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\53.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\6.pdf'


Processing Tamil          47%|█████████▍          | 228/482 [06:38<07:56,  1.88s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\53775.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\6.pdf'


Processing Tamil          48%|█████████▌          | 229/482 [06:38<05:55,  1.41s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\54.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\6.pdf'


Processing Tamil          48%|█████████▌          | 230/482 [06:39<05:28,  1.30s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\54302.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\6.pdf'


Processing Tamil          48%|█████████▌          | 231/482 [06:40<05:09,  1.23s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\54369.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\6.pdf'


Processing Tamil          48%|█████████▋          | 232/482 [06:41<04:17,  1.03s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\54613.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\6.pdf'


Processing Tamil          48%|█████████▋          | 233/482 [06:42<04:28,  1.08s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\55.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\6.pdf'


Processing Tamil          49%|█████████▋          | 234/482 [06:43<04:14,  1.03s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\56.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\6.pdf'


Processing Tamil          49%|█████████▊          | 235/482 [06:45<05:18,  1.29s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\57.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\6.pdf'


Processing Tamil          49%|█████████▊          | 236/482 [06:45<04:13,  1.03s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\57149.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\6.pdf'


Processing Tamil          51%|██████████          | 244/482 [07:06<07:21,  1.86s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\58.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\6.pdf'


Processing Tamil          51%|██████████▏         | 245/482 [07:07<06:02,  1.53s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\59.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\6.pdf'


Processing Tamil          51%|██████████▏         | 247/482 [07:09<04:49,  1.23s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\60.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\7.pdf'


Processing Tamil          51%|██████████▎         | 248/482 [07:11<05:42,  1.46s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\61.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\7.pdf'


Processing Tamil          52%|██████████▎         | 249/482 [07:11<04:27,  1.15s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\62.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\7.pdf'


Processing Tamil          52%|██████████▎         | 250/482 [07:13<04:30,  1.17s/it]

Error converting PDF to markdown: [WinError 183] Cannot create a file when that file already exists: 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\63.pdf' -> 'c:\\Users\\Admin\\Desktop\\2025-2026_LucasGranucci\\Averroes-AI\\data\\pdfs\\ta\\7.pdf'


Processing Tamil          53%|██████████▌         | 255/482 [07:37<06:47,  1.79s/it]


KeyboardInterrupt: 

In [None]:
display(pd.DataFrame(processing_stats))