In [1]:
import requests
from bs4 import BeautifulSoup

# Base URL of the book; relative hrefs scraped from the sidebar are appended to it.
url = "https://doc.rust-lang.org/book/"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() fails here, with a clear HTTP error, instead of later when
# the parsed document turns out not to contain the expected elements.
page = requests.get(url, timeout=30)
page.raise_for_status()

soup = BeautifulSoup(page.content, "html.parser")

In [2]:
# Build `chapters`: a list of {"url", "text", "sections"} dicts, where
# "sections" is a list of {"url", "text"} dicts for the nested entries.
sidebar = soup.find(class_="sidebar-scrollbox")
chapters = []
currentchapter = None
for link in sidebar.find_all("a"):
    # The first class of the link's grandparent element decides the level:
    # "section" means a nested entry, anything else starts a new chapter.
    if link.parent.parent["class"][0] != "section":
        if currentchapter is not None:
            chapters.append(currentchapter)
        currentchapter = { "url": link["href"], "text": link.text, "sections": [] }
    else:
        currentchapter["sections"].append({ "url": link["href"], "text": link.text })

# A chapter is only appended when the *next* chapter link is encountered, so
# the final one must be flushed explicitly or it is silently dropped.
if currentchapter is not None:
    chapters.append(currentchapter)

print(chapters)

[{'url': 'title-page.html', 'text': 'The Rust Programming Language', 'sections': []}, {'url': 'foreword.html', 'text': 'Foreword', 'sections': []}, {'url': 'ch00-00-introduction.html', 'text': 'Introduction', 'sections': []}, {'url': 'ch01-00-getting-started.html', 'text': '1. Getting Started', 'sections': [{'url': 'ch01-01-installation.html', 'text': '1.1. Installation'}, {'url': 'ch01-02-hello-world.html', 'text': '1.2. Hello, World!'}, {'url': 'ch01-03-hello-cargo.html', 'text': '1.3. Hello, Cargo!'}]}, {'url': 'ch02-00-guessing-game-tutorial.html', 'text': '2. Programming a Guessing Game', 'sections': []}, {'url': 'ch03-00-common-programming-concepts.html', 'text': '3. Common Programming Concepts', 'sections': [{'url': 'ch03-01-variables-and-mutability.html', 'text': '3.1. Variables and Mutability'}, {'url': 'ch03-02-data-types.html', 'text': '3.2. Data Types'}, {'url': 'ch03-03-how-functions-work.html', 'text': '3.3. Functions'}, {'url': 'ch03-04-comments.html', 'text': '3.4. Comm

In [4]:
import os

def _page_images(page_url, out_dir):
    """Fetch one book page, ensure its route directory exists, and list its images.

    Args:
        page_url: Absolute URL of the page to download.
        out_dir: Directory (with trailing slash) reserved for this page's route.

    Returns:
        The `src` attribute of every `<img>` found inside the page's `<main>`
        element, in document order. Empty if the request did not return
        HTTP 200 or the page has no `<main>` element.
    """
    # The directory is created even when the fetch fails, so the route layout
    # stays aligned with the chapter/section indices.
    os.makedirs(out_dir, exist_ok=True)

    response = requests.get(page_url, timeout=30)
    if response.status_code != 200:
        print(f"Skipping {page_url}: HTTP {response.status_code}")
        return []

    main = BeautifulSoup(response.content, "html.parser").find("main")
    if main is None:
        print(f"Skipping {page_url}: no <main> element")
        return []

    # Page export is currently disabled; re-enable to write the <main> markup:
    # with open(out_dir + "+page.svelte", "w", encoding="UTF-8") as f:
    #     f.write(main.prettify())

    # src=True skips <img> tags without a src attribute instead of raising KeyError.
    return [img["src"] for img in main.find_all("img", src=True)]


# NOTE: `url` must be the book's base URL here; relative hrefs are appended to it.
imgpaths = []

for i, chapter in enumerate(chapters):
    cpath = f"../src/routes/book/{i}/"
    imgpaths.extend(_page_images(url + chapter["url"], cpath))

    # Sections live one level below their chapter: book/<chapter>/<section>/
    for j, section in enumerate(chapter["sections"]):
        imgpaths.extend(_page_images(url + section["url"], cpath + f"{j}/"))

print(imgpaths)


['img/ferris/does_not_compile.svg', 'img/ferris/panics.svg', 'img/ferris/not_desired_behavior.svg', 'img/trpl04-01.svg', 'img/trpl04-02.svg', 'img/trpl04-03.svg', 'img/trpl04-04.svg', 'img/trpl04-05.svg', 'img/trpl04-06.svg', 'img/trpl04-07.svg', 'img/trpl14-01.png', 'img/trpl14-02.png', 'img/trpl14-03.png', 'img/trpl14-04.png', 'img/trpl15-01.svg', 'img/trpl15-02.svg', 'img/trpl15-03.svg', 'img/trpl15-04.svg', 'img/trpl20-01.png']


In [12]:
import shutil

# Download every collected image into ../static/, mirroring its relative path.
# The loop variable is deliberately NOT called `url`: that name holds the
# book's base URL elsewhere in the notebook and must not be overwritten.
for imgpath in imgpaths:
    # `with` releases the streamed connection even when the body is not read.
    with requests.get("https://doc.rust-lang.org/book/" + imgpath, stream=True, timeout=30) as res:
        if res.status_code != 200:
            print(f"Critical error! HTTP {res.status_code} for {imgpath}")
            continue

        fpath = "../static/" + imgpath
        os.makedirs(os.path.dirname(fpath), exist_ok=True)

        # Every file (SVG included) is written as bytes so the content is
        # stored exactly as served, with no charset guessing or newline
        # translation. iter_content() also undoes any gzip/deflate transfer
        # encoding, which copying res.raw directly would not.
        with open(fpath, "wb") as f:
            for chunk in res.iter_content(chunk_size=8192):
                f.write(chunk)

In [11]:
from html import escape

# Render the navigation markup: one link per chapter, followed by an indented
# (pl-6) link for each of its sections. hrefs use the positional indices, i.e.
# /book/<chapter index> and /book/<chapter index>/<section index>.
nav_lines = []

for i, chapter in enumerate(chapters):
    # Titles are scraped plain text and may contain markup characters
    # (e.g. "Box<T>"), so they are escaped before being placed into HTML.
    chapter_title = escape(chapter["text"], quote=False)
    nav_lines.append(f'<a class="nav-item" href="/book/{i}">{chapter_title}</a>\n')
    for j, section in enumerate(chapter["sections"]):
        section_title = escape(section["text"], quote=False)
        nav_lines.append(f'<a class="nav-item pl-6" href="/book/{i}/{j}">{section_title}</a>\n')

result = "".join(nav_lines)

print(result)

<a class="nav-item" href="/book/0">The Rust Programming Language</a>
<a class="nav-item" href="/book/1">Foreword</a>
<a class="nav-item" href="/book/2">Introduction</a>
<a class="nav-item" href="/book/3">1. Getting Started</a>
<a class="nav-item pl-6" href="/book/3/0">1.1. Installation</a>
<a class="nav-item pl-6" href="/book/3/1">1.2. Hello, World!</a>
<a class="nav-item pl-6" href="/book/3/2">1.3. Hello, Cargo!</a>
<a class="nav-item" href="/book/4">2. Programming a Guessing Game</a>
<a class="nav-item" href="/book/5">3. Common Programming Concepts</a>
<a class="nav-item pl-6" href="/book/5/0">3.1. Variables and Mutability</a>
<a class="nav-item pl-6" href="/book/5/1">3.2. Data Types</a>
<a class="nav-item pl-6" href="/book/5/2">3.3. Functions</a>
<a class="nav-item pl-6" href="/book/5/3">3.4. Comments</a>
<a class="nav-item pl-6" href="/book/5/4">3.5. Control Flow</a>
<a class="nav-item" href="/book/6">4. Understanding Ownership</a>
<a class="nav-item pl-6" href="/book/6/0">4.1. Wha