# GET OSTEP

Fetch the textbook [**Operating Systems: Three Easy Pieces**](https://pages.cs.wisc.edu/~remzi/OSTEP/) chapter by chapter and merge the chapters into a single PDF with detailed bookmarks.

Run every cell of this notebook to get a copy.

In [1]:
import requests
from bs4 import BeautifulSoup

# Base URL of the book's index page; chapter file names are appended to it later.
url = "https://pages.cs.wisc.edu/~remzi/OSTEP/"
# The timeout keeps a stalled connection from hanging the notebook, and
# raise_for_status() stops here on an HTTP error instead of letting an
# error page be parsed as if it were the chapter index.
res = requests.get(url, timeout=30)
res.raise_for_status()

In [2]:
# Parse the index page. No parser is named here, so BeautifulSoup picks one
# from whatever parsers are installed.
bs = BeautifulSoup(res.text)

# Maps a two-character, zero-padded chapter index (e.g. "04") to the PDF file
# name linked from that table cell.
chapters = {}

for unit in bs.find_all('td', bgcolor=True):
    idx_tag = unit.find('small')
    link_tag = unit.find('a')
    # A usable cell needs both the <small> index label and a link; skipping
    # cells without an <a> avoids calling .get() on None.
    if idx_tag is None or link_tag is None:
        continue
    fname = link_tag.get('href')
    if not fname:
        continue
    # Left-pad with "0" and keep the last two characters: "4" -> "04", "12" -> "12".
    chapters[('0' + idx_tag.text)[-2:]] = fname

# (index, file name) pairs ordered by the zero-padded index.
chapter_list = sorted(chapters.items())
print(chapter_list)

[('01', 'dialogue-threeeasy.pdf'), ('02', 'intro.pdf'), ('03', 'dialogue-virtualization.pdf'), ('04', 'cpu-intro.pdf'), ('05', 'cpu-api.pdf'), ('06', 'cpu-mechanisms.pdf'), ('07', 'cpu-sched.pdf'), ('08', 'cpu-sched-mlfq.pdf'), ('09', 'cpu-sched-lottery.pdf'), ('10', 'cpu-sched-multi.pdf'), ('11', 'cpu-dialogue.pdf'), ('12', 'dialogue-vm.pdf'), ('13', 'vm-intro.pdf'), ('14', 'vm-api.pdf'), ('15', 'vm-mechanism.pdf'), ('16', 'vm-segmentation.pdf'), ('17', 'vm-freespace.pdf'), ('18', 'vm-paging.pdf'), ('19', 'vm-tlbs.pdf'), ('20', 'vm-smalltables.pdf'), ('21', 'vm-beyondphys.pdf'), ('22', 'vm-beyondphys-policy.pdf'), ('23', 'vm-complete.pdf'), ('24', 'vm-dialogue.pdf'), ('25', 'dialogue-concurrency.pdf'), ('26', 'threads-intro.pdf'), ('27', 'threads-api.pdf'), ('28', 'threads-locks.pdf'), ('29', 'threads-locks-usage.pdf'), ('30', 'threads-cv.pdf'), ('31', 'threads-sema.pdf'), ('32', 'threads-bugs.pdf'), ('33', 'threads-events.pdf'), ('34', 'threads-dialogue.pdf'), ('35', 'dialogue-persis

In [3]:
def getfile(src, dst, max_retries=5, timeout=30):
    """Download ``src`` and save the response body to ``dst``.

    Network failures and HTTP error statuses are retried a bounded number of
    times with a short, growing pause between attempts, so an unreachable or
    missing URL can no longer hang the notebook forever or leave an error
    page saved under a ``.pdf`` name.

    Args:
        src: URL to fetch.
        dst: Path of the file to write (opened in binary mode).
        max_retries: Maximum number of download attempts.
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Raises:
        RuntimeError: If every attempt fails; the last request error is
            attached as the cause.
        OSError: If ``dst`` cannot be written (not retried, since a local
            file-system error will not fix itself).
    """
    import time

    last_error = None
    for attempt in range(1, max_retries + 1):
        try:
            rf = requests.get(src, timeout=timeout)
            # Treat 4xx/5xx responses as failures instead of saving their body.
            rf.raise_for_status()
        except requests.RequestException as err:
            last_error = err
            if attempt < max_retries:
                time.sleep(attempt)  # brief back-off before the next try
            continue
        with open(dst, "wb") as f:
            f.write(rf.content)
        return
    raise RuntimeError(
        f"failed to download {src} after {max_retries} attempts"
    ) from last_error

In [4]:
# get every chapter
import os

# Directory that holds one downloaded PDF per chapter.
subdir = "ostep_chapters"
# exist_ok=True makes re-running the cell safe without a separate,
# race-prone existence check.
os.makedirs(os.path.join(os.curdir, subdir), exist_ok=True)

for i, nm in chapters.items():
    # Files are saved as "<index><original name>", e.g. "04cpu-intro.pdf".
    tgt = os.path.join(os.curdir, subdir, f"{i}{nm}")
    # Skip chapters already present from an earlier run.
    if os.path.exists(tgt):
        continue
    getfile(url + nm, tgt)

In [5]:
# merge chapters to topics
from pypdf import PdfWriter

# Each entry is (topic name, begin, end): the topic covers the 1-based
# positions begin .. end-1 of chapter_list.
topics = [("Virtualization", 3, 25), ("Concurrency", 25, 35), ("Persistence", 35, 52), ("Security", 52, 58)]
# Directory that receives one merged PDF per topic.
tpcdir = "ostep_topics"
# exist_ok=True makes re-running the cell safe without a separate existence check.
os.makedirs(os.path.join(os.curdir, tpcdir), exist_ok=True)

for tname, begin, end in topics:
    merger = PdfWriter()
    # chapter_list is 0-based, hence the -1 on both slice bounds.
    for i, nm in chapter_list[begin-1:end-1]:
        # Bookmark title is "<index> <file name without its extension>".
        title = f"{i} {os.path.splitext(nm)[0]}"
        merger.append(os.path.join(subdir, i + nm), outline_item=title)
    merger.write(os.path.join(tpcdir, f"{tname}.pdf"))

In [6]:
# get preface and toc
preface = "preface.pdf"
toc = "toc.pdf"

# Both files are stored next to the chapters under a "00" prefix.
ptgt = os.path.join(os.curdir, subdir, "00" + preface)
ttgt = os.path.join(os.curdir, subdir, "00" + toc)

for front_name, front_path in ((preface, ptgt), (toc, ttgt)):
    # Download only what is not on disk yet.
    if os.path.exists(front_path):
        continue
    getfile(url + front_name, front_path)

In [7]:
merger = PdfWriter()

# Front matter first: the preface, then the table of contents.
preface_title = preface[:-4].capitalize()
merger.append(os.path.join(subdir, "00" + preface), outline_item=preface_title)
toc_title = toc[:-4].upper()
merger.append(os.path.join(subdir, "00" + toc), outline_item=toc_title)

# the first 2 chapters
for i, nm in (chapter_list[0], chapter_list[1]):
    chapter_path = os.path.join(subdir, i + nm)
    merger.append(chapter_path, outline_item=f"{i} {nm[:-4]}")

# Then every topic PDF, importing the outline it already carries.
for topic in topics:
    tname = topic[0]
    topic_path = os.path.join(tpcdir, f"{tname}.pdf")
    merger.append(topic_path, outline_item=tname, import_outline=True)
merger.write("ostep_.pdf")

(True, <_io.FileIO [closed]>)

In [8]:
# appendices
# File names already handled: every chapter plus the preface and the toc.
got = set(chapters.values())
got.add(preface)
got.add(toc)

appdir = "ostep_appendices"
# exist_ok=True makes re-running the cell safe without a separate existence check.
os.makedirs(os.path.join(os.curdir, appdir), exist_ok=True)

appendices = []
seen = set()  # links already collected, so a PDF linked twice is merged only once
for link_tag in bs.find_all('a', href=True):
    link = link_tag.get('href')
    # Any remaining PDF link on the page is treated as an appendix.
    if ".pdf" not in link or link in got or link in seen:
        continue
    seen.add(link)
    appendices.append(link)
    tgt = os.path.join(appdir, link)
    # Skip files already present from an earlier run.
    if os.path.exists(tgt):
        continue
    getfile(url + link, tgt)
print(appendices)

['dialogue-vmm.pdf', 'vmm-intro.pdf', 'dialogue-monitors.pdf', 'threads-monitors.pdf', 'dialogue-labs.pdf', 'lab-tutorial.pdf', 'lab-projects-systems.pdf', 'lab-projects-xv6.pdf']


In [9]:
# merge appendices
merger = PdfWriter()
for pdf_name in appendices:
    # Bookmark title is the file name with its last four characters removed.
    bookmark = pdf_name[:-4]
    merger.append(os.path.join(appdir, pdf_name), outline_item=bookmark)
merger.write("Appendex.pdf")

(True, <_io.FileIO [closed]>)

In [10]:
# Assemble the final book: the main body with its outline, then the appendices.
merger = PdfWriter()
merger.append("ostep_.pdf", import_outline=True)
# The intermediate file keeps its existing name because other cells refer to
# it; only the bookmark title shown in the final PDF is spelled correctly.
merger.append("Appendex.pdf", outline_item="Appendices")
merger.write("OSTEP.pdf")

(True, <_io.FileIO [closed]>)

In [11]:
# clean
import shutil

def safe_rm(fname):
    """Remove ``fname`` if it exists, whether it is a file, link or directory.

    A real directory is removed recursively; anything else (regular file or
    symbolic link, including a dangling one) is unlinked. A missing path is
    silently ignored.

    Args:
        fname: Path to remove.

    Raises:
        OSError: If the removal itself fails (e.g. permission denied). The
            error is reported as-is rather than being masked by a fallback
            to the other removal method.
    """
    # lexists() also reports dangling symlinks, which exists() would miss.
    if not os.path.lexists(fname):
        return
    # A symlink to a directory must be unlinked, not walked, so check islink.
    if os.path.isdir(fname) and not os.path.islink(fname):
        shutil.rmtree(fname)
    else:
        os.remove(fname)

# Intermediate artifacts to delete once OSTEP.pdf has been written.
leftovers = (
    subdir,  # comment this line if you would also like to have the single-chapter pdfs
    tpcdir,
    appdir,
    "ostep_.pdf",
    "Appendex.pdf",
)
for leftover in leftovers:
    safe_rm(leftover)