# Getting BTF Files for Ubuntu 20.04

In [1]:
!sudo apt install zstd linux-tools-generic -y

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
zstd is already the newest version (1.5.4+dfsg2-4).
linux-tools-generic is already the newest version (6.2.0.39.39).
0 upgraded, 0 newly installed, 0 to remove and 2 not upgraded.


In [30]:
%reload_ext autoreload
%autoreload 2

from utils import system

In [3]:
from pathlib import Path

# Output directory: the final .btf files are written directly under it.
# NOTE: extract_vmlinux_files checks for the substring "arm64" in paths derived
# from this directory to decide how to decompress the kernel image.
data_path = Path("data/20.04-x86")
# Archive root; the "Filename" paths from the package index are appended to it.
url_prefix = "http://security.ubuntu.com/ubuntu"
# Compressed package index for focal-security / main / amd64.
package_url = f"{url_prefix}/dists/focal-security/main/binary-amd64/Packages.gz"

# Alternate arm64 configuration: uncomment these three assignments (and comment
# out the three above) to target the ports archive instead.
# data_path = Path("data/20.04-arm64")
# url_prefix = "http://ports.ubuntu.com/ubuntu-ports"
# package_url = f"{url_prefix}/dists/focal-security/main/binary-arm64/Packages.gz"

# Scratch directory for the package index, .deb files and kernel images.
tmp_path = data_path / "tmp"
tmp_path.mkdir(parents=True, exist_ok=True)

In [4]:
import urllib.request
import gzip


def download_package_index():
    """Download and decompress the package index, reusing cached copies.

    Reads the module-level ``package_url`` and ``tmp_path``. The compressed
    index is stored as ``tmp_path / "Packages.gz"`` and decompressed next to
    it as ``tmp_path / "Packages"``. Each step is skipped when its output
    file already exists.

    Returns:
        Path of the decompressed package index.
    """
    import shutil  # local import: only this function needs it

    gz_path = tmp_path / "Packages.gz"

    if not gz_path.exists():
        print(f"Downloading {package_url} to {gz_path}")
        urllib.request.urlretrieve(package_url, gz_path)
    else:
        print(f"Using {gz_path}")

    package_path = gz_path.with_suffix("")
    # Check the exact path that is written below. Stripping a second suffix
    # here would test a different file as soon as the name contained a dot.
    if not package_path.exists():
        print(f"Unzipping {gz_path} to {package_path}")
        with gzip.open(gz_path, "rb") as f_in, open(package_path, "wb") as f_out:
            # Stream in chunks instead of holding the whole index in memory.
            shutil.copyfileobj(f_in, f_out)
    else:
        print(f"Using {package_path}")

    return package_path


# Fetch the index (or reuse the cached copy) and display where it lives.
package_path = download_package_index()
package_path

Using data/20.04-x86/tmp/Packages.gz
Using data/20.04-x86/tmp/Packages


PosixPath('data/20.04-x86/tmp/Packages')

In [9]:
def parse_package_index(package_path):
    """Map each package name in a ``Packages`` index to its ``Filename`` value.

    The index is a sequence of stanzas separated by blank lines; each stanza
    consists of ``Key: value`` lines, optionally followed by continuation
    lines that start with whitespace.

    Args:
        package_path: Path of the decompressed package index.

    Returns:
        dict of package name -> value of that stanza's ``Filename`` field.
        If a package name occurs in several stanzas, the last one wins.
    """
    result = {}
    package = None
    # Only the ASCII "Package"/"Filename" fields are used, so undecodable
    # bytes elsewhere (e.g. in descriptions) are replaced rather than fatal.
    with open(package_path, encoding="utf-8", errors="replace") as f:
        for raw_line in f:
            line = raw_line.rstrip("\n")
            if not line.strip():
                # Blank line ends the stanza: forget the current package so a
                # stanza without "Package" cannot inherit the previous name.
                package = None
                continue
            if line[0].isspace():
                # Continuation of the previous field; never a new key.
                continue

            key, sep, val = line.partition(":")
            if not sep:
                # Not a "Key: value" line; ignore instead of raising.
                continue
            val = val.strip()

            if key == "Package":
                package = val
            elif key == "Filename" and package is not None:
                result[package] = val
    return result


# Build the package -> .deb path mapping, then preview the first ten entries.
package_index = parse_package_index(package_path)

list(package_index.items())[:10]

[('accountsservice',
  'pool/main/a/accountsservice/accountsservice_0.6.55-0ubuntu12~20.04.6_amd64.deb'),
 ('adcli', 'pool/main/a/adcli/adcli_0.9.0-1ubuntu0.20.04.1_amd64.deb'),
 ('adsys', 'pool/main/a/adsys/adsys_0.9.2~20.04.1_amd64.deb'),
 ('adsys-windows', 'pool/main/a/adsys/adsys-windows_0.9.2~20.04.1_amd64.deb'),
 ('advancecomp',
  'pool/main/a/advancecomp/advancecomp_2.1-2.1ubuntu0.20.04.1_amd64.deb'),
 ('aide', 'pool/main/a/aide/aide_0.16.1-1ubuntu0.1_amd64.deb'),
 ('aide-common', 'pool/main/a/aide/aide-common_0.16.1-1ubuntu0.1_all.deb'),
 ('amd64-microcode',
  'pool/main/a/amd64-microcode/amd64-microcode_3.20191218.1ubuntu1.2_amd64.deb'),
 ('apache2', 'pool/main/a/apache2/apache2_2.4.41-4ubuntu3.15_amd64.deb'),
 ('apache2-bin',
  'pool/main/a/apache2/apache2-bin_2.4.41-4ubuntu3.15_amd64.deb')]

In [42]:
import re
from collections import defaultdict


def filter_linux_images(package_index):
    """Pick, for every kernel version, the generic image with the highest build.

    Only package names of the form ``linux-image-<a.b.c>-<build>-generic``
    are considered; everything else is ignored.

    Args:
        package_index: Mapping of package name -> path of its .deb file.

    Returns:
        List of ``(package, path)`` tuples, one per kernel version, in the
        order in which each version first appears in ``package_index``.
    """
    image_name = re.compile(r"^linux-image-(\d+\.\d+\.\d+)-(\d+)-generic$")

    # version -> (build number, (package, path)) of the best candidate so far
    newest = {}
    for package, path in package_index.items():
        found = image_name.match(package)
        if found is None:
            continue

        version = found.group(1)
        build = int(found.group(2))  # compare numerically, not as text
        current = newest.get(version)
        # ">=" lets a later entry with the same build number replace an earlier one.
        if current is None or build >= current[0]:
            newest[version] = (build, (package, path))

    return [entry for _, entry in newest.values()]


# Keep only the highest-build generic kernel image per version and show them.
linux_versions = filter_linux_images(package_index)

linux_versions

[('linux-image-5.11.0-46-generic',
  'pool/main/l/linux-signed-hwe-5.11/linux-image-5.11.0-46-generic_5.11.0-46.51~20.04.1_amd64.deb'),
 ('linux-image-5.13.0-52-generic',
  'pool/main/l/linux-signed-hwe-5.13/linux-image-5.13.0-52-generic_5.13.0-52.59~20.04.1_amd64.deb'),
 ('linux-image-5.15.0-92-generic',
  'pool/main/l/linux-signed-hwe-5.15/linux-image-5.15.0-92-generic_5.15.0-92.102~20.04.1_amd64.deb'),
 ('linux-image-5.4.0-170-generic',
  'pool/main/l/linux-signed/linux-image-5.4.0-170-generic_5.4.0-170.188_amd64.deb'),
 ('linux-image-5.8.0-63-generic',
  'pool/main/l/linux-signed-hwe-5.8/linux-image-5.8.0-63-generic_5.8.0-63.71~20.04.1_amd64.deb')]

In [43]:
def download_deb_files(linux_versions):
    """Download the .deb of every selected kernel image into ``tmp_path``.

    A file that already exists in ``tmp_path`` is reused, not downloaded again.

    Args:
        linux_versions: Iterable of ``(package, path)`` tuples, where ``path``
            is relative to the module-level ``url_prefix``.

    Returns:
        dict mapping the package name without its ``linux-image-`` prefix to
        the local path of the .deb file.
    """
    downloaded = {}
    for package, pool_path in linux_versions:
        # Local file keeps the archive's own file name (last path component).
        deb_name = pool_path.rsplit("/", 1)[-1]
        file_path = tmp_path / deb_name
        url = f"{url_prefix}/{pool_path}"

        if file_path.exists():
            print(f"Using {file_path}")
        else:
            print(f"Downloading {url} to {file_path}")
            urllib.request.urlretrieve(url, file_path)

        downloaded[package.removeprefix("linux-image-")] = file_path

    return downloaded


# Download (or reuse) the .deb of each selected kernel and show the local paths.
deb_paths = download_deb_files(linux_versions)

deb_paths

Using data/20.04-x86/tmp/linux-image-5.11.0-46-generic_5.11.0-46.51~20.04.1_amd64.deb
Using data/20.04-x86/tmp/linux-image-5.13.0-52-generic_5.13.0-52.59~20.04.1_amd64.deb
Using data/20.04-x86/tmp/linux-image-5.15.0-92-generic_5.15.0-92.102~20.04.1_amd64.deb
Using data/20.04-x86/tmp/linux-image-5.4.0-170-generic_5.4.0-170.188_amd64.deb
Using data/20.04-x86/tmp/linux-image-5.8.0-63-generic_5.8.0-63.71~20.04.1_amd64.deb


{'5.11.0-46-generic': PosixPath('data/20.04-x86/tmp/linux-image-5.11.0-46-generic_5.11.0-46.51~20.04.1_amd64.deb'),
 '5.13.0-52-generic': PosixPath('data/20.04-x86/tmp/linux-image-5.13.0-52-generic_5.13.0-52.59~20.04.1_amd64.deb'),
 '5.15.0-92-generic': PosixPath('data/20.04-x86/tmp/linux-image-5.15.0-92-generic_5.15.0-92.102~20.04.1_amd64.deb'),
 '5.4.0-170-generic': PosixPath('data/20.04-x86/tmp/linux-image-5.4.0-170-generic_5.4.0-170.188_amd64.deb'),
 '5.8.0-63-generic': PosixPath('data/20.04-x86/tmp/linux-image-5.8.0-63-generic_5.8.0-63.71~20.04.1_amd64.deb')}

In [49]:
def extract_vmlinuz_files(deb_paths):
    """Extract ``boot/vmlinuz-<name>`` from each kernel .deb into ``tmp_path``.

    Only that single member is streamed out of the package's filesystem
    archive; the package is not unpacked as a whole. An existing, non-empty
    output file is reused.

    Args:
        deb_paths: Mapping of kernel name (e.g. ``5.4.0-170-generic``) to the
            path of its .deb file.

    Returns:
        dict of kernel name -> path of the extracted ``vmlinuz-<name>`` file.
    """
    results = {}
    for name, deb_path in deb_paths.items():
        vmlinuz_path = tmp_path / f"vmlinuz-{name}"
        # The ">" redirection creates the target even when the pipeline fails,
        # so a zero-byte file means a previous attempt broke; extract again.
        # This mirrors the size check in extract_vmlinux_files.
        if not vmlinuz_path.exists() or vmlinuz_path.stat().st_size == 0:
            print(f"Extracting {deb_path} to {vmlinuz_path}")
            system(f"dpkg --fsys-tarfile {deb_path} | tar -xO ./boot/vmlinuz-{name} > {vmlinuz_path}")
        else:
            print(f"Using {vmlinuz_path}")
        results[name] = vmlinuz_path
    return results


# Extract (or reuse) the compressed kernel image of each .deb and show the paths.
vmlinuz_paths = extract_vmlinuz_files(deb_paths)

vmlinuz_paths

Using data/20.04-x86/tmp/vmlinuz-5.11.0-46-generic
Using data/20.04-x86/tmp/vmlinuz-5.13.0-52-generic
Using data/20.04-x86/tmp/vmlinuz-5.15.0-92-generic
Using data/20.04-x86/tmp/vmlinuz-5.4.0-170-generic
Using data/20.04-x86/tmp/vmlinuz-5.8.0-63-generic


{'5.11.0-46-generic': PosixPath('data/20.04-x86/tmp/vmlinuz-5.11.0-46-generic'),
 '5.13.0-52-generic': PosixPath('data/20.04-x86/tmp/vmlinuz-5.13.0-52-generic'),
 '5.15.0-92-generic': PosixPath('data/20.04-x86/tmp/vmlinuz-5.15.0-92-generic'),
 '5.4.0-170-generic': PosixPath('data/20.04-x86/tmp/vmlinuz-5.4.0-170-generic'),
 '5.8.0-63-generic': PosixPath('data/20.04-x86/tmp/vmlinuz-5.8.0-63-generic')}

In [57]:
# Helper executable that extract_vmlinux_files runs on non-arm64 kernel images.
# None of the cells shown here fetch it, so it has to be in place already.
extract_vmlinux = Path("data") / "extract-vmlinux"

# Fail early, before any extraction is attempted, if the helper is missing.
assert extract_vmlinux.exists(), f"{extract_vmlinux} does not exist"

def extract_vmlinux_files(vmlinuz_paths):
    """Decompress each ``vmlinuz`` image to ``tmp_path / "vmlinux-<name>"``.

    An output file that exists and is non-empty is reused. Otherwise the
    image is decompressed with ``zcat`` when the input path contains
    ``"arm64"``, and with the ``extract_vmlinux`` helper in all other cases.

    Args:
        vmlinuz_paths: Mapping of kernel name -> path of its vmlinuz file.

    Returns:
        dict of kernel name -> path of the decompressed vmlinux file.
    """
    results = {}
    for name, vmlinuz_path in vmlinuz_paths.items():
        vmlinux_path = tmp_path / f"vmlinux-{name}"
        results[name] = vmlinux_path

        # An empty file is what a failed earlier run leaves behind; redo it.
        already_extracted = vmlinux_path.exists() and vmlinux_path.stat().st_size != 0
        if already_extracted:
            print(f"Using {vmlinux_path}")
            continue

        print(f"Extracting {vmlinuz_path} to {vmlinux_path}")
        # TODO: check how the file is compressed
        if "arm64" in str(vmlinuz_path):
            command = f"zcat {vmlinuz_path} > {vmlinux_path}"
        else:
            command = f"{extract_vmlinux} {vmlinuz_path} > {vmlinux_path}"
        system(command)
    return results


# Decompress (or reuse) each kernel image and show the resulting vmlinux paths.
vmlinux_paths = extract_vmlinux_files(vmlinuz_paths)

vmlinux_paths

Using data/20.04-x86/tmp/vmlinux-5.11.0-46-generic
Using data/20.04-x86/tmp/vmlinux-5.13.0-52-generic
Using data/20.04-x86/tmp/vmlinux-5.15.0-92-generic
Using data/20.04-x86/tmp/vmlinux-5.4.0-170-generic
Using data/20.04-x86/tmp/vmlinux-5.8.0-63-generic


{'5.11.0-46-generic': PosixPath('data/20.04-x86/tmp/vmlinux-5.11.0-46-generic'),
 '5.13.0-52-generic': PosixPath('data/20.04-x86/tmp/vmlinux-5.13.0-52-generic'),
 '5.15.0-92-generic': PosixPath('data/20.04-x86/tmp/vmlinux-5.15.0-92-generic'),
 '5.4.0-170-generic': PosixPath('data/20.04-x86/tmp/vmlinux-5.4.0-170-generic'),
 '5.8.0-63-generic': PosixPath('data/20.04-x86/tmp/vmlinux-5.8.0-63-generic')}

In [35]:
def parse_version_triple(name):
    """Return the dotted version before the first ``-`` as a tuple of ints.

    Example: ``"5.4.0-170-generic"`` -> ``(5, 4, 0)``.
    """
    dotted, _, _ = name.partition("-")
    return tuple(int(part) for part in dotted.split("."))

# Quick check on a real kernel name; the cell output shows the parsed tuple.
parse_version_triple("5.4.0-170-generic")

(5, 4, 0)

In [58]:
def extract_btf_files(vmlinux_paths):
    """Dump the ``.BTF`` section of each vmlinux to ``data_path / "<name>.btf"``.

    An existing .btf file is reused. Note that the returned dict is keyed by
    the vmlinux *file name* (``vmlinux-<name>``), not by the bare kernel name.

    Args:
        vmlinux_paths: Mapping of kernel name -> path of its vmlinux file.

    Returns:
        dict of vmlinux file name -> path of the .btf file.
    """
    results = {}
    for name, vmlinux_path in vmlinux_paths.items():
        # NOTE: a version gate that skipped kernels <= 5.8.0 for lacking BTF
        # support existed here and is currently disabled.
        btf_path = data_path / f"{name}.btf"
        if btf_path.exists():
            print(f"Using {btf_path}")
        else:
            print(f"Extracting {vmlinux_path} to {btf_path}")
            # objcopy is used instead of `pahole --btf_encode_detached` because
            # pahole sometimes fails with "btf_encoder__new: cannot get ELF
            # header" and seems to do more processing than is needed here.
            dump_command = f"objcopy -I elf64-little {vmlinux_path} --dump-section .BTF={btf_path}"
            system(dump_command)
        results[vmlinux_path.name] = btf_path

    return results


# Dump (or reuse) the BTF data of each kernel and show the resulting files.
btf_paths = extract_btf_files(vmlinux_paths)

btf_paths

Using data/20.04-x86/5.11.0-46-generic.btf
Using data/20.04-x86/5.13.0-52-generic.btf
Using data/20.04-x86/5.15.0-92-generic.btf
Using data/20.04-x86/5.4.0-170-generic.btf
Using data/20.04-x86/5.8.0-63-generic.btf


{'vmlinux-5.11.0-46-generic': PosixPath('data/20.04-x86/5.11.0-46-generic.btf'),
 'vmlinux-5.13.0-52-generic': PosixPath('data/20.04-x86/5.13.0-52-generic.btf'),
 'vmlinux-5.15.0-92-generic': PosixPath('data/20.04-x86/5.15.0-92-generic.btf'),
 'vmlinux-5.4.0-170-generic': PosixPath('data/20.04-x86/5.4.0-170-generic.btf'),
 'vmlinux-5.8.0-63-generic': PosixPath('data/20.04-x86/5.8.0-63-generic.btf')}