# Getting BTF Files for Ubuntu 20.04

In [26]:
!sudo apt install zstd linux-tools-generic -y

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
zstd is already the newest version (1.5.4+dfsg2-4).
linux-tools-generic is already the newest version (6.2.0.39.39).
0 upgraded, 0 newly installed, 0 to remove and 2 not upgraded.


In [27]:
%load_ext autoreload
%autoreload now

from utils.system import system
from utils.bpftool import get_bpftool_path

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [28]:
from pathlib import Path

data_path = Path("data/20.04-x86")
url_prefix = "http://security.ubuntu.com/ubuntu"
package_url = f"{url_prefix}/dists/focal-security/main/binary-amd64/Packages.gz"

data_path = Path("data/20.04-arm64")
url_prefix = "http://ports.ubuntu.com/ubuntu-ports"
package_url = f"{url_prefix}/dists/focal-security/main/binary-arm64/Packages.gz"

tmp_path = data_path / "tmp"
tmp_path.mkdir(parents=True, exist_ok=True)

In [29]:
import urllib.request
import gzip


def download_package_index():
    gz_path = tmp_path / "Packages.gz"

    if not gz_path.exists():
        print(f"Downloading {package_url} to {gz_path}")
        urllib.request.urlretrieve(package_url, gz_path)
    else:
        print(f"Using {gz_path}")

    package_path = gz_path.with_suffix("")
    if not package_path.with_suffix("").exists():
        print(f"Unzipping {gz_path} to {package_path}")
        with gzip.open(gz_path, "rb") as f_in:
            with open(package_path, "wb") as f_out:
                f_out.write(f_in.read())
    else:
        print(f"Using {package_path}")

    return package_path


package_path = download_package_index()
package_path

Downloading http://ports.ubuntu.com/ubuntu-ports/dists/focal-security/main/binary-arm64/Packages.gz to data/20.04-arm64/tmp/Packages.gz


Unzipping data/20.04-arm64/tmp/Packages.gz to data/20.04-arm64/tmp/Packages


PosixPath('data/20.04-arm64/tmp/Packages')

In [30]:
def parse_package_index(package_path):
    result = {}
    with open(package_path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            key, val = line.split(": ", 1)
            if key == "Package":
                package = val
            elif key == "Filename":
                result[package] = val
    return result


package_index = parse_package_index(package_path)

package_index.keys()

dict_keys(['accountsservice', 'adcli', 'adsys', 'adsys-windows', 'advancecomp', 'aide', 'aide-common', 'apache2', 'apache2-bin', 'apache2-data', 'apache2-dev', 'apache2-doc', 'apache2-ssl-dev', 'apache2-utils', 'apport', 'apport-gtk', 'apport-retrace', 'apt', 'apt-doc', 'apt-utils', 'aptdaemon', 'aptdaemon-data', 'aspell', 'aspell-doc', 'avahi-autoipd', 'avahi-daemon', 'avahi-utils', 'awstats', 'backuppc', 'barbican-api', 'barbican-common', 'barbican-doc', 'barbican-keystone-listener', 'barbican-worker', 'bash', 'bash-doc', 'bind9', 'bind9-dnsutils', 'bind9-doc', 'bind9-host', 'bind9-libs', 'bind9-utils', 'binutils', 'binutils-aarch64-linux-gnu', 'binutils-aarch64-linux-gnu-dbg', 'binutils-arm-linux-gnueabihf', 'binutils-arm-linux-gnueabihf-dbg', 'binutils-common', 'binutils-dev', 'binutils-doc', 'binutils-i686-linux-gnu', 'binutils-i686-linux-gnu-dbg', 'binutils-multiarch', 'binutils-multiarch-dbg', 'binutils-multiarch-dev', 'binutils-powerpc64le-linux-gnu', 'binutils-powerpc64le-linu

In [31]:
import re
from collections import defaultdict


def filter_linux_images(package_index):
    result = defaultdict(dict)
    for package, path in package_index.items():
        groups = re.match(r"^linux-image-(\d+\.\d+\.\d+)-(\d+)-generic$", package)
        if groups is None:
            continue

        version, build = groups.groups()
        result[version][int(build)] = package, path

    return [v[max(v.keys())] for v in result.values()]


linux_versions = filter_linux_images(package_index)

linux_versions

[('linux-image-5.11.0-46-generic',
  'pool/main/l/linux-signed-hwe-5.11/linux-image-5.11.0-46-generic_5.11.0-46.51~20.04.1_arm64.deb'),
 ('linux-image-5.13.0-52-generic',
  'pool/main/l/linux-signed-hwe-5.13/linux-image-5.13.0-52-generic_5.13.0-52.59~20.04.1_arm64.deb'),
 ('linux-image-5.15.0-94-generic',
  'pool/main/l/linux-signed-hwe-5.15/linux-image-5.15.0-94-generic_5.15.0-94.104~20.04.1_arm64.deb'),
 ('linux-image-5.4.0-171-generic',
  'pool/main/l/linux-signed/linux-image-5.4.0-171-generic_5.4.0-171.189_arm64.deb'),
 ('linux-image-5.8.0-63-generic',
  'pool/main/l/linux-signed-hwe-5.8/linux-image-5.8.0-63-generic_5.8.0-63.71~20.04.1_arm64.deb')]

In [32]:
def download_deb_files(linux_versions):
    results = {}
    for package, path in linux_versions:
        url = f"{url_prefix}/{path}"
        file_path = tmp_path / path.split("/")[-1]
        if not file_path.exists():
            print(f"Downloading {url} to {file_path}")
            urllib.request.urlretrieve(url, file_path)
        else:
            print(f"Using {file_path}")

        key = package.removeprefix("linux-image-")
        results[key] = file_path

    return results


deb_paths = download_deb_files(linux_versions)

deb_paths

Downloading http://ports.ubuntu.com/ubuntu-ports/pool/main/l/linux-signed-hwe-5.11/linux-image-5.11.0-46-generic_5.11.0-46.51~20.04.1_arm64.deb to data/20.04-arm64/tmp/linux-image-5.11.0-46-generic_5.11.0-46.51~20.04.1_arm64.deb
Downloading http://ports.ubuntu.com/ubuntu-ports/pool/main/l/linux-signed-hwe-5.13/linux-image-5.13.0-52-generic_5.13.0-52.59~20.04.1_arm64.deb to data/20.04-arm64/tmp/linux-image-5.13.0-52-generic_5.13.0-52.59~20.04.1_arm64.deb
Downloading http://ports.ubuntu.com/ubuntu-ports/pool/main/l/linux-signed-hwe-5.15/linux-image-5.15.0-94-generic_5.15.0-94.104~20.04.1_arm64.deb to data/20.04-arm64/tmp/linux-image-5.15.0-94-generic_5.15.0-94.104~20.04.1_arm64.deb
Downloading http://ports.ubuntu.com/ubuntu-ports/pool/main/l/linux-signed/linux-image-5.4.0-171-generic_5.4.0-171.189_arm64.deb to data/20.04-arm64/tmp/linux-image-5.4.0-171-generic_5.4.0-171.189_arm64.deb
Downloading http://ports.ubuntu.com/ubuntu-ports/pool/main/l/linux-signed-hwe-5.8/linux-image-5.8.0-63-ge

{'5.11.0-46-generic': PosixPath('data/20.04-arm64/tmp/linux-image-5.11.0-46-generic_5.11.0-46.51~20.04.1_arm64.deb'),
 '5.13.0-52-generic': PosixPath('data/20.04-arm64/tmp/linux-image-5.13.0-52-generic_5.13.0-52.59~20.04.1_arm64.deb'),
 '5.15.0-94-generic': PosixPath('data/20.04-arm64/tmp/linux-image-5.15.0-94-generic_5.15.0-94.104~20.04.1_arm64.deb'),
 '5.4.0-171-generic': PosixPath('data/20.04-arm64/tmp/linux-image-5.4.0-171-generic_5.4.0-171.189_arm64.deb'),
 '5.8.0-63-generic': PosixPath('data/20.04-arm64/tmp/linux-image-5.8.0-63-generic_5.8.0-63.71~20.04.1_arm64.deb')}

In [33]:
def extract_vmlinuz_files(deb_paths):
    results = {}
    for name, deb_path in deb_paths.items():
        vmlinuz_path = tmp_path / f"vmlinuz-{name}"
        if not vmlinuz_path.exists():
            print(f"Extracting {deb_path} to {vmlinuz_path}")
            # system(f"dpkg -x {deb_path} {tmp_path}")
            system(f"dpkg --fsys-tarfile {deb_path} | tar -xO ./boot/vmlinuz-{name} > {vmlinuz_path}")
        else:
            print(f"Using {vmlinuz_path}")
        results[name] = vmlinuz_path
    return results


vmlinuz_paths = extract_vmlinuz_files(deb_paths)

vmlinuz_paths

Extracting data/20.04-arm64/tmp/linux-image-5.11.0-46-generic_5.11.0-46.51~20.04.1_arm64.deb to data/20.04-arm64/tmp/vmlinuz-5.11.0-46-generic
Running command: "[92mdpkg --fsys-tarfile data/20.04-arm64/tmp/linux-image-5.11.0-46-generic_5.11.0-46.51~20.04.1_arm64.deb | tar -xO ./boot/vmlinuz-5.11.0-46-generic > data/20.04-arm64/tmp/vmlinuz-5.11.0-46-generic[0m"
Extracting data/20.04-arm64/tmp/linux-image-5.13.0-52-generic_5.13.0-52.59~20.04.1_arm64.deb to data/20.04-arm64/tmp/vmlinuz-5.13.0-52-generic
Running command: "[92mdpkg --fsys-tarfile data/20.04-arm64/tmp/linux-image-5.13.0-52-generic_5.13.0-52.59~20.04.1_arm64.deb | tar -xO ./boot/vmlinuz-5.13.0-52-generic > data/20.04-arm64/tmp/vmlinuz-5.13.0-52-generic[0m"
Extracting data/20.04-arm64/tmp/linux-image-5.15.0-94-generic_5.15.0-94.104~20.04.1_arm64.deb to data/20.04-arm64/tmp/vmlinuz-5.15.0-94-generic
Running command: "[92mdpkg --fsys-tarfile data/20.04-arm64/tmp/linux-image-5.15.0-94-generic_5.15.0-94.104~20.04.1_arm64.deb 

{'5.11.0-46-generic': PosixPath('data/20.04-arm64/tmp/vmlinuz-5.11.0-46-generic'),
 '5.13.0-52-generic': PosixPath('data/20.04-arm64/tmp/vmlinuz-5.13.0-52-generic'),
 '5.15.0-94-generic': PosixPath('data/20.04-arm64/tmp/vmlinuz-5.15.0-94-generic'),
 '5.4.0-171-generic': PosixPath('data/20.04-arm64/tmp/vmlinuz-5.4.0-171-generic'),
 '5.8.0-63-generic': PosixPath('data/20.04-arm64/tmp/vmlinuz-5.8.0-63-generic')}

In [34]:
extract_vmlinux = Path("data") / "extract-vmlinux"

assert extract_vmlinux.exists(), f"{extract_vmlinux} does not exist"

def extract_vmlinux_files(vmlinuz_paths):
    results = {}
    for name, vmlinuz_path in vmlinuz_paths.items():
        vmlinux_path = tmp_path / f"vmlinux-{name}"
        if not vmlinux_path.exists():
            print(f"Extracting {vmlinuz_path} to {vmlinux_path}")
            # TODO: check how the file is compressed
            # system(f"{extract_vmlinux} {vmlinuz_path} > {vmlinux_path}")
            system(f"zcat {vmlinuz_path} > {vmlinux_path}")
        else:
            print(f"Using {vmlinux_path}")
        results[name] = vmlinux_path
    return results


vmlinux_paths = extract_vmlinux_files(vmlinuz_paths)

vmlinux_paths

Extracting data/20.04-arm64/tmp/vmlinuz-5.11.0-46-generic to data/20.04-arm64/tmp/vmlinux-5.11.0-46-generic
Running command: "[92mzcat data/20.04-arm64/tmp/vmlinuz-5.11.0-46-generic > data/20.04-arm64/tmp/vmlinux-5.11.0-46-generic[0m"
Extracting data/20.04-arm64/tmp/vmlinuz-5.13.0-52-generic to data/20.04-arm64/tmp/vmlinux-5.13.0-52-generic
Running command: "[92mzcat data/20.04-arm64/tmp/vmlinuz-5.13.0-52-generic > data/20.04-arm64/tmp/vmlinux-5.13.0-52-generic[0m"
Extracting data/20.04-arm64/tmp/vmlinuz-5.15.0-94-generic to data/20.04-arm64/tmp/vmlinux-5.15.0-94-generic
Running command: "[92mzcat data/20.04-arm64/tmp/vmlinuz-5.15.0-94-generic > data/20.04-arm64/tmp/vmlinux-5.15.0-94-generic[0m"
Extracting data/20.04-arm64/tmp/vmlinuz-5.4.0-171-generic to data/20.04-arm64/tmp/vmlinux-5.4.0-171-generic
Running command: "[92mzcat data/20.04-arm64/tmp/vmlinuz-5.4.0-171-generic > data/20.04-arm64/tmp/vmlinux-5.4.0-171-generic[0m"
Extracting data/20.04-arm64/tmp/vmlinuz-5.8.0-63-gen

{'5.11.0-46-generic': PosixPath('data/20.04-arm64/tmp/vmlinux-5.11.0-46-generic'),
 '5.13.0-52-generic': PosixPath('data/20.04-arm64/tmp/vmlinux-5.13.0-52-generic'),
 '5.15.0-94-generic': PosixPath('data/20.04-arm64/tmp/vmlinux-5.15.0-94-generic'),
 '5.4.0-171-generic': PosixPath('data/20.04-arm64/tmp/vmlinux-5.4.0-171-generic'),
 '5.8.0-63-generic': PosixPath('data/20.04-arm64/tmp/vmlinux-5.8.0-63-generic')}

In [35]:
def parse_version_triple(name):
    return tuple(map(int, name.split("-")[0].split(".")))

parse_version_triple("5.4.0-170-generic")

(5, 4, 0)

In [36]:
def extract_btf_files(vmlinux_paths):
    results = {}
    for name, vmlinux_path in vmlinux_paths.items():
        # if parse_version_triple(name) <= (5, 8, 0):
            # print(f"Skipping {name} because it doesn't have BTF support")
            # continue
        btf_path = data_path / f"{name}.btf"
        if not btf_path.exists():
            print(f"Extracting {vmlinux_path} to {btf_path}")
            system(
                f"objcopy -I elf64-little {vmlinux_path} --dump-section .BTF={btf_path}"
            )
            # we use objcopy instead of pahole because pahole sometimes fails with
            # "btf_encoder__new: cannot get ELF header", and pahole seems does more
            # processing than we need
            # system(f"pahole --btf_encode_detached {btf_path} {vmlinux_path}")
        else:
            print(f"Using {btf_path}")
        results[vmlinux_path.name] = btf_path

    return results


btf_paths = extract_btf_files(vmlinux_paths)

btf_paths

Extracting data/20.04-arm64/tmp/vmlinux-5.11.0-46-generic to data/20.04-arm64/5.11.0-46-generic.btf
Running command: "[92mobjcopy -I elf64-little data/20.04-arm64/tmp/vmlinux-5.11.0-46-generic --dump-section .BTF=data/20.04-arm64/5.11.0-46-generic.btf[0m"


objcopy: data/20.04-arm64/tmp/vmlinux-5.11.0-46-generic: can't dump section '.BTF' - it does not exist: file in wrong format


Extracting data/20.04-arm64/tmp/vmlinux-5.13.0-52-generic to data/20.04-arm64/5.13.0-52-generic.btf
Running command: "[92mobjcopy -I elf64-little data/20.04-arm64/tmp/vmlinux-5.13.0-52-generic --dump-section .BTF=data/20.04-arm64/5.13.0-52-generic.btf[0m"


objcopy: data/20.04-arm64/tmp/vmlinux-5.13.0-52-generic: can't dump section '.BTF' - it does not exist: file in wrong format


Extracting data/20.04-arm64/tmp/vmlinux-5.15.0-94-generic to data/20.04-arm64/5.15.0-94-generic.btf
Running command: "[92mobjcopy -I elf64-little data/20.04-arm64/tmp/vmlinux-5.15.0-94-generic --dump-section .BTF=data/20.04-arm64/5.15.0-94-generic.btf[0m"


objcopy: data/20.04-arm64/tmp/vmlinux-5.15.0-94-generic: can't dump section '.BTF' - it does not exist: file in wrong format


Extracting data/20.04-arm64/tmp/vmlinux-5.4.0-171-generic to data/20.04-arm64/5.4.0-171-generic.btf
Running command: "[92mobjcopy -I elf64-little data/20.04-arm64/tmp/vmlinux-5.4.0-171-generic --dump-section .BTF=data/20.04-arm64/5.4.0-171-generic.btf[0m"


objcopy: data/20.04-arm64/tmp/vmlinux-5.4.0-171-generic: can't dump section '.BTF' - it does not exist: file in wrong format


Extracting data/20.04-arm64/tmp/vmlinux-5.8.0-63-generic to data/20.04-arm64/5.8.0-63-generic.btf
Running command: "[92mobjcopy -I elf64-little data/20.04-arm64/tmp/vmlinux-5.8.0-63-generic --dump-section .BTF=data/20.04-arm64/5.8.0-63-generic.btf[0m"


objcopy: data/20.04-arm64/tmp/vmlinux-5.8.0-63-generic: can't dump section '.BTF' - it does not exist: file in wrong format


{'vmlinux-5.11.0-46-generic': PosixPath('data/20.04-arm64/5.11.0-46-generic.btf'),
 'vmlinux-5.13.0-52-generic': PosixPath('data/20.04-arm64/5.13.0-52-generic.btf'),
 'vmlinux-5.15.0-94-generic': PosixPath('data/20.04-arm64/5.15.0-94-generic.btf'),
 'vmlinux-5.4.0-171-generic': PosixPath('data/20.04-arm64/5.4.0-171-generic.btf'),
 'vmlinux-5.8.0-63-generic': PosixPath('data/20.04-arm64/5.8.0-63-generic.btf')}

In [38]:
from pathlib import Path

bpftool_path = get_bpftool_path()

for file in Path("data").glob("**/*.btf"):
    for ext, cmd in [
            (".h", "format c"),
            (".txt", "format raw"),
            (".json", "--json"),
        ]:
        result = file.with_suffix(ext)
        if not result.exists():
            system(f"{bpftool_path} btf dump file {file} {cmd} > {result}")
        else:
            print(f"{result} already exists")

Running command: "[92m/usr/lib/linux-tools/6.2.0-39-generic/bpftool btf dump file data/20.04-arm64/5.8.0-63-generic.btf format c > data/20.04-arm64/5.8.0-63-generic.h[0m"
Running command: "[92m/usr/lib/linux-tools/6.2.0-39-generic/bpftool btf dump file data/20.04-arm64/5.8.0-63-generic.btf format raw > data/20.04-arm64/5.8.0-63-generic.txt[0m"
Running command: "[92m/usr/lib/linux-tools/6.2.0-39-generic/bpftool btf dump file data/20.04-arm64/5.8.0-63-generic.btf --json > data/20.04-arm64/5.8.0-63-generic.json[0m"
data/18.04-x86/5.4.0-91-generic.h already exists
data/18.04-x86/5.4.0-91-generic.txt already exists
data/18.04-x86/5.4.0-91-generic.json already exists
data/18.04-x86/5.0.0-65-generic.h already exists
data/18.04-x86/5.0.0-65-generic.txt already exists
data/18.04-x86/5.0.0-65-generic.json already exists
data/18.04-x86/5.3.0-76-generic.h already exists
data/18.04-x86/5.3.0-76-generic.txt already exists
data/18.04-x86/5.3.0-76-generic.json already exists
data/18.04-x86/4.15.