In [2]:
import os


def system(cmd):
    """Echo a shell command, run it through the shell, and return its status.

    Args:
        cmd: Command line handed verbatim to ``os.system``. It is interpreted
            by the shell, so any paths interpolated into it must not contain
            spaces or shell metacharacters.

    Returns:
        The value returned by ``os.system`` (0 when the command succeeded).
        Existing callers that ignore the return value keep working.
    """
    # Flush so the echoed command is emitted before anything the child
    # process writes directly to the inherited stdout.
    print(cmd, flush=True)
    status = os.system(cmd)
    if status != 0:
        # Previously a failing command was indistinguishable from a
        # successful one; surface it without aborting the enclosing loop.
        print(f"Command failed (status {status}): {cmd}", flush=True)
    return status

In [3]:
from pathlib import Path

# Working directory for this notebook: the `.tar.xz` archives are read from
# here and every extracted or generated file is written back into it.
data_path = Path("data/ubuntu-18.04-x86")

In [10]:
# Unpack every `.xz` archive found in the data directory, in place.
for file in data_path.iterdir():
    if file.suffix != ".xz":
        # Not an archive (e.g. an already extracted file) -- leave it alone.
        continue
    system(f"tar -xf {file} -C {data_path}")

tar -xf data/ubuntu-18.04-x86/5.0.0-65-generic.btf.tar.xz -C data/ubuntu-18.04-x86
tar -xf data/ubuntu-18.04-x86/4.18.0-25-generic.btf.tar.xz -C data/ubuntu-18.04-x86
tar -xf data/ubuntu-18.04-x86/5.3.0-76-generic.btf.tar.xz -C data/ubuntu-18.04-x86
tar -xf data/ubuntu-18.04-x86/4.15.0-213-generic.btf.tar.xz -C data/ubuntu-18.04-x86
tar -xf data/ubuntu-18.04-x86/5.4.0-91-generic.btf.tar.xz -C data/ubuntu-18.04-x86


In [2]:
# Install bpftool via apt. Per the captured output of this cell, apt resolves
# the `bpftool` name to the `linux-tools-common` package.
!sudo apt install linux-tools-generic bpftool

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
Note, selecting 'linux-tools-common' instead of 'bpftool'
linux-tools-common is already the newest version (6.2.0-39.40).
linux-tools-generic is already the newest version (6.2.0.39.39).
0 upgraded, 0 newly installed, 0 to remove and 0 not upgraded.


In [6]:
def _linux_tools_version_key(path):
    """Sort key that orders version-like directory names numerically.

    The name is split on ``.`` and ``-``; numeric pieces compare as integers
    and always sort before non-numeric pieces, so ``6.2.0-100-generic`` ranks
    above ``6.2.0-39-generic`` (a plain string sort ranks it below).
    """
    parts = path.name.replace("-", ".").split(".")
    return [
        (0, int(part), "") if part.isdecimal() else (1, 0, part)
        for part in parts
    ]


def get_linux_tools_path(parent=Path("/usr/lib/linux-tools")):
    """Return the sub-directory of ``parent`` with the highest version name.

    Args:
        parent: Directory whose sub-directories are named after versions.
            Defaults to ``/usr/lib/linux-tools``.

    Returns:
        Path of the sub-directory whose name has the highest version.

    Raises:
        Exception: if ``parent`` contains no sub-directories.
        OSError: propagated from ``iterdir`` when ``parent`` cannot be listed.
    """
    parent = Path(parent)
    versions = [x for x in parent.iterdir() if x.is_dir()]
    if not versions:
        raise Exception("No linux-tools found")
    # Entries yielded by iterdir() already include `parent`, so the winner is
    # returned as-is instead of being joined onto `parent` a second time.
    return max(versions, key=_linux_tools_version_key)


def get_bpftool_path():
    """Locate the ``bpftool`` file inside the selected linux-tools directory.

    Returns:
        Path to ``bpftool`` under the directory chosen by
        ``get_linux_tools_path()``.

    Raises:
        Exception: if no ``bpftool`` entry exists in that directory.
    """
    candidate = get_linux_tools_path().joinpath("bpftool")
    if candidate.exists():
        return candidate
    raise Exception("bpftool not found")


# Resolve the bpftool location once; the dump cell interpolates this path
# into the shell commands it runs.
bpftool_path = get_bpftool_path()

In [9]:
# For every `.btf` file, write three bpftool dumps next to it: a C header
# (`.h`), the raw text listing (`.txt`) and the JSON form (`.json`).
for file in data_path.iterdir():
    if file.suffix != ".btf":
        continue
    for dump_args, out_suffix in (
        ("format c", ".h"),
        ("format raw", ".txt"),
        ("--json", ".json"),
    ):
        system(
            f"{bpftool_path} btf dump file {file} {dump_args} > {file.with_suffix(out_suffix)}")

/usr/lib/linux-tools/6.2.0-39-generic/bpftool btf dump file data/ubuntu-18.04-x86/5.4.0-91-generic.btf format c > data/ubuntu-18.04-x86/5.4.0-91-generic.h
/usr/lib/linux-tools/6.2.0-39-generic/bpftool btf dump file data/ubuntu-18.04-x86/5.4.0-91-generic.btf format raw > data/ubuntu-18.04-x86/5.4.0-91-generic.txt
/usr/lib/linux-tools/6.2.0-39-generic/bpftool btf dump file data/ubuntu-18.04-x86/5.4.0-91-generic.btf --json > data/ubuntu-18.04-x86/5.4.0-91-generic.json
/usr/lib/linux-tools/6.2.0-39-generic/bpftool btf dump file data/ubuntu-18.04-x86/5.0.0-65-generic.btf format c > data/ubuntu-18.04-x86/5.0.0-65-generic.h
/usr/lib/linux-tools/6.2.0-39-generic/bpftool btf dump file data/ubuntu-18.04-x86/5.0.0-65-generic.btf format raw > data/ubuntu-18.04-x86/5.0.0-65-generic.txt
/usr/lib/linux-tools/6.2.0-39-generic/bpftool btf dump file data/ubuntu-18.04-x86/5.0.0-65-generic.btf --json > data/ubuntu-18.04-x86/5.0.0-65-generic.json
/usr/lib/linux-tools/6.2.0-39-generic/bpftool btf dump file 

In [4]:
# All JSON dumps in the data directory, ordered by path so that the indices
# used later are stable between runs.
json_files = [entry for entry in data_path.iterdir() if entry.suffix == ".json"]
json_files.sort()

In [44]:
import json


def load_json(json_path):
    """Parse the JSON document stored at ``json_path``.

    Args:
        json_path: Path (``str`` or path-like) of the file to read.

    Returns:
        The decoded JSON value.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    # Decode explicitly as UTF-8: without `encoding`, open() uses the locale's
    # preferred encoding, which can mis-decode non-ASCII text on some systems.
    with open(json_path, encoding="utf-8") as f:
        return json.load(f)

In [45]:
class BTF:
    """The ``types`` list of one JSON BTF dump, addressable by type id.

    ``path`` is the dump location as passed in (``__repr__`` needs it to have
    a ``.stem``, i.e. to be a ``pathlib.Path``). ``data`` is the list stored
    under the ``'types'`` key of the JSON file; each entry is a dict carrying
    at least ``'id'``, ``'kind'`` and ``'name'``.
    """

    def __init__(self, path):
        """Load the dump at ``path`` and keep its ``'types'`` list."""
        self.path = path
        self.data = load_json(path)['types']

    def __getitem__(self, id):
        """Return the entry whose ``'id'`` equals ``id``.

        Entries are expected to be numbered densely from 1, so entry ``id``
        lives at list index ``id - 1``.

        Raises:
            IndexError: if ``id`` is outside ``1..len(self)``. This includes
                id 0, which other entries reference (e.g. ``ret_type_id: 0``)
                but which has no entry of its own -- presumably BTF's
                reserved "void" id; confirm against the BTF documentation.
            AssertionError: if the entry found at that position carries a
                different id, i.e. the dump is not densely numbered.
        """
        # Explicit range check: a bare `self.data[id - 1]` would wrap around
        # for id <= 0, and an `assert`-based guard vanishes under `python -O`.
        if not 1 <= id <= len(self.data):
            raise IndexError(
                f"BTF type id {id} out of range 1..{len(self.data)}")
        e = self.data[id - 1]
        if e['id'] != id:
            raise AssertionError(
                f"entry at position {id - 1} has id {e['id']}, expected {id}")
        return e

    def __iter__(self):
        """Iterate over the entries in dump order.

        Defined explicitly so that ``for e in btf`` does not fall back to
        calling ``__getitem__`` with 0, 1, 2, ... (ids start at 1).
        """
        return iter(self.data)

    def __len__(self):
        """Number of type entries in the dump."""
        return len(self.data)

    def __repr__(self):
        """File name of the dump without its extension."""
        return self.path.stem

    def print(self):
        """Print one sample entry per kind and a histogram of kinds.

        The sample is the first entry encountered for each kind; the
        histogram is ordered by descending count.
        """
        from collections import Counter

        print("Sample:")
        kinds = Counter()
        for e in self.data:
            if e['kind'] not in kinds:
                # First time this kind shows up: use it as the sample.
                print(f"\t{e}")
            kinds[e['kind']] += 1

        # most_common() sorts by count, descending, keeping first-seen order
        # among equal counts.
        print(f"Kinds: {dict(kinds.most_common())}")

    def filter_on_kind(self, kind):
        """Map name -> entry for all named entries of the given kind.

        Entries named ``'(anon)'`` are skipped. If several entries of that
        kind share a name, the last one in the dump wins.
        """
        return {
            e['name']: e for e in self.data
            if e['kind'] == kind and e['name'] != '(anon)'
        }


# Load the first two JSON dumps (in sorted path order); they serve as the
# old/new pair for the comparison cells.
d1 = BTF(json_files[0])
d2 = BTF(json_files[1])

In [31]:
# Show one sample entry per kind and the kind histogram for the first dump.
d1.print()

Sample:
	{'id': 1, 'kind': 'INT', 'name': 'long unsigned int', 'size': 8, 'bits_offset': 0, 'nr_bits': 64, 'encoding': '(none)'}
	{'id': 2, 'kind': 'CONST', 'name': '(anon)', 'type_id': 1}
	{'id': 3, 'kind': 'VOLATILE', 'name': '(anon)', 'type_id': 1}
	{'id': 4, 'kind': 'ARRAY', 'name': '(anon)', 'type_id': 1, 'index_type_id': 20, 'nr_elems': 2}
	{'id': 5, 'kind': 'PTR', 'name': '(anon)', 'type_id': 8}
	{'id': 11, 'kind': 'TYPEDEF', 'name': '__s8', 'type_id': 12}
	{'id': 38, 'kind': 'FUNC_PROTO', 'name': '(anon)', 'ret_type_id': 0, 'vlen': 1, 'params': [{'name': '(anon)', 'type_id': 20}]}
	{'id': 78, 'kind': 'STRUCT', 'name': '(anon)', 'size': 4, 'vlen': 1, 'members': [{'name': 'counter', 'type_id': 20, 'bits_offset': 0}]}
	{'id': 106, 'kind': 'UNION', 'name': '(anon)', 'size': 8, 'vlen': 3, 'members': [{'name': 'type', 'type_id': 1, 'bits_offset': 0}, {'name': 'entries', 'type_id': 108, 'bits_offset': 0}, {'name': 'next', 'type_id': 109, 'bits_offset': 0}]}
	{'id': 120, 'kind': 'ENUM'

In [32]:
# Same overview for the second dump, for side-by-side comparison.
d2.print()

Sample:
	{'id': 1, 'kind': 'INT', 'name': 'long unsigned int', 'size': 8, 'bits_offset': 0, 'nr_bits': 64, 'encoding': '(none)'}
	{'id': 2, 'kind': 'CONST', 'name': '(anon)', 'type_id': 1}
	{'id': 3, 'kind': 'VOLATILE', 'name': '(anon)', 'type_id': 1}
	{'id': 4, 'kind': 'ARRAY', 'name': '(anon)', 'type_id': 1, 'index_type_id': 21, 'nr_elems': 2}
	{'id': 5, 'kind': 'PTR', 'name': '(anon)', 'type_id': 8}
	{'id': 11, 'kind': 'TYPEDEF', 'name': '__s8', 'type_id': 12}
	{'id': 37, 'kind': 'ENUM', 'name': '(anon)', 'encoding': 'UNSIGNED', 'size': 4, 'vlen': 2, 'values': [{'name': 'false', 'val': 0}, {'name': 'true', 'val': 1}]}
	{'id': 40, 'kind': 'FUNC_PROTO', 'name': '(anon)', 'ret_type_id': 0, 'vlen': 1, 'params': [{'name': '(anon)', 'type_id': 21}]}
	{'id': 80, 'kind': 'STRUCT', 'name': '(anon)', 'size': 4, 'vlen': 1, 'members': [{'name': 'counter', 'type_id': 21, 'bits_offset': 0}]}
	{'id': 110, 'kind': 'UNION', 'name': '(anon)', 'size': 8, 'vlen': 3, 'members': [{'name': 'type', 'type_i

In [109]:
import copy

def normalize_struct(s):
    """Return a copy of a struct entry reduced to its comparable fields.

    The copy has ``'vlen'`` and ``'id'`` removed, and ``'type_id'`` removed
    from every member, so that two entries can be compared with ``==``
    without those keys taking part. The input entry is not modified.

    Raises:
        AssertionError: if ``'vlen'`` disagrees with the number of members.
        KeyError: if one of the removed keys is missing.
    """
    clone = copy.deepcopy(s)
    assert clone['vlen'] == len(clone['members'])
    for key in ('vlen', 'id'):
        del clone[key]
    for member in clone['members']:
        member.pop('type_id')
    return clone

def diff_struct(old, new):
    """Describe how two versions of a struct entry differ.

    Checks run from coarse to fine and the first one that applies wins:
    member names added/removed, member order, overall size, member offsets,
    member count, and finally any other per-member attribute.

    Args:
        old: Struct entry from the older dump; needs ``'size'`` and
            ``'members'`` (each member with ``'name'`` and ``'bits_offset'``).
        new: Same entry from the newer dump.

    Returns:
        A human-readable reason string. Field names are listed in sorted
        order so that the text is identical from run to run.

    Raises:
        AssertionError: if none of the checks finds a difference.
    """
    old_members = {m['name']: m for m in old['members']}
    new_members = {m['name']: m for m in new['members']}

    # Fields added or removed. Sorted lists rather than raw sets, whose
    # iteration order for strings varies between interpreter runs.
    added_members = sorted(new_members.keys() - old_members.keys())
    removed_members = sorted(old_members.keys() - new_members.keys())
    if added_members and removed_members:
        return f"Fields added and removed: {added_members} {removed_members}"
    if added_members:
        return f"Added fields: {added_members}"
    if removed_members:
        return f"Removed fields: {removed_members}"

    # Fields reordered (dicts preserve the declaration order of the names).
    if list(old_members) != list(new_members):
        if old['size'] != new['size']:
            return "Struct size changed caused by field reordering"
        return "Fields reordered"

    if old['size'] != new['size']:
        return "Struct size changed caused by field size change"

    for old_member, new_member in zip(old['members'], new['members']):
        if old_member['bits_offset'] != new_member['bits_offset']:
            return "Field changed size without affecting size of struct"

    # The name-keyed dicts above collapse members that share a name, so a
    # change in how many such members exist is only visible in the raw lists.
    if len(old['members']) != len(new['members']):
        return "Number of members changed (members sharing a name)"

    # Any remaining per-member difference (an attribute other than the
    # offset). 'type_id' is ignored: normalize_struct drops it before
    # entries are compared, so it never marks a struct as changed.
    for old_member, new_member in zip(old['members'], new['members']):
        changed_keys = sorted(
            key for key in old_member.keys() | new_member.keys()
            if key != 'type_id' and old_member.get(key) != new_member.get(key)
        )
        if changed_keys:
            return f"Field {old_member['name']} changed attributes: {changed_keys}"

    # Explicit raise (not `assert False`) so the check survives `python -O`.
    raise AssertionError(f"\n{old}\n{new}")


def print_collection(name, s):
    """Print a one-line summary of a collection: label, size, and members.

    Args:
        name: Label placed at the start of the line.
        s: Sized iterable; its length and its items (as a list) are printed.
    """
    count = len(s)
    items = list(s)
    print("{} ({}): {}".format(name, count, items))

def check_diff(d_old, d_new, kind):
    """Print a name-level and content-level comparison of one kind of entry.

    Named entries of ``kind`` are pulled from both dumps via
    ``filter_on_kind``. The function prints the old and new name sets, the
    removed and added names, the names common to both, the subset of common
    names whose normalized entries differ, and one reason line per changed
    entry as reported by ``diff_struct``.

    Args:
        d_old: Older dump; must provide ``filter_on_kind(kind)``.
        d_new: Newer dump; same requirement.
        kind: Value of the ``'kind'`` field to compare, e.g. ``'STRUCT'``.
    """
    before = d_old.filter_on_kind(kind)
    after = d_new.filter_on_kind(kind)
    old_names = before.keys()
    new_names = after.keys()

    print_collection(f"Old {kind}", old_names)
    print_collection(f"New {kind}", new_names)

    print_collection(f"Removed {kind}", old_names - new_names)
    print_collection(f"Added {kind}", new_names - old_names)

    # Pair up the two versions of every entry present in both dumps.
    common = {}
    for name in old_names & new_names:
        common[name] = (before[name], after[name])
    print_collection(f"Common {kind}", common.keys())

    # Keep only the pairs that still differ once normalize_struct has been
    # applied to both sides.
    changed = {}
    for name, pair in common.items():
        old, new = pair
        if normalize_struct(old) != normalize_struct(new):
            changed[name] = pair
    print_collection(f"Changed {kind}", changed.keys())

    for name, (old, new) in changed.items():
        print(f"{kind} {name} differs: {diff_struct(old, new)}")
        






# Compare the named STRUCT entries of the two loaded dumps (d1 = old, d2 = new).
check_diff(d1, d2, 'STRUCT')

Old STRUCT (7329): ['list_head', 'hlist_head', 'hlist_node', 'callback_head', 'jump_entry', 'static_key', 'static_key_true', 'static_key_false', 'file_operations', 'atomic_notifier_head', 'taint_flag', 'timespec', 'restart_block', 'task_struct', 'screen_info', 'apm_bios_info', 'apm_info', 'edd_device_params', 'edd_info', 'edd', 'ist_info', 'edid_info', 'setup_header', 'sys_desc_table', 'olpc_ofw_header', 'efi_info', 'boot_e820_entry', 'boot_params', 'range', 'pt_regs', 'desc_struct', 'idt_bits', 'gate_struct', 'desc_ptr', 'pgprot', 'page', 'paravirt_callee_save', 'pv_info', 'pv_init_ops', 'pv_lazy_ops', 'pv_time_ops', 'pv_cpu_ops', 'thread_struct', 'pv_irq_ops', 'pv_mmu_ops', 'mm_struct', 'cpumask', 'flush_tlb_info', 'qspinlock', 'qrwlock', 'pv_lock_ops', 'paravirt_patch_site', 'math_emu_info', 'tracepoint_func', 'tracepoint', 'fregs_state', 'fxregs_state', 'swregs_state', 'xstate_header', 'xregs_state', 'fpu', 'cpuinfo_x86', 'x86_hw_tss', 'seq_operations', 'entry_stack', 'entry_stack_

In [None]:
# Sorry, I wasn't awake today at 5:45 PM (Central Time), but that time slot works for me in the future.