# Debian package statistics

Some code to parse Debian's `Package.xz` fiels

In [1]:
import re, lzma, urllib

In [69]:
class Packages:
    def __init__(self, filename):
        self.pkgs = {}
        with lzma.open(filename, 'rt') as f:
            fields = {}
            for l in f:
                if l == '\n':
                    self.pkgs[fields['Package']] = fields.copy()
                    fields = {}
                elif l.startswith(' '):
                    fields[k] += '\n' + l.strip()
                else:
                    k, v = l.split(':', 1)
                    fields[k] = v.strip()
        self.parse_deps()

    def parse_deps(self):
        regex = re.compile(r'([^\s,|]+)(?: \([^\),]+\))?')
        for p, d in self.pkgs.items():
            for f in ('Depends', 'Recommends', 'Suggests'):
                if f in d:
                    deps = regex.findall(d[f])
                    d['Parsed--' + f] = deps
                    for dep in deps:
                        if dep in self.pkgs:
                            pkg = self.pkgs[dep]
                            rev = pkg.get('Reverse--' + f, set())
                            rev.add(p)
                            pkg['Reverse--' + f] = rev

    def depends(self, pkg, field='Parsed--Depends', deps=None, unsat=None):
        if deps is None:
            deps = set()
        if unsat is None:
            unsat = set()
        if pkg in deps:
            return len(deps) - 1, len(unsat)
        if pkg not in self.pkgs:
            unsat.add(pkg)
            return len(deps), len(unsat) - 1
        deps.add(pkg)
        pkg = self.pkgs[pkg]
        if field not in pkg:
            return len(deps) - 1, len(unsat)

        for d in pkg[field]:
            _, u = self.depends(d, field, deps, unsat)

        return len(deps) - 1, len(unsat)
    
    def mdeps(self, *pkgs, fields=['Parsed--Depends', 'Reverse--Depends']):
        return { p: { f : self.depends(p, f) for f in fields } for p in pkgs }
    
    def search(self, string):
        regex = re.compile(string)
        results = set()
        for p in self.pkgs:
            if regex.search(p):
                results.add(p)
        return results
    
    def __getitem__(self, key):
        return self.pkgs[key]
    
    def __repr__(self):
        return 'Debian Packages file, containing %d packages' % len(self.pkgs)

## September 1, 2015

In [97]:
urllib.request.urlretrieve ("https://snapshot.debian.org/archive/debian/20150901T034109Z/dists/jessie/main/binary-i386/Packages.xz",
                            "Packages-2015.xz")

('Packages-2015.xz', <http.client.HTTPMessage at 0x7fb3a0fa5590>)

In [91]:
pkgs = Packages('Packages-2015.xz')
pkgs

Debian Packages file, containing 42304 packages

In [63]:
pkgs['pari-gp']

{'Package': 'pari-gp',
 'Source': 'pari',
 'Version': '2.7.2-1',
 'Installed-Size': '6040',
 'Maintainer': 'Bill Allombert <ballombe@debian.org>',
 'Architecture': 'i386',
 'Depends': 'libc6 (>= 2.3.6-6~), libgmp10, libreadline6 (>= 6.0), libx11-6',
 'Recommends': 'pari-doc, pari-galdata, pari-elldata, pari-seadata',
 'Suggests': 'pari-gp2c, pari-galpol',
 'Description': 'PARI/GP Computer Algebra System binaries',
 'Homepage': 'http://pari.math.u-bordeaux.fr',
 'Description-md5': 'f3f3e19e97fc49d96307dd88d802d215',
 'Tag': 'field::mathematics, role::documentation, uitoolkit::ncurses',
 'Section': 'math',
 'Priority': 'optional',
 'Filename': 'pool/main/p/pari/pari-gp_2.7.2-1_i386.deb',
 'Size': '2082486',
 'MD5sum': '8334c60c7321142951e36800c2038ec3',
 'SHA1': 'a132fb1e7acf147f0983137f407dd29fee8b0140',
 'SHA256': 'ddcf36c20771d19a9ef510b38a93dd9711175c948d5b8b96ea2f3d213562adbc',
 'Reverse--Suggests': {'education-mathematics',
  'hol-light',
  'pari-elldata',
  'pari-galdata',
  'pari

In [48]:
pkgs.mdeps('pari-gp2c', 'pari-gp', 'gap')

{'pari-gp2c': {'Parsed--Depends': (36, 1), 'Reverse--Depends': (0, 0)},
 'pari-gp': {'Parsed--Depends': (14, 0), 'Reverse--Depends': (6, 0)},
 'gap': {'Parsed--Depends': (13, 0), 'Reverse--Depends': (9, 0)}}

## August 31, 2017

In [81]:
urllib.request.urlretrieve ("https://snapshot.debian.org/archive/debian/20170831T220419Z/dists/stretch/main/binary-i386/Packages.xz",
                            "Packages-2017.xz")

('Packages-2017.xz', <http.client.HTTPMessage at 0x7f2def8d9c50>)

In [82]:
pkgs = Packages('Packages-2017.xz')
pkgs

Debian Packages file, containing 50668 packages

In [83]:
pkgs.mdeps('pari-gp2c', 'pari-gp', 'gap', 'sagemath', 'sagemath-jupyter', 'singular')

{'pari-gp2c': {'Parsed--Depends': (39, 1), 'Reverse--Depends': (0, 0)},
 'pari-gp': {'Parsed--Depends': (16, 0), 'Reverse--Depends': (9, 0)},
 'gap': {'Parsed--Depends': (14, 0), 'Reverse--Depends': (20, 0)},
 'sagemath': {'Parsed--Depends': (602, 27), 'Reverse--Depends': (3, 0)},
 'sagemath-jupyter': {'Parsed--Depends': (653, 27),
  'Reverse--Depends': (0, 0)},
 'singular': {'Parsed--Depends': (21, 0), 'Reverse--Depends': (4, 0)}}

In [90]:
pkgs['gap']

{'Package': 'gap',
 'Version': '4r8p6-2',
 'Installed-Size': '22',
 'Maintainer': 'Bill Allombert <ballombe@debian.org>',
 'Architecture': 'all',
 'Depends': 'gap-core, gap-libs, gap-online-help',
 'Recommends': 'gap-doc, gap-dev, gap-trans-groups, gap-prim-groups, gap-small-groups, gap-autpgrp, gap-alnuth, gap-character-tables, gap-polycyclic, gap-table-of-marks',
 'Suggests': 'gap-small-groups-extra, gap-atlasrep, gap-float, gap-grape, gap-guava, gap-io, gap-openmath, gap-radiroot, gap-scscp',
 'Description': 'computer algebra system for Groups, Algorithms and Programming',
 'Homepage': 'http://www.gap-system.org/',
 'Description-md5': '7710bf392357fe2388d0b4d8cdf04d7f',
 'Tag': 'field::mathematics, role::metapackage',
 'Section': 'math',
 'Priority': 'optional',
 'Filename': 'pool/main/g/gap/gap_4r8p6-2_all.deb',
 'Size': '11412',
 'MD5sum': '8081f13f62b7b06301f55ea55dad802e',
 'SHA256': 'cf24441234bd37c3269ce17a179913cf2d0eb17295fec747f06047510b790d98',
 'Reverse--Recommends': {'re

## Debian buster, August 29, 2019

In [16]:
urllib.request.urlretrieve ("https://snapshot.debian.org/archive/debian/20190829T233739Z/dists/buster/main/binary-i386/Packages.xz",
                            "Packages-2019.xz")

('Packages-2019.xz', <http.client.HTTPMessage at 0x7f2e3f7d9990>)

In [93]:
pkgs = Packages('Packages-2019.xz')
pkgs

Debian Packages file, containing 56484 packages

In [71]:
pkgs['sagemath']

{'Package': 'sagemath',
 'Version': '8.6-6',
 'Installed-Size': '102647',
 'Maintainer': 'Debian Science Team <debian-science-maintainers@lists.alioth.debian.org>',
 'Architecture': 'i386',
 'Depends': 'ecl (>= 16.1.3), gap-core (>= 4r10p0-3), libblas3 | libblas.so.3, libbraiding0, libbrial-groebner3, libbrial3, libc6 (>= 2.4), libcliquer1, libec4, libecm1, libflint-2.5.2, libflint-arb2, libgc1c2 (>= 1:7.2d), libgcc1 (>= 1:7), libgd3 (>= 2.1.0~alpha~), libgivaro9 (>= 4.0.2-8~), libglpk40 (>= 4.59), libgmp10 (>= 2:6.1.0), libgmpxx4ldbl, libgomp1 (>= 4.9), libgsl23 (>= 2.5), libgslcblas0 (>= 2.4), libhomfly0, libiml0, liblapack3 | liblapack.so.3, liblfunction0, liblinbox-1.5.2-0, liblinboxsage-1.5.2-0, liblrcalc1, libm4ri-0.0.20140914, libm4rie-0.0.20150908, libmpc3, libmpfi0 (>= 1.5.1), libmpfr6 (>= 4.0.0), libntl35, libpari-gmp-tls6 (>= 2.11.0-1), libplanarity0, libpng16-16 (>= 1.6.2-1), libppl14, libpynac18, libratpoints-2.1.3, libreadline7 (>= 6.0), librw0, libsingular4m1, libstdc++6

In [72]:
pkgs.mdeps('pari-gp2c', 'pari-gp', 'gap', 'sagemath', 'sagemath-jupyter', 'singular')

{'pari-gp2c': {'Parsed--Depends': (41, 1), 'Reverse--Depends': (0, 0)},
 'pari-gp': {'Parsed--Depends': (16, 0), 'Reverse--Depends': (9, 0)},
 'gap': {'Parsed--Depends': (15, 0), 'Reverse--Depends': (13, 0)},
 'sagemath': {'Parsed--Depends': (517, 20), 'Reverse--Depends': (3, 0)},
 'sagemath-jupyter': {'Parsed--Depends': (593, 20),
  'Reverse--Depends': (0, 0)},
 'singular': {'Parsed--Depends': (23, 0), 'Reverse--Depends': (4, 0)}}

In [109]:
sorted([(p, len(d['Parsed--Depends'])) for p, d in pkgs.pkgs.items() if 'Parsed--Depends' in d], key=lambda x:x[1])[-20:]

[('php-horde-imp', 78),
 ('npm', 79),
 ('python3-nova', 80),
 ('kdepim-addons', 83),
 ('kmail', 83),
 ('php-horde-core', 83),
 ('firefox-esr-l10n-all', 97),
 ('vlc-plugin-base', 100),
 ('request-tracker4', 103),
 ('forensics-all', 105),
 ('plasma-desktop', 106),
 ('tryton-modules-all', 106),
 ('parl-desktop-eu', 125),
 ('libdpdk-dev', 126),
 ('plasma-workspace', 126),
 ('libmono-cil-dev', 132),
 ('forensics-extra', 153),
 ('sagemath', 159),
 ('tesseract-ocr-all', 162),
 ('parl-desktop-world', 383)]