In [1]:
import asyncio
import itertools
import datetime
import random
import collections

import pandas

import pipdepgraph.constants
import pipdepgraph.entrypoints
import pipdepgraph.entrypoints.common
import pipdepgraph.pypi_api
from pipdepgraph.repositories import (
    direct_dependency_repository,
    known_package_name_repository,
)

# Hostname passed as `host=` when opening the async database connection pool below.
CLUSTER_HOST = "tail-rpi-cluster-control-node"

In [2]:
# Enable IPython's autoreload extension so edits to imported modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:

# Packages that don't show up in this list declare identical dependency requirements
# across all distributions of the same version. If a package does show up in this list,
# resolving its dependencies is much more difficult.

async with pipdepgraph.entrypoints.common.initialize_client_session() as session:
    pypi_client = pipdepgraph.pypi_api.PypiApi(session)
    popular_packages = await pypi_client.get_popular_packages()

packages_in_violation = []
async with pipdepgraph.entrypoints.common.initialize_async_connection_pool(host=CLUSTER_HOST) as db_pool:
    dd_repo = direct_dependency_repository.DirectDependencyRepository(db_pool)
    packages_processed = 0

    for popular_package in popular_packages.packages:
        packages_processed += 1
        direct_deps: list[direct_dependency_repository.DirectDependencyResult] = []
        version_distribution_groups = collections.defaultdict(lambda: collections.defaultdict(set))

        async for dd_of_package in dd_repo.iter_direct_dependencies(kv_package_name=popular_package.package_name, vd_package_type="bdist_wheel", vd_processed=True, output_as_dict=False):
            direct_deps.append(dd_of_package)
            version_distribution_groups[dd_of_package.known_version.package_version][dd_of_package.version_distribution.version_distribution_id].add((
                dd_of_package.direct_dependency.extras,
                dd_of_package.direct_dependency.dependency_name,
                None if dd_of_package.direct_dependency.dependency_extras is None else (
                    ','.join(list(sorted(dd_of_package.direct_dependency.dependency_extras.split(','))))
                ),
                dd_of_package.direct_dependency.version_constraint,
            ))

        any_differences = False
        versions_different = set()

        for violating_version, dist_dd_sets in version_distribution_groups.items():
            for dd_set_1, dd_set_2 in itertools.pairwise(dist_dd_sets.values()):
                if dd_set_1 != dd_set_2:
                    versions_different.add(violating_version)

        if versions_different:
            packages_in_violation.append((popular_package.package_name, versions_different, direct_deps))

packages_processed, len(packages_in_violation)

discarding closed connection: <psycopg.AsyncConnection [BAD] at 0x10aec8260>


OperationalError: consuming input failed: server closed the connection unexpectedly
	This probably means the server terminated abnormally
	before or while processing the request.

In [27]:
def analyze_violating_package(
        violating_package_name: str,
        violating_versions: set[str],
        direct_deps: list[direct_dependency_repository.DirectDependencyResult]
    ):
    """Group a package's dependency requirements by the distribution files that declare them.

    For every version in ``violating_versions`` this collects the filenames of the
    distributions belonging to that version, and maps each normalized dependency tuple
    ``(extras, dependency_name, sorted dependency_extras, version_constraint)`` to the
    list of distribution filenames that declare it.

    Args:
        violating_package_name: Name of the package being analyzed (not used yet).
        violating_versions: Versions of the package to inspect.
        direct_deps: Dependency rows for the package; rows whose version is not in
            ``violating_versions`` are ignored.

    Returns:
        None. The analysis is still a work in progress: the groupings are built but not
        yet reported (see the ``...`` placeholder).
    """

    for violating_version in violating_versions:
        version_distributions = {
            dd.version_distribution.package_filename
            for dd in direct_deps
            if dd.known_version.package_version == violating_version
        }

        dep_group = collections.defaultdict(list)
        for dep in direct_deps:
            dep: direct_dependency_repository.DirectDependencyResult
            if dep.known_version.package_version == violating_version:
                # Read the extras from the row being grouped. The key must describe `dep`
                # itself, not a variable left over from another notebook cell.
                dependency_extras = dep.direct_dependency.dependency_extras
                dep_group[(
                    dep.direct_dependency.extras,
                    dep.direct_dependency.dependency_name,
                    # Sort the comma-separated extras so ordering alone never splits a group.
                    None if dependency_extras is None else ','.join(sorted(dependency_extras.split(','))),
                    dep.direct_dependency.version_constraint,
                )].append(dep.version_distribution.package_filename)
        ...

# Spot-check one randomly chosen violating package. random.choice raises IndexError on an
# empty sequence, so skip the call when no violations were collected.
if packages_in_violation:
    analyze_violating_package(*random.choice(packages_in_violation))
else:
    print("No packages in violation; nothing to analyze.")

180