In [4]:
import polars as pl

from pathlib import Path
import tempfile
import subprocess
import tqdm

In [5]:
# Directory receiving one '<package>#<release>#<at>.tree' file per successful
# simulation and one '.err' file per failed one.
OUTPUT_PATH = Path('../data-raw/uv/')
# Offset added to each release date before it is used as the simulation date.
TIME_DELTA = pl.duration(days=1)  # Simulate at release date + TIME_DELTA

In [6]:
def _run_uv(args, directory, error_path):
    """Run ``uv`` with ``args`` inside ``directory``.

    On a non-zero exit status, the captured stdout and stderr are dumped to
    ``error_path`` and the ``subprocess.CalledProcessError`` is re-raised.
    Returns the ``subprocess.CompletedProcess`` on success.
    """
    try:
        return subprocess.run(['uv', *args], cwd=directory, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        with open(error_path, 'w') as fp:
            fp.write(e.stdout)
            fp.write('\n\n\n')
            fp.write(e.stderr)
        raise


def simulate_installation(package, release, date, filename):
    """Resolve ``package==release`` with uv as of ``date`` and save its dependency tree.

    A throwaway uv project is created in a temporary directory, the pinned
    package is added with ``--exclude-newer date`` (no environment is synced),
    and the output of ``uv tree`` is written to ``OUTPUT_PATH / '<filename>.tree'``.

    Args:
        package: Name of the package to add.
        release: Exact version to pin.
        date: Timestamp string handed to uv's ``--exclude-newer``.
        filename: Stem (without extension) of the files written in ``OUTPUT_PATH``.

    Raises:
        subprocess.CalledProcessError: If any uv step fails. The captured output
            is written to ``OUTPUT_PATH / '<filename>.err'`` first, and no
            ``.tree`` file is produced.
    """
    # Make sure the output directory exists; otherwise writing the .err file
    # would itself fail and hide the original uv error.
    OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
    error_path = OUTPUT_PATH / f'{filename}.err'

    with tempfile.TemporaryDirectory() as directory:
        # Initialize a uv project
        _run_uv(['init', '.', '--bare', '--no-workspace', '--name', 'root'], directory, error_path)

        # Add dependency
        _run_uv(
            ['add', f'{package}=={release}', '--exclude-newer', date, '--no-sync', '-qq', '--color', 'never'],
            directory, error_path,
        )

        # Export tree.
        # --frozen makes uv read the lockfile written by `uv add` as-is.
        # NOTE(review): without it `uv tree` re-locks the project, and since no
        # --exclude-newer is passed here that may discard the dated resolution;
        # confirm against the uv CLI reference.
        # The exit status is now checked, so a failed export no longer leaves an
        # empty .tree file that would be mistaken for a successful simulation.
        tree = _run_uv(
            ['tree', '--frozen', '--no-dev', '--no-dedupe', '--no-progress', '--show-sizes', '--quiet', '--color', 'never'],
            directory, error_path,
        )

    with open(OUTPUT_PATH / f'{filename}.tree', 'w') as fp:
        fp.write(tree.stdout)

    # A previous failed attempt may have left an .err file behind; drop it so the
    # same simulation is not counted both as a success and as a failure.
    error_path.unlink(missing_ok=True)

    # A lockfile could be exported alongside the tree if needed, e.g. with
    # `uv export --frozen --format pylock.toml` written to '<filename>.lock'.
            
        

In [7]:
# Simulation date of a release: its release date shifted by TIME_DELTA, rendered
# as 'YYYY-MM-DDTHH:MM:SS' text (this string is later given to uv's --exclude-newer).
simulation_date = (pl.col('date') + TIME_DELTA).dt.to_string('%FT%T')

# One (package, release, date, date_next) tuple per release, where date_next is
# the simulation date of the following release of the same package (None for the
# last release of each package).
packages = (
    pl.read_parquet('../data/selected_releases.parquet')
    .sort(['package', 'date'])
    .select(['package', 'release', simulation_date])
    .with_columns(date_next=pl.col('date').shift(-1).over('package'))
    .rows()
)

In [None]:
for package, release, date, next_date in tqdm.tqdm(packages, miniters=1):
    # Every release is simulated at its own date ('current') and, when the package
    # has a later release, at that release's date as well ('next').
    simulations = [('current', date)]
    if next_date is not None:
        simulations.append(('next', next_date))

    for label, at_date in simulations:
        filename = f'{package}#{release}#{label}'
        # An existing .tree file means this simulation already succeeded: skip it
        # so that the loop can be interrupted and resumed.
        if (OUTPUT_PATH / f'{filename}.tree').exists():
            continue
        try:
            simulate_installation(package, release, at_date, filename)
        except Exception as e:
            # Best effort: report the failure and carry on with the next simulation.
            print(filename, type(e), e)


  0%|                                     | 71/72471 [00:30<26:41:40,  1.33s/it]

CairoSVG#2.6.0#current <class 'subprocess.CalledProcessError'> Command '['uv', 'add', 'CairoSVG==2.6.0', '--exclude-newer', '2023-01-13T16:55:31', '--no-sync', '-qq', '--color', 'never']' returned non-zero exit status 1.


  1%|▎                                    | 530/72471 [01:27<2:32:50,  7.84it/s]

ImageIO#2.24.0#current <class 'subprocess.CalledProcessError'> Command '['uv', 'add', 'ImageIO==2.24.0', '--exclude-newer', '2023-01-10T02:51:12', '--no-sync', '-qq', '--color', 'never']' returned non-zero exit status 1.


  1%|▎                                    | 610/72471 [01:36<2:46:38,  7.19it/s]

Let's see how many packages/releases were correctly simulated. 

In [37]:
# Output files are named '<package>#<release>#<at>.<result>', where <at> is
# 'current' or 'next' and <result> is the extension ('tree' on success, 'err' on failure).
records = []
for path in OUTPUT_PATH.iterdir():
    if path.name.startswith('.') or not path.is_file():
        continue

    parts = path.name.split('#')
    if len(parts) != 3:
        # Not a simulation output (stray file): ignore it instead of crashing.
        continue

    package, release, tail = parts
    at, sep, result = tail.rpartition('.')
    if not sep or not at:
        # No '<at>.<result>' suffix: not a simulation output either.
        continue

    records.append((package, release, at, result))

# NOTE: the 'simulation' column holds the release version; the name is kept
# as-is so that cells relying on this schema keep working.
results = pl.from_records(records, schema=['package', 'simulation', 'at', 'result'], orient='row')

In [39]:
# Here, "simulations" is around twice the number of releases (exactly that, minus one for each package)
# A simulation succeeded when it produced a '.tree' file.
is_success = pl.col('result') == pl.lit('tree')
succeeded = results.filter(is_success)
failed = results.filter(pl.col('result') != pl.lit('tree'))

print(results.n_unique('package'), 'packages and', len(results), 'simulations')
print('success for', succeeded.n_unique('package'), 'packages and', len(succeeded), 'simulations')
print('failure for', failed.n_unique('package'), 'packages and', len(failed), 'simulations')

# Packages for which every single simulation succeeded. A descriptive name is
# used instead of `_`, which IPython reserves for the last cell output.
fully_succeeded = (
    results
    .group_by('package')
    .agg(is_success.all(), pl.len().alias('simulations'))
    .filter('result')
)
print('full success for', fully_succeeded.n_unique('package'), 'packages and', fully_succeeded.select('simulations').sum().item(), 'simulations')

1710 packages and 143232 simulations
success for 1673 packages and 128636 simulations
failure for 287 packages and 14596 simulations
full success for 1423 packages and 103725 simulations
