# Evaluation

In [13]:
import csv
import subprocess
import os
import timeit

In [14]:
# Build `occurrences`: pattern name -> list of URLs, one entry per CSV row.
# The CSV is expected to provide at least the columns `pattern` and `url`.
# newline='' is what the csv module documentation asks for, so that newlines
# embedded in quoted fields are handled by the csv parser itself.
with open('../2-pattern-extraction/pattern_occurrences.csv', 'r', newline='') as f:
    occurrences = {}
    for row in csv.DictReader(f):
        occurrences.setdefault(row['pattern'], []).append(row['url'])

## Checkov

In [15]:
# Checkov check IDs to run for each pattern, stored as the comma-separated
# string passed to `checkov --check`. Lists (rather than sets) keep the order
# of the IDs stable between runs.
CHECKOV_CHECKS = {
    'Object storage lifecycle rules': ','.join([
        'CKV2_AWS_61',
    ]),
    'AWS - Expensive DynamoDB': ','.join([
        'CKV_AWS_801', # pay per request
        'CKV_AWS_802', # read/write capacity
        'CKV_AWS_803', # global secondary indices
    ]),
    # 'Old generation': ','.join([
    # ]),
}


def _run_checkov(directory, checks):
    """Run checkov against `directory`, restricted to `checks`, and time it.

    Args:
        directory: Path of the snapshot version to scan.
        checks: Comma-separated check IDs passed to `--check`.

    Returns:
        A dict with the process `return_code`, the raw `stdout` and `stderr`
        bytes, and `duration_seconds` measured around the subprocess call.
    """
    start = timeit.default_timer()

    # `check` is left at its default (False): a non-zero exit code is stored
    # in the result instead of raising CalledProcessError.
    result = subprocess.run([
        'checkov',
        '--evaluate-variables', 'True',
        '--directory', directory,
        '--check', checks,
    ], capture_output=True)

    end = timeit.default_timer()

    return {
        'return_code': result.returncode,
        'stdout': result.stdout,
        'stderr': result.stderr,
        'duration_seconds': end - start,
    }


# pattern -> list of per-commit summaries:
# {'url': ..., 'before': [<result>, ...], 'after': <result>}
stats = {}

for pattern, checks in CHECKOV_CHECKS.items():
    print('Analyzing', pattern)

    stats[pattern] = []

    for url in occurrences[pattern]:
        print('-', url)

        # URLs have the form https://github.com/<owner>/<name>/commit/<sha>;
        # any other shape makes this unpacking raise ValueError.
        _, _, _, owner, name, _, sha = url.split('/')

        snapshot_path = f'snapshots/{owner}-{name}-{sha}'

        summary = {
            'url': url,
            'before': [],
        }

        # Versions are "after" (state after the commit) and one or more
        # "before-<sha>" (one per parent commit). Sorting makes the order of
        # the `before` entries reproducible; os.listdir() order is arbitrary.
        for version in sorted(os.listdir(snapshot_path)):
            print(f'\t* Running checkov against `{version}`')

            run = _run_checkov(f'{snapshot_path}/{version}/', checks)

            if version == 'after':
                summary['after'] = run
            else:
                summary['before'].append(run)

            print('\t  Done after', run['duration_seconds'], 's')

        stats[pattern].append(summary)

Analyzing Object storage lifecycle rules
- https://github.com/alphagov/govuk-aws/commit/f844cd8e254b161bebef04101f8ce177bcd0840c
	* Running checkov against `after`
	  Done after 52.11604164703749 s
	* Running checkov against `before-f9fc11443ab8926b0761fb890974cd7169f9235f`
	  Done after 51.32816302217543 s
- https://github.com/alphagov/govuk-terraform-provisioning/commit/ac105ab0a5ae38fbf69167e072f8970a4a61c3e8
	* Running checkov against `after`
	  Done after 6.976479676086456 s
	* Running checkov against `before-c167e29c9d5867b897bd035315341d14fff84f29`
	  Done after 7.939214223995805 s
- https://github.com/ExpediaGroup/apiary-data-lake/commit/47e62f2fc73a96611606cd619c084d1ded9d844d
	* Running checkov against `after`
	  Done after 6.222702512983233 s
	* Running checkov against `before-f3c3068c66185f51ba97ed71a14ffdbea2a698b1`
	  Done after 6.2598359861876816 s
- https://github.com/SamTowne/BasketballDrillBot/commit/4ec6d54e4d36ab02b0a7daf042e727717371eaec
	* Running checkov against 

In [19]:
import polars as pl

# For every analysed pattern, print the raw per-commit results followed by a
# compact table holding only the checkov return codes before and after the
# commit.
for pattern in CHECKOV_CHECKS:
    df = pl.DataFrame(stats[pattern])

    print(pattern)
    print(df)
    print()

    # Only the first `before` entry is compared against `after`.
    # list.first() yields null when a row's `before` list is empty, whereas
    # list.get(0) can raise on an out-of-bounds index depending on the polars
    # version.
    only_returncodes = df.with_columns(before=pl.col('before').list.first()) \
        .with_columns(
            returncode_before=pl.col('before').struct.field('return_code'),
            returncode_after=pl.col('after').struct.field('return_code'),
        ) \
        .drop(['before', 'after'])

    print(only_returncodes)
    print()

Object storage lifecycle rules
shape: (6, 3)
┌────────────────────────────────┬────────────────────────────────┬────────────────────────────────┐
│ url                            ┆ before                         ┆ after                          │
│ ---                            ┆ ---                            ┆ ---                            │
│ str                            ┆ list[struct[4]]                ┆ struct[4]                      │
╞════════════════════════════════╪════════════════════════════════╪════════════════════════════════╡
│ https://github.com/alphagov/go ┆ [{1,b"\x0a\x20\x20\x20\x20\x20 ┆ {1,b"\x0a\x20\x20\x20\x20\x20\ │
│ …                              ┆ …                              ┆ …                              │
│ https://github.com/alphagov/go ┆ [{1,b"\x0a\x20\x20\x20\x20\x20 ┆ {1,b"\x0a\x20\x20\x20\x20\x20\ │
│ …                              ┆ …                              ┆ …                              │
│ https://github.com/ExpediaGrou ┆ [{1,b"\x0a\