# Evaluation

In [1]:
import csv
import subprocess
import os
import timeit
import polars as pl

In [2]:
# Build `occurrences`: cost pattern name -> list of commit URLs where the
# pattern was observed, in the order the rows appear in the CSV.
# `newline=''` hands newline handling to the csv module, so quoted fields that
# contain line breaks are parsed correctly.
with open('../2-pattern-extraction/pattern_occurrences.csv', 'r', newline='') as f:
    reader = csv.DictReader(f)
    occurrences = {}
    for row in reader:
        # Only the `pattern` and `url` columns are used here.
        occurrences.setdefault(row['pattern'], []).append(row['url'])

## Checkov

In [11]:
# Checkov check IDs to enable for each cost pattern, joined into the
# comma-separated string handed to `--check`. Lists (rather than sets) keep the
# generated argument identical from one run to the next.
CHECKOV_CHECKS = {
    'Object storage lifecycle rules': ','.join([
        'CKV2_AWS_61',
    ]),
    'AWS - Expensive DynamoDB': ','.join([
        'CKV_AWS_801', # pay per request
        'CKV_AWS_802', # read/write capacity
        'CKV_AWS_803', # global secondary indices
    ]),
    'Old generation': ','.join([
        'CKV_AWS_804',
    ]),
}


def run_checkov(version_path, checks):
    """Run checkov against one snapshot directory and record the outcome.

    Args:
        version_path: Directory passed to checkov via `--directory`.
        checks: Comma-separated check IDs passed via `--check`.

    Returns:
        A dict with the keys `return_code`, `stdout`, `stderr` and
        `duration_seconds` (wall-clock time spent in the subprocess call).
    """
    start = timeit.default_timer()

    result = subprocess.run([
        'checkov',
        '--evaluate-variables', 'True',
        '--directory', version_path,
        '--check', checks,
    ], capture_output=True, encoding='utf-8')

    end = timeit.default_timer()

    return {
        'return_code': result.returncode,
        'stdout': result.stdout,
        'stderr': result.stderr,
        'duration_seconds': end - start,
    }


# One summary dict per analysed commit: pattern, url, a list of "before" runs
# and a single "after" run.
checkov_stats = []

for pattern, checks in CHECKOV_CHECKS.items():
    print('Analyzing', pattern)

    for url in occurrences[pattern]:
        print('-', url)

        # URLs look like https://github.com/<owner>/<name>/commit/<sha>
        _, _, _, owner, name, _, sha = url.split('/')

        snapshot_path = f'snapshots/{owner}-{name}-{sha}'

        summary = {
            'pattern': pattern,
            'url': url,
            'before': [],
        }

        # Versions are "after" (state after the commit) and "before-<sha>"
        # (one directory per parent commit). os.listdir() returns entries in
        # arbitrary order, so sort them: later cells read `before[0]`, and the
        # choice of that element must be reproducible.
        for version in sorted(os.listdir(snapshot_path)):
            version_path = f'{snapshot_path}/{version}/'

            # Snapshots are directories; ignore any stray files next to them.
            if not os.path.isdir(version_path):
                continue

            print(f'    * Running checkov against `{version}`')

            run = run_checkov(version_path, checks)

            if version == 'after':
                summary['after'] = run
            else:
                summary['before'].append(run)

            print(f"      Done after {run['duration_seconds']:.2f}s")

        checkov_stats.append(summary)

Analyzing Object storage lifecycle rules
- https://github.com/alphagov/govuk-aws/commit/f844cd8e254b161bebef04101f8ce177bcd0840c
    * Running checkov against `after`
      Done after 65.13s
    * Running checkov against `before-f9fc11443ab8926b0761fb890974cd7169f9235f`
      Done after 65.55s
- https://github.com/alphagov/govuk-terraform-provisioning/commit/ac105ab0a5ae38fbf69167e072f8970a4a61c3e8
    * Running checkov against `after`
      Done after 7.56s
    * Running checkov against `before-c167e29c9d5867b897bd035315341d14fff84f29`
      Done after 7.96s
- https://github.com/ExpediaGroup/apiary-data-lake/commit/47e62f2fc73a96611606cd619c084d1ded9d844d
    * Running checkov against `after`
      Done after 7.55s
    * Running checkov against `before-f3c3068c66185f51ba97ed71a14ffdbea2a698b1`
      Done after 8.17s
- https://github.com/SamTowne/BasketballDrillBot/commit/4ec6d54e4d36ab02b0a7daf042e727717371eaec
    * Running checkov against `after`
      Done after 8.51s
    * Running

In [43]:
# One row per analysed commit: pattern, url, list of "before" runs, "after" run.
checkov_df = pl.DataFrame(checkov_stats)

with pl.Config(tbl_rows=150, tbl_width_chars=500):
    print(checkov_df)

# Debugging aid: uncomment to dump the raw checkov output of every commit that
# was NOT detected before the change and clean after it.
# for pat, url, before, after in checkov_df.iter_rows():
#     if len(before) == 0:
#         continue

#     if not (before[0]['return_code'] == 1 and after['return_code'] == 0):
#         print('==========', pat, url, '==========\n\n',
#               'BEFORE STDOUT:', before[0]['stdout'],
#               '\n\nSTDERR:', before[0]['stderr'],
#               '\n\nAFTER STDOUT:', after['stdout'],
#               '\n\nSTDERR:', after['stderr'])

# Number of analysed commits per pattern.
checkov_totals = checkov_df.group_by('pattern').len().rename({'len': 'total'})

# A commit counts as a success when the first "before" run exited with 1 and
# the "after" run exited with 0. Commits without any "before" snapshot are
# dropped in a separate step so that `list.get(0)` is never evaluated on an
# empty list.
checkov_successes = (
    checkov_df
    .filter(pl.col('before').list.len() != 0)
    .filter(
        (pl.col('before').list.get(0).struct.field('return_code') == 1) &
        (pl.col('after').struct.field('return_code') == 0)
    )
    .group_by('pattern')
    .len()
    .rename({'len': 'success'})
)

# Left join from the totals so that patterns with zero successes are reported
# as 0 instead of silently disappearing from the table.
tp = (
    checkov_totals
    .join(checkov_successes, on='pattern', how='left')
    .with_columns(pl.col('success').fill_null(0))
    .select('pattern', 'success', 'total')
    .sort('pattern')
)

print(tp)

# NOTE(review): `row_oriented` may not be accepted by every polars release -
# confirm against the version this notebook is pinned to.
checkov_df.write_json('results_checkov.json', row_oriented=True)

shape: (65, 4)
┌────────────────────────────────┬─────────────────────────────────┬───────────────────────────┬────────────────────────────┐
│ pattern                        ┆ url                             ┆ before                    ┆ after                      │
│ ---                            ┆ ---                             ┆ ---                       ┆ ---                        │
│ str                            ┆ str                             ┆ list[struct[4]]           ┆ struct[4]                  │
╞════════════════════════════════╪═════════════════════════════════╪═══════════════════════════╪════════════════════════════╡
│ Object storage lifecycle rules ┆ https://github.com/alphagov/go… ┆ [{1,"                     ┆ {1,"                       │
│                                ┆                                 ┆        _               _… ┆        _               _ … │
│ Object storage lifecycle rules ┆ https://github.com/alphagov/go… ┆ [{1,"                     ┆ {1,"  

## TFLint

In [22]:
# TFLint `--only` flag selecting the single rule that corresponds to each cost pattern.
TFLINT_RULES = {
    'Budget': '--only=cost_aws_budget',
    'Object storage lifecycle rules': '--only=cost_aws_object_storage_lifecycle_rule',
    'Old generation': '--only=cost_aws_old_generation',
    'AWS - Expensive DynamoDB': '--only=cost_aws_expensive_dynamodb'
}


def run_tflint(version_path, rules):
    """Run tflint (with the `cost` plugin enabled) in one snapshot directory.

    Args:
        version_path: Directory tflint switches into via `--chdir`.
        rules: The `--only=<rule>` flag selecting the rule to evaluate.

    Returns:
        A dict with the keys `return_code`, `stdout`, `stderr` and
        `duration_seconds` (wall-clock time spent in the subprocess call).
    """
    start = timeit.default_timer()

    result = subprocess.run([
        'tflint',
        '--format=json',
        '--enable-plugin=cost',
        rules,
        f'--chdir={version_path}'
    ], capture_output=True, encoding='utf-8')

    end = timeit.default_timer()

    return {
        'return_code': result.returncode,
        'stdout': result.stdout,
        'stderr': result.stderr,
        'duration_seconds': end - start,
    }


# One summary dict per analysed commit: pattern, url, a list of "before" runs
# and a single "after" run.
tflint_stats = []

for pattern, rules in TFLINT_RULES.items():
    print('Analyzing', pattern)

    for url in occurrences[pattern]:
        print('- ', url)

        # URLs look like https://github.com/<owner>/<name>/commit/<sha>
        _, _, _, owner, name, _, sha = url.split('/')

        snapshot_path = f'snapshots/{owner}-{name}-{sha}'

        summary = {
            'pattern': pattern,
            'url': url,
            'before': [],
        }

        # Versions are "after" (state after the commit) and "before-<sha>"
        # (one directory per parent commit). os.listdir() returns entries in
        # arbitrary order, so sort them: later cells read `before[0]`, and the
        # choice of that element must be reproducible.
        for version in sorted(os.listdir(snapshot_path)):
            version_path = f'{snapshot_path}/{version}/'

            # Snapshots are directories; ignore any stray files next to them.
            if not os.path.isdir(version_path):
                continue

            print(f'    * Running tflint against `{version}`')

            run = run_tflint(version_path, rules)

            if version == 'after':
                summary['after'] = run
            else:
                summary['before'].append(run)

            print(f"      Done after {run['duration_seconds']:.2f}s")

        tflint_stats.append(summary)

Analyzing Budget
-  https://github.com/AJarombek/global-aws-infrastructure/commit/4a89f4b8235961275fa0e6aaf20848f2b8b7e733
    * Running tflint against `after`
      Done after 0.05s
    * Running tflint against `before-d79ab7eb96ba8d1bd6600ad116d024c124967e3a`
      Done after 0.05s
-  https://github.com/MartinFeineis/terraform/commit/359ba426393c78b78695797f9bdd6a08c0455720
    * Running tflint against `after`
      Done after 0.05s
    * Running tflint against `before-a78936b97d7c08b1f82e01361ba27def21aa8f00`
      Done after 0.05s
-  https://github.com/stuartellis/stuartellis-org-tf-modules/commit/39a9cabac6765c75591ba258fef0d10ba7ae0f9e
    * Running tflint against `after`
      Done after 0.04s
    * Running tflint against `before-f738fdc91dc8c4db4abd417d4cfc0086633dfa2a`
      Done after 0.04s
-  https://github.com/forgotpw/forgotpw-infrastructure/commit/f4363ad27d366385f2388d073ce8af796e035406
    * Running tflint against `after`
      Done after 0.05s
    * Running tflint agai

In [35]:
# One row per analysed commit: pattern, url, list of "before" runs, "after" run.
tflint_df = pl.DataFrame(tflint_stats)

with pl.Config(tbl_rows=150, tbl_width_chars=500):
    print(tflint_df)

# Number of analysed commits per pattern.
tflint_totals = tflint_df.group_by('pattern').len().rename({'len': 'total'})

# A commit counts as a success when the first "before" run exited with 2 and
# the "after" run exited with 0. Commits without any "before" snapshot are
# dropped in a separate step so that `list.get(0)` is never evaluated on an
# empty list.
tflint_successes = (
    tflint_df
    .filter(pl.col('before').list.len() != 0)
    .filter(
        (pl.col('before').list.get(0).struct.field('return_code') == 2) &
        (pl.col('after').struct.field('return_code') == 0)
    )
    .group_by('pattern')
    .len()
    .rename({'len': 'success'})
)

# Left join from the totals so that patterns with zero successes are reported
# as 0 instead of silently disappearing from the table.
print(
    tflint_totals
    .join(tflint_successes, on='pattern', how='left')
    .with_columns(pl.col('success').fill_null(0))
    .select('pattern', 'success', 'total')
    .sort('pattern')
)

# NOTE(review): `row_oriented` may not be accepted by every polars release -
# confirm against the version this notebook is pinned to.
tflint_df.write_json('results_tflint.json', row_oriented=True)

shape: (92, 4)
┌────────────────────────────────┬─────────────────────────────────┬─────────────────────────────────┬─────────────────────────────────┐
│ pattern                        ┆ url                             ┆ before                          ┆ after                           │
│ ---                            ┆ ---                             ┆ ---                             ┆ ---                             │
│ str                            ┆ str                             ┆ list[struct[4]]                 ┆ struct[4]                       │
╞════════════════════════════════╪═════════════════════════════════╪═════════════════════════════════╪═════════════════════════════════╡
│ Budget                         ┆ https://github.com/AJarombek/g… ┆ [{0,"{"issues":[],"errors":[]}… ┆ {0,"{"issues":[],"errors":[]}"… │
│ Budget                         ┆ https://github.com/MartinFeine… ┆ [{2,"{"issues":[{"rule":{"name… ┆ {2,"{"issues":[{"rule":{"name"… │
│ Budget                  

## Precision/recall

In [55]:
# Only commits that have at least one "before" snapshot can be scored.
checkov_clean = checkov_df.filter(pl.col('before').list.len() != 0)

# Exit code of the first "before" run and of the "after" run.
before_code = pl.col('before').list.get(0).struct.field('return_code')
after_code = pl.col('after').struct.field('return_code')

# "before" is expected to be flagged (exit code 1), "after" to be clean (exit code 0).
tp = checkov_clean.filter(before_code == 1).height
fn = checkov_clean.filter(before_code != 1).height
tn = checkov_clean.filter(after_code == 0).height
fp = checkov_clean.filter(after_code != 0).height

# Report NaN instead of raising ZeroDivisionError when a denominator is empty
# (e.g. no commit has a "before" snapshot).
prec = tp / (tp + fp) if (tp + fp) else float('nan')
rec  = tp / (tp + fn) if (tp + fn) else float('nan')

print(f'precision: {prec:.2f}')
print(f'recall: {rec:.2f}')

precision: 0.67
recall: 0.78


In [51]:
# Exit code of the first "before" run and of the "after" run.
before_code = pl.col('before').list.get(0).struct.field('return_code')
after_code = pl.col('after').struct.field('return_code')

# Two sequential filters: the emptiness check runs first, so `list.get(0)` is
# only evaluated on rows that actually have a "before" snapshot.
# The second filter drops commits where BOTH runs exited with 1.
# NOTE(review): TFLint documents exit status 1 as "errors occurred" (as opposed
# to 2, "issues found") - confirm that this is the intent of the exclusion.
tflint_clean = (
    tflint_df
    .filter(pl.col('before').list.len() != 0)
    .filter((before_code != 1) | (after_code != 1))
)

# "before" is expected to be flagged (exit code 2), "after" to be clean (exit code 0).
tp = tflint_clean.filter(before_code == 2).height
fn = tflint_clean.filter(before_code != 2).height
tn = tflint_clean.filter(after_code == 0).height
fp = tflint_clean.filter(after_code != 0).height

# Report NaN instead of raising ZeroDivisionError when a denominator is empty.
prec = tp / (tp + fp) if (tp + fp) else float('nan')
rec  = tp / (tp + fn) if (tp + fn) else float('nan')

print(f'precision: {prec:.2f}')
print(f'recall: {rec:.2f}')

precision: 0.67
recall: 0.17
