In [1]:
import collections
import statistics
import sys
from functools import reduce
import json
from pprint import pprint
import os
import socket

import pandas as pd

from common import *
from vulnerability_database import VulnerabilityDatabase

#### Preparation with
```
# pnpm.list generated through real-dolospy notebook
node pack-mirror.mjs -x -o $DATASETS/npm-preprocessed -w 6 -p pnpm.list
```

#### Resultset generated with
```
# Last duration: 40 min
# For the compartment-less analysis, pass --no-comp and adjust the output names so that the final file is real-bundles-results-bundler-study-nocomp.json
PORT=4200 python bundler_study_speed_eval.py --worker $(nproc) -f /tmp/feats -p $DATASETS/npm-preprocessed -o $DATASETS/results-real-bundler-study.bson -s $DATASETS/object-storage.tar $DATASETS/bundles-daily/*
python dolos_speed_eval_recover.py -o $DATASETS/real-bundles-results-bundler-study.json -r $DATASETS/results-real-bundler-study.bson $DATASETS/bundles-daily/*
```

In [3]:
# Load the recovered result sets (with and without compartments) and the
# vulnerability database from the DATASETS directory.
# JSON is read explicitly as UTF-8 so decoding does not depend on the locale's
# default encoding.
with open(os.path.join(DATASETS, "real-bundles-results-bundler-study.json"), "r", encoding="utf-8") as f:
    data_with_compartments = json.load(f)

with open(os.path.join(DATASETS, "real-bundles-results-bundler-study-nocomp.json"), "r", encoding="utf-8") as f:
    data_without_compartments = json.load(f)

vulndb = VulnerabilityDatabase(os.path.join(DATASETS, "vulndb.json"))

In [4]:
# NOTE: no `del metric` is needed before this definition. `def` rebinds the name
# (shadowing any `metric` pulled in by a star import), whereas `del` raises
# NameError on a fresh kernel when no such name exists yet.
def metric(similarity_dict):
    """Return the mean of all similarity values, clamping negatives to 0.

    Parameters:
        similarity_dict: mapping whose values are themselves mappings; every
            value of the inner mappings is treated as one similarity sample.

    Returns:
        The arithmetic mean of ``max(0, x)`` over all inner values.

    Raises:
        statistics.StatisticsError: if there is no inner value at all.
    """
    return statistics.mean(max(0, x) for part in similarity_dict.values() for x in part.values())

# Compartment results

In [5]:
def compute_results(resultset, nocomp):
    """Aggregate version-detection statistics over a result set.

    For every ground-truth ``package@version`` of every result, the candidate
    versions of that package are scored with ``metric`` and the best-scoring
    version(s) are compared against the true version.

    Parameters:
        resultset: iterable of result dicts. Each one is read through the keys
            ``"ignore"`` (optional), ``"groundTruth"``, ``"similarities"``,
            ``"noCompartments"`` (optional) and ``"domain"`` (optional).
        nocomp: when falsy, results carrying a ``"noCompartments"`` key are
            skipped; when truthy, they are evaluated like any other result.

    Returns:
        A ``Stats`` object with these attributes:
        ``total`` (evaluated detections), ``trivial`` (all candidate versions
        tie for the best score), ``noError`` / ``patchError`` / ``minorError``
        / ``majorError`` (Metric 1 counters; a major error also counts as a
        minor and patch error, a minor error also counts as a patch error),
        ``hasNoError`` / ``hasNoErrorAndUnique`` / ``hasMajorError`` /
        ``hasMinorError`` / ``hasPatchError`` (the same, counted per package),
        ``versionDifferences`` (Metric 2: one distance per evaluated
        detection), ``vulnerableTruePositive`` / ``vulnerableTrueNegative`` /
        ``vulnerableFalsePositive`` / ``vulnerableFalseNegative`` (Metric 3)
        and ``packages`` (set of evaluated package names).

    Notes:
        * Metric 3 is only evaluated for detections whose true version is not
          among the best-scoring versions, and only when the best-scoring
          versions agree on being vulnerable or not.
        * Entries that are skipped (true version unknown to the scores, or
          ``semver_distance_list`` raising ``ValueError``) do not contribute
          to any counter, including ``total`` and ``packages``.
    """
    class Stats:
        """Plain attribute bag; all counters are attached right below."""

    stats = Stats()
    stats.total = 0

    # Metric 1
    stats.trivial = 0
    stats.noError = 0
    stats.patchError = 0
    stats.minorError = 0
    stats.majorError = 0

    stats.hasNoError = collections.Counter()
    stats.hasNoErrorAndUnique = collections.Counter()
    stats.hasMajorError = collections.Counter()
    stats.hasMinorError = collections.Counter()
    stats.hasPatchError = collections.Counter()

    # Metric 2
    stats.versionDifferences = []

    # Metric 3
    stats.vulnerableTruePositive = 0
    stats.vulnerableTrueNegative = 0
    stats.vulnerableFalsePositive = 0
    stats.vulnerableFalseNegative = 0

    # Other
    stats.packages = set()

    for result in resultset:
        if result.get("ignore", False):
            continue

        truths = set(reduce(extendReduce, [parse_pnpm_names(name) for name in result["groundTruth"]], []))
        if not nocomp and "noCompartments" in result:
            continue
        similarities = result["similarities"]

        for truth in truths:
            pkg, vers = truth.rsplit("@", 1)

            # We ignore packages which are not indexed
            if pkg not in similarities or len(similarities[pkg]) == 0:
                continue

            scores = {k: metric(v) for k, v in similarities[pkg].items()}

            # The true version must be one of the scored candidates; otherwise the
            # database and the bundle dataset are probably out of sync. This is an
            # explicit check instead of an `assert`, because asserts are stripped
            # when Python runs with -O.
            if vers not in scores:
                print(f"WARNING Skipping {truth}")
                continue

            maxScore = max(scores.values())
            maxVersions = [k for k, score in scores.items() if score == maxScore]

            # Compute the distance before touching any counter, so that an entry
            # skipped here is not counted in total/packages/trivial either.
            try:
                distance = semver_distance_list(vers, maxVersions)
            except ValueError as e:
                print(result.get("domain"), e)
                continue

            stats.total += 1
            stats.packages.add(pkg)
            stats.versionDifferences.append(distance)

            if len(maxVersions) == len(scores):
                stats.trivial += 1

            if vers in maxVersions:
                stats.noError += 1
                stats.hasNoError[pkg] += 1
                if len(maxVersions) == 1:
                    stats.hasNoErrorAndUnique[pkg] += 1
                continue

            # Wrong detection: errors cascade downwards (major => minor => patch).
            if distance[0] > 0:
                stats.majorError += 1
                stats.minorError += 1
                stats.patchError += 1
                stats.hasMajorError[pkg] += 1
                stats.hasMinorError[pkg] += 1
                stats.hasPatchError[pkg] += 1
            elif distance[1] > 0:
                stats.minorError += 1
                stats.patchError += 1
                stats.hasMinorError[pkg] += 1
                stats.hasPatchError[pkg] += 1
            elif distance[2] > 0:
                stats.patchError += 1
                stats.hasPatchError[pkg] += 1

            # Metric 3: does the wrong detection still give the right vulnerability verdict?
            # Both lookups are guarded: a ValueError from the vulnerability database
            # leaves this entry out of Metric 3 (best effort, as before for the true version).
            try:
                detected_vulns = [vulndb.is_vulnerable(pkg, v) for v in maxVersions]
                truth_is_vulnerable = vulndb.is_vulnerable(pkg, vers)
            except ValueError:
                continue

            all_detected_vulnerable = all(detected_vulns)
            none_detected_vulnerable = not any(detected_vulns)

            # Mixed verdicts among the best-scoring versions are counted nowhere.
            if truth_is_vulnerable:
                if all_detected_vulnerable:
                    stats.vulnerableTruePositive += 1
                elif none_detected_vulnerable:
                    stats.vulnerableFalseNegative += 1
            else:
                if all_detected_vulnerable:
                    stats.vulnerableFalsePositive += 1
                elif none_detected_vulnerable:
                    stats.vulnerableTrueNegative += 1
    return stats

In [None]:
# Evaluate the compartment-aware result set; results carrying a
# "noCompartments" key are skipped because nocomp is False.
stats = compute_results(resultset=data_with_compartments, nocomp=False)

In [7]:
# Split the recorded version distances into their major/minor/patch components
# once, then print the report for the current `stats` object.
major_distances = [d[0] for d in stats.versionDifferences]
minor_distances = [d[1] for d in stats.versionDifferences]
patch_distances = [d[2] for d in stats.versionDifferences]
unique_no_error = sum(stats.hasNoErrorAndUnique.values())

print(f"Total results: {stats.total}")
print()
print("General Info:")
print(f"  Trivial detected: {stats.trivial}")
print()
print("Metric 1:")
print(f"  No Error: {stats.noError}")
print(f"  No Error and unique: {unique_no_error}")
print(f"  Major Error: {stats.majorError}")
print(f"  Minor Error: {stats.minorError}")
print(f"  Patch Error: {stats.patchError}")
print()
print("Metric 2:")
print(f"  Major Error (min/median/mean/max): {'/'.join(map(str, compute_statistics(major_distances)))}")
print(f"  Minor Error (min/median/mean/max): {'/'.join(map(str, compute_statistics(minor_distances)))}")
print(f"  Patch Error (min/median/mean/max): {'/'.join(map(str, compute_statistics(patch_distances)))}")
print()
print("Metric 3:")
print(f"  TP / FN: {stats.vulnerableTruePositive:4}  {stats.vulnerableFalseNegative:4}")
print(f"  FP / TN: {stats.vulnerableFalsePositive:4}  {stats.vulnerableTrueNegative:4}")

Total results: 7029

General Info:
  Trivial detected: 257

Metric 1:
  No Error: 2932
  No Error and unique: 1216
  Major Error: 1832
  Minor Error: 3634
  Patch Error: 4097

Metric 2:
  Major Error (min/median/mean/max): 0/0/0.6246976810357092/18
  Minor Error (min/median/mean/max): 0/0/5.541755584009105/794
  Patch Error (min/median/mean/max): 0/0/2.393228055199886/229

Metric 3:
  TP / FN:   99     2
  FP / TN:  188  3808


In [8]:
print("Distribution of package errors:")

# Classified detections per package. Every major/minor/patch error also bumps
# hasPatchError, so hasNoError + hasPatchError covers all classified detections.
classified = {
    pkg: stats.hasNoError.get(pkg, 0) + stats.hasPatchError.get(pkg, 0)
    for pkg in stats.packages
}
# A package without any classified detection has no meaningful ratio; leaving it
# out avoids a ZeroDivisionError in the divisions below.
classified = {pkg: count for pkg, count in classified.items() if count > 0}

# Per package: share of detections that are free of a major/minor/patch error.
major = [1 - stats.hasMajorError.get(pkg, 0) / count for pkg, count in classified.items()]
minor = [1 - stats.hasMinorError.get(pkg, 0) / count for pkg, count in classified.items()]
patch = [1 - stats.hasPatchError.get(pkg, 0) / count for pkg, count in classified.items()]
print(f"Count perfect major: {sum(1 for m in major if m >= 0.99)}/{len(major)}")
print(f"Count perfect minor: {sum(1 for m in minor if m >= 0.99)}/{len(minor)}")
print(f"Count perfect patch: {sum(1 for m in patch if m >= 0.99)}/{len(patch)}")

Distribution of package errors:
Count perfect major: 538/724
Count perfect minor: 365/724
Count perfect patch: 313/724


# Results w/o Compartments

In [None]:
# Evaluate the compartment-less result set; with nocomp=True no result is
# skipped because of a "noCompartments" key.
stats = compute_results(resultset=data_without_compartments, nocomp=True)

In [10]:
# Report for the current `stats` object (compartment-less run).
print(f"Total results: {stats.total}")
print("")
print("Metric 1:")
print(f"  No Error: {stats.noError}")
print(f"  Major Error: {stats.majorError}")
print(f"  Minor Error: {stats.minorError}")
print(f"  Patch Error: {stats.patchError}")
print("")
print("Metric 2:")
print(f"  Major Error (min/median/mean/max): {'/'.join(map(str, compute_statistics([d[0] for d in stats.versionDifferences])))}")
print(f"  Minor Error (min/median/mean/max): {'/'.join(map(str, compute_statistics([d[1] for d in stats.versionDifferences])))}")
print(f"  Patch Error (min/median/mean/max): {'/'.join(map(str, compute_statistics([d[2] for d in stats.versionDifferences])))}")
print("")
print("Metric 3:")
print(f"  TP / FN: {stats.vulnerableTruePositive:4}  {stats.vulnerableFalseNegative:4}")
print(f"  FP / TN: {stats.vulnerableFalsePositive:4}  {stats.vulnerableTrueNegative:4}")
print("Distribution of package errors:")

# Classified detections per package. Every major/minor/patch error also bumps
# hasPatchError, so hasNoError + hasPatchError covers all classified detections.
classified = {
    pkg: stats.hasNoError.get(pkg, 0) + stats.hasPatchError.get(pkg, 0)
    for pkg in stats.packages
}
# A package without any classified detection has no meaningful ratio; leaving it
# out avoids a ZeroDivisionError in the divisions below.
classified = {pkg: count for pkg, count in classified.items() if count > 0}

# Per package: share of detections that are free of a major/minor/patch error.
major = [1 - stats.hasMajorError.get(pkg, 0) / count for pkg, count in classified.items()]
minor = [1 - stats.hasMinorError.get(pkg, 0) / count for pkg, count in classified.items()]
patch = [1 - stats.hasPatchError.get(pkg, 0) / count for pkg, count in classified.items()]
print(f"Count perfect major: {sum(1 for m in major if m >= 0.99)}/{len(major)}")
print(f"Count perfect minor: {sum(1 for m in minor if m >= 0.99)}/{len(minor)}")
print(f"Count perfect patch: {sum(1 for m in patch if m >= 0.99)}/{len(patch)}")

Total results: 16394

Metric 1:
  No Error: 3660
  Major Error: 6421
  Minor Error: 11494
  Patch Error: 12734

Metric 2:
  Major Error (min/median/mean/max): 0/0.0/1.2003781871416372/18
  Minor Error (min/median/mean/max): 0/1.0/5.56770769793827/894
  Patch Error (min/median/mean/max): 0/1.0/3.139075271440771/229

Metric 3:
  TP / FN:  235    50
  FP / TN: 1203  11246
Distribution of package errors:
Count perfect major: 696/1185
Count perfect minor: 385/1185
Count perfect patch: 291/1185
