In [24]:
import bson
import datetime
import urllib.parse
from collections import Counter, defaultdict
import json
import os
import socket
import sys

import numpy as np
import semantic_version as sv

from common import *
from vulnerability_database import VulnerabilityDatabase

#### Resultset generated with

```
# Takes 2h20m
LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$VIRTUAL_ENV/lib/python3.12/site-packages/dolospy" PORT=4200 python aletheia_speed_eval.py --requires-sourcemap --worker $(nproc) -o $DATASETS/results-update-sourcemap-aletheia.bson -s $DATASETS/object-storage.tar $DATASETS/bundles-daily/*
# Takes 1 hour
python aletheia_speed_eval_recover.py --restore-order -o $DATASETS/update-behavior-aletheia.json -r $DATASETS/results-update-sourcemap-aletheia.bson $DATASETS/bundles-daily/*
```

In [26]:
with open(os.path.join(DATASETS, "update-behavior-aletheia.json"), "r") as f:
    data = json.load(f)

vulndb = VulnerabilityDatabase(os.path.join(DATASETS, "vulndb.json"))

In [3]:
def metric(similarity_dict):
    """
    Compute a similarity score

    :param similarity_dict: dict keys: "covered", "leftTotal", "rightTotal"
    :return: float
    """
    return similarity_dict["covered"] / similarity_dict["leftTotal"] if similarity_dict["leftTotal"] > 0 else 0

def detect_packages(similarities):
    detected = {}
    for pkg, versions in similarities.items():
        scored_versions = [(version, metric(sim)) for version, sim in versions.items()]
        if len(scored_versions) > 0:
            max_score = max(s[1] for s in scored_versions)
            detected[pkg] = [s[0] for s in scored_versions if s[1] >= max_score]
    return detected
    
cleaned_data = [[d for d in day if len(d.get("similarities", [])) > 0] for day in data]

In [4]:
print("Total days", len(data))
print("Usable rows first day", len(cleaned_data[0]))

Total days 43
Usable rows first day 6002


In [5]:
discarded_libraries = {'cssfilter',
 '@emotion/sheet',
 'amazon-cognito-identity-js',
 'yup',
 '@formatjs/intl-numberformat',
 'react-player',
 '@radix-ui/react-slot',
 '@videojs/vhs-utils',
 '@turf/helpers',
 '@trpc/client',
 '@floating-ui/utils',
 '@emotion/react',
 'cn-request',
 '@sentry/utils',
 '@react-aria/utils',
 'ms',
 '@tanstack/react-query',
 'i18next-http-backend',
 'call-bind',
 'ramda',
 'luxon',
 '@aws-amplify/auth',
 '@chakra-ui/alert',
 'rxjs',
 'tslib',
 '@firebase/installations',
 'domelementtype',
 'core-js',
 'gatsby-script',
 'react-focus-lock',
 '@alifd/overlay',
 'babel-runtime',
 '@swc/helpers',
 '@chakra-ui/react-use-disclosure',
 '@guardian/source',
 'es6-promise',
 'dom-serializer',
 'dnd-core',
 'ssr-window',
 'ahooks',
 'compute-scroll-into-view',
 'redux',
 '@react-aria/interactions',
 'debug',
 '@babel/runtime-corejs3',
 '@wry/context',
 'react-dnd',
 '@radix-ui/react-use-size',
 'preact',
 '@chakra-ui/input',
 'popper.js',
 'react-transition-group',
 '@floating-ui/react',
 'unist-util-is',
 '@chakra-ui/accordion',
 '@remix-run/router',
 'react-lifecycles-compat',
 'object-inspect',
 'react-remove-scroll',
 '@react-spring/core',
 'dayjs',
 'get-intrinsic',
 'react-day-picker',
 '@chakra-ui/descendant',
 'qrcode.react',
 'next-seo',
 'detect-browser',
 '@stripe/stripe-js',
 'date-fns',
 '@chakra-ui/focus-lock',
 '@wry/equality',
 '@chakra-ui/breadcrumb',
 'react-clientside-effect',
 'react-is',
 'chroma-js',
 '@chakra-ui/utils',
 'dom7',
 'make-plural',
 'react-dnd-touch-backend',
 '@uppy/core',
 'universal-cookie',
 '@chakra-ui/transition',
 '@turf/invariant',
 '@radix-ui/react-context',
 'emoji-mart',
 '@chakra-ui/clickable',
 'gatsby-link',
 'styled-components',
 '@destinyitemmanager/dim-api-types',
 'color-parse',
 '@react-aria/focus',
 '@react-dnd/shallowequal',
 'markdown-to-jsx',
 '@coveo/headless',
 '@sentry/browser',
 'axios',
 '@radix-ui/react-presence',
 'next',
 '@tanstack/query-core',
 '@babel/runtime',
 'object-assign',
 'domhandler',
 'what-input',
 '@emotion/weak-memoize',
 'react-overlays',
 'whatwg-fetch',
 'workbox-cdn',
 '@chakra-ui/menu',
 'ufo',
 '@guardian/bridget',
 '@coveo/atomic-react',
 '@radix-ui/react-use-controllable-state',
 '@chakra-ui/shared-utils',
 'uuid',
 'classnames',
 'framer-motion',
 'has-symbols',
 '@chakra-ui/media-query',
 'mini-create-react-context',
 '@chakra-ui/layout',
 'tailwindcss',
 '@sentry/core',
 'react-remove-scroll-bar',
 '@headlessui/react',
 '@chakra-ui/react-utils',
 'side-channel',
 'validator',
 'undate',
 'ogl',
 'js-cookie',
 '@emotion/unitless',
 '@chakra-ui/image',
 'define-data-property',
 '@chakra-ui/toast',
 'section-iterator',
 'micromark',
 'unist-util-stringify-position',
 '@heroicons/react',
 '@newrelic/browser-agent',
 'color-string',
 '@react-stately/collections',
 'prebid.js',
 'react-router',
 'lodash',
 'react-fast-compare',
 'final-form',
 'dom-helpers',
 '@firebase/app',
 '@reach/utils',
 'unist-util-visit-parents',
 'react-onclickoutside',
 'react-i18next',
 '@sentry/types',
 '@fortawesome/fontawesome-svg-core',
 'react-helmet-async',
 'i18next',
 'toggle-selection',
 'mdast-util-find-and-replace',
 '@sentry/hub',
 'react-redux',
 'mdast-util-from-markdown',
 'prop-types',
 '@radix-ui/react-primitive',
 'graphql',
 '@chakra-ui/progress',
 '@coveo/atomic',
 'zen-observable-ts',
# lab pkgs
 '@turf/helpers',
 'class-transformer',
 'input-format',
 '@prismicio/helpers',
 '@chakra-ui/popper',
 '@webcomponents/shadycss',
 '@ionic/react',
 'use-debounce',
 '@zip.js/zip.js',
 'async-wait-until',
 'plupload',
 '@blueprintjs/core',
 'css-filter-converter',
 'mmenu-light',
 'rc-mentions',
 '@angular/router',
 '@react-aria/i18n',
 '@react-hook/media-query',
 'apollo-client',
 'react-favicon',
 '@bloomreach/ng-sdk',
 '@instructure/ui-tag',
 '@nivo/legends',
 '@chakra-ui/react-use-outside-click',
 '@guardian/libs',
 'react-intl',
 '@fluentui/dom-utilities',
 'search-insights',
 'just-extend',
 '@nivo/annotations',
 '@instructure/ui-radio-input',
 'react-notifications-component',
 'backoff-rxjs',
 '@lume/custom-attributes',
 'react-sweet-state',
 'just-safe-set',
 'snabbdom',
 'seamless-scroll-polyfill',
 'throttleit',
 'handlebars',
 'slate-hyperscript',
 '@instructure/ui-focusable',
 'focus-within',
 'update-input-width',
 '@swagger-api/apidom-ns-openapi-3-1',
 '@dnd-kit/modifiers',
 'lower-case',
 '@chakra-ui/theme',
 'for-each',
 '@tensorflow/tfjs-core',
 '@module-federation/sdk',
 '@mui/x-license',
 '@headlessui/vue',
 '@chakra-ui/radio',
 'dequal',
 '@chakra-ui/accordion',
 'flux-standard-action',
 '@instructure/ui-dom-utils',
 '@aws-sdk/client-cognito-identity',
 'castable-video',
 'micromark',
 'luxon',
 'hypernova',
 'cuid',
 'unist-util-visit-parents',
 '@juggle/resize-observer',
 'd3-array',
 'framer-motion',
 'react-credit-cards',
 '@emotion/react',
 'balanced-match',
 'semantic-ui-react',
 'rtl-css-js',
 'angular-recaptcha',
 'smartystreets-javascript-sdk',
 '@aacassandra/vue3-progressbar',
 '@chakra-ui/popover',
 'peach-collector',
 'fetch-retry',
 'reactcss',
 'phoenix',
 '@instructure/ui-prop-types',
 '@wry/context',
 '@uppy/webcam',
 'chakra-react-select',
 '@mapbox/mapbox-gl-supported',
 '@capacitor/camera',
 'smooth-scrollbar',
 'dom-serializer',
 '@rmwc/list',
 '@material-ui/styles',
 'json5',
 '@coldwired/react',
 'event-source-polyfill',
 '@fortawesome/free-brands-svg-icons',
 '@chakra-ui/menu',
 '@emotion/cache',
 'fetch-headers',
 '@stencil/core',
 'deepdash',
 'register-service-worker',
 '@uppy/progress-bar',
 'react-feather',
 '@formatjs/intl',
 'react-dnd-html5-backend',
 '@emotion/hash',
 '@rjsf/core',
 '@chakra-ui/toast',
 'supercluster',
 'stylis',
 'react-instantsearch-hooks',
 'http-status',
 'smoothscroll-polyfill',
 'ricos-context',
 'cookie-es',
 'react-slick',
 '@formkit/icons',
 'react-hot-keys',
 'property-information',
 'stimulus-controller-resolver',
 '@internationalized/number',
 'fancybox',
 'unsplash-js',
 'rehype-react',
 '@radix-ui/react-use-controllable-state',
 '@usercentrics/cmp-browser-sdk',
 'element-resize-detector',
 '@formkit/inputs',
 'datocms-structured-text-utils',
 '@turf/great-circle',
 '@material/line-ripple',
 'i18next',
 'attr-accept',
 'focus-trap-react',
 '@cainiaofe/cn-ui-utils',
 '@cloudinary/url-gen',
 '@aws-sdk/util-user-agent-browser',
 '@chakra-ui/react-use-disclosure',
 'get-intrinsic',
 'emoji-picker-react',
 '@fullcalendar/timegrid',
 'ttag',
 '@preact/signals-react',
 'encode-utf8',
 '@backpackjs/storefront',
 'unist-util-is',
 'uuid',
 'victory-chart',
 '@lit/reactive-element',
 '@turf/meta',
 'dnd-core',
 'proj4',
 'uikit',
 '@module-federation/webpack-bundler-runtime',
 '@jridgewell/sourcemap-codec',
 '@firebase/auth-compat',
 '@smithy/util-body-length-browser',
 '@sitecore-jss/sitecore-jss',
 '@react-spring/web',
 '@tinymce/tinymce-angular',
 'filesize',
 '@chakra-ui/theme-utils',
 'query-string',
 '@instructure/ui-tree-browser',
 'remark-deflist',
 'webcomponentsjs',
 'framesync',
 '@smithy/util-endpoints',
 '@github/clipboard-copy-element',
 '@fluentui/react',
 '@opentelemetry/api',
 '@chakra-ui/alert',
 '@maskito/core',
 'react-router',
 'intl-messageformat-parser',
 '@trpc/client',
 'composed-offset-position',
 'sortablejs',
 'public-ip',
 '@attraqt/activity',
 'multibase',
 'react-hls-player',
 'ricos-styles',
 'topojson-server',
 'rc-picker',
 'caroucssel',
 '@azure/core-lro',
 '@module-federation/utilities',
 'vue-social-sharing',
 'poly-decomp',
 '@uniformdev/context-react',
 'markdown-it-emoji',
 'd3-interpolate',
 '@aws-sdk/client-kinesis',
 'leaflet-virtual-grid',
 '@chakra-ui/counter',
 'pbf',
 '@aws-sdk/client-cloudwatch-logs',
 '@react-hook/throttle',
 '@instructure/ui-grid',
 'victory-polar-axis',
 '@material/radio',
 'react-helmet-async',
 'mini-create-react-context',
 '@ampproject/animations',
 'lru-cache',
 'vue3-lazyload',
 '@react-hookz/web',
 'ts-md5',
 'date-fns-tz',
 '@ngrx/store',
 '@styled-system/border',
 'valtio',
 'react-input-autosize',
 '@azure/logger',
 'babel-runtime',
 'react-final-form',
 '@coinbase/wallet-sdk',
 'react-imask',
 'aurelia-cookie',
 'unctx',
 'uint8arrays',
 '@chakra-ui/anatomy',
 '@chakra-ui/progress',
 '@fluentui/foundation-legacy',
 'toggle-selection',
 'currency-symbol-map',
 'zen-observable-ts',
 'protobufjs',
 'core-js-pure',
 'jwt-decode',
 'naive-ui',
 'parse-svg-path',
 'tinymce-word-paste-filter',
 '@fluentui/utilities',
 'd3-time',
 'fastest-levenshtein',
 'make-event-props',
 'html-dom-parser',
 'vfile',
 'react-onclickoutside',
 'document.contains',
 'react-leaflet',
 'nth-check',
 'grommet-icons',
 'escape-goat',
 'debounce',
 '@chakra-ui/checkbox',
 'proxy-compare',
 'react-loader-spinner',
 'payment',
 'ricos-content',
 'style-value-types',
 '@turf/destination',
 'ionicons',
 'fuse.js',
 'p-queue',
 'dot-prop',
 '@ngrx/effects',
 'gatsby-source-datocms',
 '@vue/server-renderer',
 '@uppy/image-editor',
 '@fullstory/snippet',
 'jsencrypt',
 'typical',
 'url-join',
 'dom-helpers',
 '@chakra-ui/clickable',
 'angular-cookies',
 'datocms-structured-text-to-html-string',
 'hoist-non-react-statics',
 'linkify-html',
 'tinyqueue',
 '@instructure/ui-dialog',
 '@popperjs/core',
 'echarts',
 'vooks',
 '@formatjs/icu-skeleton-parser',
 'css-animation',
 'gatsby-link',
 '@aws-amplify/pubsub',
 'raphael',
 'wcag-element-contrast',
 'popmotion',
 'vue-inline-svg',
 '@smithy/util-retry',
 'fast-unique-numbers',
 'on-finished',
 '@rjsf/mui',
 '@swup/head-plugin',
 '@react-aria/button',
 'fitvids',
 'react-remove-scroll',
 '@smithy/eventstream-serde-browser',
 '@chakra-ui/react-utils',
 '@jupyterlab/translation',
 'use-sound',
 'redux-actions',
 'elem-dataset',
 '@formatjs/icu-messageformat-parser',
 'react-focus-lock',
 'react-calendar',
 'react-dropzone',
 'graphql-ws',
 '@uppy/aws-s3',
 '@plasmicpkgs/react-slick',
 '@instructure/ui-popover',
 '@okta/okta-auth-js',
 'mdast-util-from-markdown',
 '@instructure/ui-i18n',
 '@noble/hashes',
 'vue-svgicon',
 'vue-i18n-bridge',
 'plyr',
 '@destinyitemmanager/dim-api-types',
 'dom-utils',
 '@azure/core-paging',
 'reakit-system',
 'preact-i18n',
 'section-iterator',
 'ramda',
 'react-responsive-masonry',
 '@azure/storage-blob',
 'cookie',
 'motion',
 '@feature-hub/core',
 'ricos-schema',
 'acorn',
 'react-transition-group',
 'class-validator',
 '@radix-ui/react-toast',
 'rooks',
 'graphemesplit',
 '@react-aria/label',
 '@react-spring/core',
 '@radix-ui/react-dismissable-layer',
 'constate',
 'hast-util-raw',
 'wix-rich-content-common',
 '@floating-ui/react',
 'react-bem-helper',
 '@smithy/core',
 'rc-util',
 '@gilbarbara/deep-equal',
 'vue-check-view',
 'redux-first-router',
 'react-collapsed',
 '@lingui/message-utils',
 '@aws-sdk/middleware-recursion-detection',
 'victory-scatter',
 'swagger-client',
 'format-message-generate-id',
 '@mindbodygreen/analytics-tracking-helpers',
 '@sveltejs/kit',
 '@iabtcf/core',
 'define-data-property',
 '@emotion/sheet',
 '@instructure/ui-react-utils',
 '@plasmicpkgs/plasmic-basic-components',
 'p-cancelable',
 'random-seed',
 '@vue/shared',
 'ts-dot-prop',
 'lodash.isinteger',
 'shave',
 'ngx-swiper-wrapper',
 'markdown-it',
 '@porsche-design-system/components-js',
 '@testing-library/jest-dom',
 '@bundled-es-modules/tough-cookie',
 '@smithy/eventstream-codec',
 '@greatsumini/react-facebook-login',
 '@capacitor/haptics',
 'gatsby-script',
 'bootstrap',
 'tslib',
 '@ag-grid-community/angular',
 'timers-browserify',
 '@chakra-ui/textarea',
 'tooltip.js',
 'popper.js',
 '@uifabric/icons',
 '@reach/menu-button',
 'apollo-link-error',
 'linkify-it',
 '@sentry/svelte',
 'lucide-react',
 'react-countup',
 '@sitecore-jss/sitecore-jss-vue',
 'deep-diff',
 'twemoji',
 '@aws-sdk/xml-builder',
 'mathlive',
 'domutils',
 'flat',
 '@chakra-ui/modal',
 'rc-tree-select',
 'stimulus-autocomplete',
 'flatted',
 'async-mutex',
 '@azure/core-util',
 'react-from-dom',
 '@react-aria/dialog',
 '@material/dom',
 '@ngrx/router-store',
 'ts-invariant',
 'react-fast-compare',
 '@smithy/config-resolver',
 'multicodec',
 'react-clientside-effect',
 'virtua',
 '@fullcalendar/daygrid',
 '@firebase/app-check',
 'lodash',
 '@uppy/status-bar',
 '@chakra-ui/focus-lock',
 '@smithy/querystring-builder',
 'react-lifecycles-compat',
 '@merkur/core',
 '@chakra-ui/image',
 '@videojs/vhs-utils',
 'intl-messageformat',
 '@formily/json-schema',
 '@microsoft/signalr',
 '@react-aria/tooltip',
 'svg-pathdata',
 'react-clock',
 'sweepline-intersections',
 'd3-color',
 '@azure/msal-react',
 '@aws-crypto/crc32',
 'postcode-validator',
 'reduce-reducers',
 'randexp',
 '@sentry/browser',
 'vue-sweetalert2',
 'date-fns',
 'decouple',
 '@react-spring/shared',
 'blingblingjs',
 '@aws-sdk/middleware-bucket-endpoint',
 'domelementtype',
 'throttle-debounce',
 'debounce-promise',
 'is-what',
 '@prismicio/react',
 '@fastr/errors',
 '@fluentui/web-components',
 '@chakra-ui/select',
 'wheel-gestures',
 '@ionic/core',
 'material-ui-popup-state',
 'react-google-one-tap-login',
 '@react-leaflet/core',
 'isbot',
 'd3-scale',
 '@instructure/ui-form-field',
 '@pixi/polyfill',
 '@radix-ui/react-scroll-area',
 'ramda-adjunct',
 'value-equal',
 'smartbanner.js',
 'fast-safe-stringify',
 'react-is',
 'react-infinite-scroller',
 '@hh.ru/magritte-common-metrics',
 '@internationalized/date',
 'topojson-client',
 '@uppy/core',
 '@fluentui/react-focus',
 'clone',
 'bent',
 'sifter',
 '@ampproject/worker-dom',
 'react-property',
 'vue-resource',
 '@appsignal/core',
 '@formatjs/intl-listformat',
 'wasm-feature-detect',
 'case-anything',
 'numbro',
 'detect-browser',
 'kdbush',
 'create-react-class',
 '@codetrix-studio/capacitor-google-auth',
 'topojson-simplify',
 '@amazeelabs/scalars',
 '@material/mwc-textfield',
 'd3-shape',
 '@chakra-ui/dom-utils',
 '@formatjs/intl-getcanonicallocales',
 'long',
 '@material/notched-outline',
 'base64-arraybuffer',
 '@chakra-ui/styled-system',
 'seemly',
 'mem',
 '@motionone/generators',
 'react-awesome-reveal',
 'vue-runtime-helpers',
 'solid-js',
 'media-chrome',
 'make-plural',
 'astronomia',
 '@chakra-ui/transition',
 '@conform-to/dom',
 'mathjs',
 'svelte-i18n',
 '@radix-ui/react-checkbox',
 '@financial-times/o-date',
 'vuebar',
 'consola',
 '@instructure/ui-range-input',
 '@instructure/ui-motion',
 'react-qr-code',
 'axios-retry',
 '@doist/react-interpolate',
 '@github/paste-markdown',
 '@angular/core',
 'html-react-parser',
 'color-string',
 'delaunator',
 '@vueuse/integrations',
 '@smithy/querystring-parser',
 '@shoelace-style/shoelace',
 '@financial-times/privacy-legislation-client',
 'earcut',
 'victory-line',
 '@azure/core-client',
 'potpack',
 'enquire.js',
 'memoize-one',
 'multihashes',
 '@chakra-ui/number-input',
 '@angular/fire',
 '@phosphor-icons/react',
 'rc-tooltip',
 '@chakra-ui/utils',
 'react-redux',
 '@awesome-cordova-plugins/core',
 'react-tag-autocomplete',
 'apollo-link-sentry',
 '@babel/runtime',
 'd3-format',
 'smooch',
 'ts-custom-error',
 '@team-griffin/react-heading-section',
 'deep-equal',
 'vuex-persist',
 'rudder-sdk-js',
 'colorthief',
 'domhandler',
 '@smithy/protocol-http',
 '@ungap/url-search-params',
 '@jupyterlab/observables',
 'react-overlays',
 'react-day-picker',
 '@sentry/astro',
 'history',
 'preact',
 'oblivious-set',
 '@splidejs/splide-extension-video',
 'angular',
 'runes2',
 '@sentry/release-parser',
 'circular-json',
 'rrule',
 'css-select',
 '@turf/invariant',
 'component-cookie',
 'html-to-image',
 'react-remove-scroll-bar',
 'eventemitter2',
 '@swagger-api/apidom-json-pointer',
 '@trpc/next',
 'react-dnd',
 'react-draggable',
 'instantsearch.js',
 '@purple-dot/browser',
 '@smithy/util-stream',
 'blurhash',
 'scratch-parser',
 '@chakra-ui/media-query',
 'shopify-buy',
 '@mux/playback-core',
 'detect-ua',
 'ogl',
 'redom',
 'angular8-yandex-maps',
 '@aws-sdk/region-config-resolver',
 '@react-pdf/pdfkit',
 'memoizerific',
 'promise-polyfill',
 '@primer/behaviors',
 'abab',
 'd3-contour',
 '@module-federation/runtime',
 '@azure/core-rest-pipeline',
 'color-convert',
 'filepond-plugin-file-validate-type',
 '@instructure/canvas-theme',
 'vuex-composition-helpers',
 'jsonrepair',
 '@uppy/dashboard',
 '@angular/common',
 'v-calendar',
 '@formatjs/ecma402-abstract',
 '@guardian/source',
 'emoji-mart',
 '@feature-hub/react',
 '@noble/curves',
 '@firebase/messaging',
 'core-js',
 '@chakra-ui/descendant',
 'stacktrace-parser',
 'd3-ease',
 'focus-lock',
 'emoji-regex',
 'next-myft-client',
 '@smithy/eventstream-serde-universal',
 '@opentelemetry/core',
 'zustand',
 '@emotion/weak-memoize',
 'd3-dispatch',
 'ahooks',
 'rc-upload',
 'backbone.cocktail',
 'canvas',
 '@instructure/ui-flex',
 '@shopify/react-cookie',
 '@aws-amplify/cache',
 'wav-encoder',
 'react-pose',
 'react-twitter-widgets',
 'js-md5',
 'vega-tooltip',
 'why-did-you-update',
 '@instructure/ui-icons',
 '@metamask/utils',
 '@radix-ui/react-popper',
 'ical.js',
 'eventemitter3',
 '@growthbook/growthbook-react',
 'micro-dash',
 '@emotion/styled',
 '@politiet/pds',
 '@swagger-api/apidom-ns-openapi-3-0',
 '@feature-hub/history-service',
 'd3-brush',
 '@fastr/headers',
 'clabe-validator',
 'react-number-format',
 'inline-style-prefixer',
 'semver',
 'react-fast-marquee',
 '@vidstack/react',
 '@react-spring/animated',
 '@shopify/app-bridge-core',
 '@uppy/utils',
 'react-chartjs-2',
 'performance-now',
 'has-symbols',
 '@splitsoftware/splitio',
 'slate',
 'sjcl',
 '@cloudinary/transformation-builder-sdk',
 'qj',
 'is-extended',
 'tree-changes',
 'get-value',
 '@segment/isodate',
 'rxjs',
 '@gtm-support/core',
 '@lexical/history',
 'gl-matrix',
 '@chakra-ui/layout',
 '@opentelemetry/instrumentation-xml-http-request',
 '@elastic/react-search-ui-views',
 'd3-path',
 '@hotwired/turbo-rails',
 'what-input',
 'is-dom-node-list',
 'analytics',
 'qrcode',
 'sister',
 'htm',
 '@chakra-ui/input',
 'css-in-js-utils',
 'markdown-to-jsx',
 '@visx/text',
 '@instructure/canvas-high-contrast-theme',
 'es5-ext',
 'react-pdf',
 '@github/template-parts',
 'react-topbar-progress-indicator',
 'element-ui',
 '@instructure/ui-a11y-utils',
 'cn-request',
 'broadcast-channel',
 'next',
 '@flowplayer/player',
 'angular2-uuid',
 '@smithy/smithy-client',
 'victory-core',
 'mdast-util-compact',
 '@ngneat/until-destroy',
 'keen-slider',
 'undate',
 '@ant-design/colors',
 'react-animate-height',
 'jss-extend',
 'defu',
 '@zag-js/focus-visible',
 '@socket.io/component-emitter',
 '@headlessui/tailwindcss',
 '@instructure/ui-img',
 'prebid.js',
 '@turf/boolean-point-on-line',
 'jose',
 'mediaquery',
 'pony-cause',
 '@firebase/auth',
 '@instructure/ui-position',
 'ts-error',
 'range-slider-wc',
 'eventhub-jsclient',
 'webrtc-adapter',
 '@radix-ui/react-context',
 '@smithy/signature-v4',
 'react-hook-form',
 '@react-spring/rafz',
 'entities',
 'tiny-cookie',
 'pacto',
 'proxy-polyfill',
 '@uppy/compressor',
 '@researchgate/react-intersection-observer',
 'revenge',
 'apollo-link-http',
 'redux-observable',
 'twgl.js',
 'vue-currency-input',
 'web-vitals',
 '@chakra-ui/tooltip',
 '@github/textarea-autosize',
 '@restart/hooks',
 '@smithy/middleware-retry',
 '@shopify/performance',
 'zrender',
 '@nivo/colors',
 'urlpattern-polyfill',
 'dom-mutator',
 '@chakra-ui/shared-utils',
 'memize',
 'react-dnd-touch-backend',
 'simplebar-react',
 'mdurl',
 '@instructure/ui-text-input',
 'http-status-codes',
 '@cycjimmy/awesome-js-funcs',
 'use-force-update',
 'msgpack-lite',
 'ts-deepmerge',
 'validatorjs',
 'es6-shim',
 '@blueprintjs/icons',
 '@aws-sdk/core'
}

In [6]:
results = defaultdict(lambda: defaultdict(list))
results_underestimate = defaultdict(lambda: defaultdict(list))
base_date = datetime.datetime.fromisoformat("2024-10-31T18:00:00Z")

stats_to_plot = {}

In [7]:
def extract_detected_version_overestimate(versions):
    s = sorted(versions, key=sv.Version.coerce, reverse=True)
    return s[0] if len(s) > 0 else None

def extract_detected_version_underestimate(versions):
    s = sorted(versions, key=sv.Version.coerce)
    return s[0] if len(s) > 0 else None

In [8]:
for n, day in enumerate(cleaned_data):
    for d in day:
        domain = d.get("domain")
        libraries = set((pkg, extract_detected_version_overestimate(verss)) for pkg, verss in detect_packages(d["similarities"]).items() if 4 > len(verss) > 0 and pkg not in discarded_libraries)
        histories = defaultdict(list)
        for lib, vers in libraries:
            histories[lib].append(vers)
        for lib, verss in histories.items():
            results[domain][lib].append(list(map(sv.Version.coerce, sorted(verss))))
        for lib in results[domain]:
            if len(results[domain][lib]) < n+1:
                results[domain][lib].append([])  # Preserve one measurement per day

for n, day in enumerate(cleaned_data):
    for d in day:
        domain = d.get("domain")
        libraries = set((pkg, extract_detected_version_underestimate(verss)) for pkg, verss in detect_packages(d["similarities"]).items() if 4 > len(verss) > 0 and pkg not in discarded_libraries)
        histories = defaultdict(list)
        for lib, vers in libraries:
            histories[lib].append(vers)
        for lib, verss in histories.items():
            results_underestimate[domain][lib].append(list(map(sv.Version.coerce, sorted(verss))))
        for lib in results_underestimate[domain]:
            if len(results_underestimate[domain][lib]) < n+1:
                results_underestimate[domain][lib].append([])  # Preserve one measurement per day

In [9]:
def check_all_sites_with_updates(underestimate=False):
    not_updated_libs = 0
    not_updated_domains = 0
    not_monotonous = 0

    r = results if not underestimate else results_underestimate
    
    for domain, domain_data in r.items():
        no = len(domain_data) > 0
        
        for library, history in domain_data.items():
            if all([n < 2 for n in set(map(len, history))]):  # we ignore multiple installed versions in parallel
                if len(set([v[0] for v in history if len(v) > 0])) > 1:
                    no = False
                        
                    parsed_history = [v[0] for v in history if len(v) > 0]
                    non_empty_history = [(n, v[0]) for n, v in enumerate(history) if len(v) > 0]
                    
                    if all(v1 <= v2 for v1, v2 in zip(parsed_history, parsed_history[1:])):
                        updates = [(base_date + datetime.timedelta(days=n), v2) for (_, v1), (n, v2) in zip(non_empty_history, non_empty_history[1:]) if v1 != v2]
                        if library in vulndb.releases:
                            for update_time, version in updates:
                                if str(version) in vulndb.releases[library]:
                                    update_time_diff = update_time - datetime.datetime.fromisoformat(vulndb.releases[library][str(version)])
                                    print(f"{domain=} {library=} {version=} {update_time_diff.days=}")
                                else:
                                    print(f"WARNING: Missing version {version} for library {library}")
                        else:
                            print(f"WARNING: No release info for {library}")
                    else:
                        not_monotonous += 1
                else:
                    not_updated_libs += 1
        if no:
            not_updated_domains += 1
    
    print(f"{not_updated_domains=} {not_updated_libs=} {not_monotonous=}")

In [None]:
check_all_sites_with_updates()

## From how many domains do we have data?

In [11]:
print(sum(1 for r in results.values() if len(r) > 0))  #  before lab discard: 2873

2805


## When a new library version gets released, what percentage of domains uses the updates within 1/4/16 weeks?

In [12]:
def match_date_interval(bundledVersion: sv.Version, release_list: dict, date: datetime.datetime, interval: datetime.timedelta) -> bool:
    for version, release_date in reversed(list(release_list.items())):
        if version == "modified" or version == "created": continue

        if date - release_date < interval:
            try:
                release_version = coerce_version(version)
            except ValueError:
                continue

            if bundledVersion == release_version:
                return True

    return False

In [13]:
def library_update_stats(underestimate=False):
    intervals = [datetime.timedelta(days=i) for i in [7, 4*7, 16*7]]
    libraries = set(library for domain_data in results.values() for library in domain_data.keys())
    libraries_not_indexed = set(library for library in libraries if library not in vulndb.releases)
    libraries_with_recent_updates = set(library for library in libraries.difference(libraries_not_indexed) if len(vulndb.releases[library]) > 0 and base_date - datetime.datetime.fromisoformat(vulndb.releases[library]["modified"]) < max(intervals))
    # libraries_with_recent_updates = considered_libraries

    stats = [[], [], []]
    prevalences = [[], [], []]

    r = results if not underestimate else results_underestimate
    
    for library in libraries_with_recent_updates:
        release_dates = {v: datetime.datetime.fromisoformat(vulndb.releases[library][v]) for v in vulndb.releases[library]}
        release_order = list([k for k in vulndb.releases[library].keys() if k not in ["created", "modified"]])  # npm already has release order
        
        for i, interval in enumerate(intervals):
            count = [0, 0]
            prevalence = 0
            for domain, domain_data in r.items():
                if library in domain_data:
                    history = domain_data[library]
                    found_this_or_later_in_interval = False
                    for day, versions in enumerate(history):
                        for v in versions:
                            if match_date_interval(v, release_dates, base_date + datetime.timedelta(days=day), interval):
                                found_this_or_later_in_interval = True
                                # print(f"hit {v=} {domain=} {library=} {interval.days=}")
                                break
                        if found_this_or_later_in_interval:
                            break
                    count[0 if found_this_or_later_in_interval else 1] += 1
                    if found_this_or_later_in_interval:
                        pass
                    prevalence += 1

            if sum(count) > 0:
                stats[i].append(count[0] / sum(count))
                prevalences[i].append(prevalence)
    return stats, prevalences

In [14]:
stats, prevalences = library_update_stats()

print(f"1 week: {compute_statistics(stats[0])}")
print(f"4 week: {compute_statistics(stats[1])}")
print(f"16 week: {compute_statistics(stats[2])}")

stats_normalized = [np.multiply(np.multiply(stat, prevalence), len(prevalence) / np.array(prevalence).sum()) for stat, prevalence in zip(stats, prevalences)]

print(f"1 week (normalized): {list(map(float, compute_statistics(stats_normalized[0])))}")
print(f"4 week (normalized): {list(map(float, compute_statistics(stats_normalized[1])))}")
print(f"16 week (normalized): {list(map(float, compute_statistics(stats_normalized[2])))}")

stats_to_plot["library_stats"] = stats
stats_to_plot["library_instances_stats"] = [list(s) for s in stats_normalized]

1 week: [0.0, 0.0, 0.05449274275449468, 1.0]
4 week: [0.0, 0.0, 0.08425070515282729, 1.0]
16 week: [0.0, 0.0, 0.16437986091062015, 1.0]
1 week (normalized): [0.0, 0.0, 0.09702330805953385, 14.126088177478238]
4 week (normalized): [0.0, 0.0, 0.12763268744734627, 17.147711317045776]
16 week (normalized): [0.0, 0.0, 0.23336141533277172, 18.205279415894413]


In [15]:
stats, prevalences = library_update_stats(True)

print(f"1 week: {compute_statistics(stats[0])}")
print(f"4 week: {compute_statistics(stats[1])}")
print(f"16 week: {compute_statistics(stats[2])}")

stats_normalized = [np.multiply(np.multiply(stat, prevalence), len(prevalence) / np.array(prevalence).sum()) for stat, prevalence in zip(stats, prevalences)]

print(f"1 week (normalized): {list(map(float, compute_statistics(stats_normalized[0])))}")
print(f"4 week (normalized): {list(map(float, compute_statistics(stats_normalized[1])))}")
print(f"16 week (normalized): {list(map(float, compute_statistics(stats_normalized[2])))}")

stats_to_plot["library_stats_underestimate"] = stats
stats_to_plot["library_instances_stats_underestimate"] = [list(s) for s in stats_normalized]

1 week: [0.0, 0.0, 0.023304156148727235, 1.0]
4 week: [0.0, 0.0, 0.041714642426342013, 1.0]
16 week: [0.0, 0.0, 0.10361251742350877, 1.0]
1 week (normalized): [0.0, 0.0, 0.03566413928671722, 11.859870822802584]
4 week (normalized): [0.0, 0.0, 0.06121875877562483, 15.93906206121876]
16 week (normalized): [0.0, 0.0, 0.13900589721988207, 18.205279415894413]


## How many domains update their dependencies?

In [16]:
def domain_update_stats(underestimate=False):
    intervals = [datetime.timedelta(days=i) for i in [7, 4*7, 16*7]]
    libraries = set(library for domain_data in results.values() for library in domain_data.keys())
    libraries_not_indexed = set(library for library in libraries if library not in vulndb.releases)
    libraries_with_recent_updates = set(library for library in libraries.difference(libraries_not_indexed) if len(vulndb.releases[library]) > 0 and base_date - datetime.datetime.fromisoformat(vulndb.releases[library]["modified"]) < max(intervals))
    # libraries_with_recent_updates = considered_libraries

    stats = [[], [], []]

    r = results if not underestimate else results_underestimate
    
    for domain, domain_data in r.items():
        
        for i, interval in enumerate(intervals):
            count = [0, 0]
            for library in libraries_with_recent_updates:
                if library in domain_data:
                    release_dates = {v: datetime.datetime.fromisoformat(vulndb.releases[library][v]) for v in vulndb.releases[library]}
                    release_order = list([k for k in vulndb.releases[library].keys() if k not in ["created", "modified"]])  # npm already has release order
                    history = domain_data[library]
                    found_this_or_later_in_interval = False
                    for day, versions in enumerate(history):
                        for v in versions:
                            if match_date_interval(v, release_dates, base_date + datetime.timedelta(days=day), interval):
                                found_this_or_later_in_interval = True
                                break
                        if found_this_or_later_in_interval:
                            break
                    count[0 if found_this_or_later_in_interval else 1] += 1
                    if found_this_or_later_in_interval:
                        # don't print, too many occurences due to spec matching
                        # print(f"hit {domain=} {library=} {interval.days=}")
                        pass
            if sum(count) > 0:
                stats[i].append(count[0] / sum(count))
    return stats

In [17]:
stats = domain_update_stats()

print(f"1 week: {compute_statistics(stats[0])}")
print(f"4 week: {compute_statistics(stats[1])}")
print(f"16 week: {compute_statistics(stats[2])}")

stats_to_plot["domain_stats"] = stats

1 week: [0.0, 0.0, 0.08343483511315831, 1.0]
4 week: [0.0, 0.0, 0.13195765696739406, 1.0]
16 week: [0.0, 0.0, 0.23298538390522813, 1.0]


In [18]:
stats = domain_update_stats(True)

print(f"1 week: {compute_statistics(stats[0])}")
print(f"4 week: {compute_statistics(stats[1])}")
print(f"16 week: {compute_statistics(stats[2])}")

stats_to_plot["domain_stats_underestimate"] = stats

1 week: [0.0, 0.0, 0.05440202925142684, 1.0]
4 week: [0.0, 0.0, 0.09600450607382748, 1.0]
16 week: [0.0, 0.0, 0.17875988607849877, 1.0]


## How many vulnerable libraries are included per domain?

In [27]:
def vulnerable_libs_per_domain(underestimate=False):
    what_libraries = Counter()
    safe_libraries = Counter()
    n_days = len(data)
    
    stats = []

    r = results if not underestimate else results_underestimate
    
    for day in range(n_days):
        stats.append([0, 0])
        for domain, domain_data in r.items():
            domainstats = [0, 0]
            for library, history in domain_data.items():
                if day >= len(history): continue
                
                versions = history[day]
                if len(versions) == 1:
                    version = versions[0]
                    if vulndb.is_vulnerable(library, str(version)):
                        domainstats[0] += 1
                        what_libraries.update({library: 1})
                    else:
                        domainstats[1] += 1
                        safe_libraries.update({library: 1})
            if sum(domainstats) > 0:
                stats[-1][0] += domainstats[0] / sum(domainstats)
                stats[-1][1] += domainstats[1] / sum(domainstats)
    
    print("Most common vulnerable libraries:")
    display(what_libraries.most_common(10))
    print("Most common unvulnerable libraries:")
    display(safe_libraries.most_common(10))
    
    print("Stats (vulnerable vs safe) for each day:")
    display(f"mean: {np.average([s[0] / sum(s) for s in stats if sum(s) > 0])}")
    display([s[0] / sum(s) for s in stats if sum(s) > 0])

In [31]:
vulnerable_libs_per_domain()

Most common vulnerable libraries:


[('vue', 2303),
 ('moment', 1531),
 ('dompurify', 1050),
 ('jquery', 756),
 ('jquery-ui', 361),
 ('crypto-js', 318),
 ('elliptic', 309),
 ('browserify-sign', 229),
 ('html-parse-stringify2', 214),
 ('url-parse', 203)]

Most common unvulnerable libraries:


[('process', 16926),
 ('react-dom', 13054),
 ('iso8601-duration', 9870),
 ('react', 9030),
 ('lodash.merge', 8719),
 ('lodash.clonedeep', 8524),
 ('setimmediate', 7029),
 ('regenerator-runtime', 6035),
 ('punycode', 5546),
 ('url', 5119)]

Stats (vulnerable vs safe) for each day:


'mean: 0.022146135045714863'

[0.02135447571345011,
 0.024854872322224306,
 0.020123243944495512,
 0.0249950248982216,
 0.02199786663326645,
 0.026873033286384927,
 0.021428496736953174,
 0.023951415845085238,
 0.02054791736368703,
 0.02394302506920162,
 0.0210716046292782,
 0.022222095866076336,
 0.02219214399538801,
 0.02471598449446423,
 0.022676444002144063,
 0.024720770044812816,
 0.023818779447587625,
 0.024392128239867555,
 0.021275009636189463,
 0.02552881654599798,
 0.02243336965225363,
 0.021866135863386957,
 0.020325136858714867,
 0.02132986640092048,
 0.022589220970897335,
 0.02368457835865072,
 0.019966213697168424,
 0.02128329128873114,
 0.023200004769066442,
 0.0219385981786534,
 0.021487273586787873,
 0.02277479573129805,
 0.023037813114601104,
 0.020038056617365296,
 0.020826384122452333,
 0.021099525483714427,
 0.019605757558609434,
 0.022207661941260098,
 0.018615056816118566,
 0.021521678849914277,
 0.019358940964188016,
 0.021255213530434806,
 0.01915608389577515]

In [32]:
vulnerable_libs_per_domain(True)

Most common vulnerable libraries:


[('domify', 2997),
 ('moment', 2856),
 ('vue', 2303),
 ('decode-uri-component', 1922),
 ('dompurify', 1050),
 ('jquery', 756),
 ('crypto-js', 489),
 ('jquery-ui', 361),
 ('lodash-es', 328),
 ('elliptic', 309)]

Most common unvulnerable libraries:


[('process', 16926),
 ('react-dom', 13054),
 ('iso8601-duration', 9870),
 ('react', 9030),
 ('lodash.merge', 8719),
 ('lodash.clonedeep', 8524),
 ('setimmediate', 7029),
 ('regenerator-runtime', 6035),
 ('punycode', 5546),
 ('url', 5119)]

Stats (vulnerable vs safe) for each day:


'mean: 0.033205281499473126'

[0.03560039948462221,
 0.03646100046123012,
 0.03338732521128083,
 0.035295915022207615,
 0.033329115019886855,
 0.037263027033528635,
 0.03183708087268215,
 0.0348747518480296,
 0.034088734747109495,
 0.032675639716889665,
 0.03481143447770226,
 0.03261818180968032,
 0.03317890160238015,
 0.0345504648013866,
 0.03486936530718117,
 0.03356531779573972,
 0.032733259773936235,
 0.03418428411654612,
 0.03466665345695829,
 0.0339570974768736,
 0.03275468250543875,
 0.03349875192528371,
 0.03276456831478943,
 0.030627949508999965,
 0.03332388430163077,
 0.03542109887378202,
 0.032675362749632716,
 0.030006154407262738,
 0.034305730587467324,
 0.032896282701519265,
 0.0341811778707443,
 0.032103924971270834,
 0.034372549699877596,
 0.028830481236417838,
 0.034188352311885786,
 0.03301608048396781,
 0.02972613608756817,
 0.03108903413915533,
 0.0333097859859908,
 0.03011840606392897,
 0.030021554877895856,
 0.03223815489820859,
 0.03240904993877404]