# Zero Dependencies

## Setup

In [57]:
import json
from collections import Counter

In [58]:
# Inputs: the top-N most depended-upon libraries (N = 10, 100, 500).
(
    most_depended_upon_10_path,
    most_depended_upon_100_path,
    most_depended_upon_500_path,
) = (
    '../data/most_depended_upon10.json',
    '../data/most_depended_upon100.json',
    '../data/most_depended_upon500.json',
)

# Inputs: the dependency chain rooted at each of those top-N libraries.
(
    dependency_chain_10_path,
    dependency_chain_100_path,
    dependency_chain_500_path,
) = (
    '../data/dependency_chain10.json',
    '../data/dependency_chain100.json',
    '../data/dependency_chain500.json',
)

# Generated files: most depended-upon libraries that have zero dependencies.
(
    zero_10_path,
    zero_100_path,
    zero_500_path,
) = (
    '../data/zero10.json',
    '../data/zero100.json',
    '../data/zero500.json',
)

# Generated files: every zero-dependency library found anywhere on a chain.
(
    zero_on_chain_10_path,
    zero_on_chain_100_path,
    zero_on_chain_500_path,
) = (
    '../data/zero_on_chain10.json',
    '../data/zero_on_chain100.json',
    '../data/zero_on_chain500.json',
)

# Input: the registry data for the packages.
registry_entries = '../data/registry_entries.json'

In [69]:
# Build a name -> {'versions', 'repository'} lookup from the registry dump,
# keeping only the fields that are needed. 'repository' is None for entries
# that do not declare one. The context manager closes the file once it has
# been parsed instead of leaving the handle open.
with open(registry_entries, encoding='utf-8') as registry_file:
    registry_map = {
        entry['name']: {
            'versions': entry['versions'],
            'repository': entry.get('repository'),
        }
        for entry in json.load(registry_file)
    }


## Most depended upon libraries from Libraries.io

In [60]:
#the libraries.io info for each package
most_depended_upon_10 = json.load(open(most_depended_upon_10_path))
most_depended_upon_100 = json.load(open(most_depended_upon_100_path))
most_depended_upon_500 = json.load(open(most_depended_upon_500_path))

print(len(most_depended_upon_10))
print(len(most_depended_upon_100))
print(len(most_depended_upon_500))

10
100
500


## Chain statistics

### Top 10

In [66]:
# Load the dependency chain of the top-10 roots; the context manager closes
# the file handle as soon as parsing is done.
with open(dependency_chain_10_path, encoding='utf-8') as chain_file:
    dependency_chain_10 = json.load(chain_file)

def chain_stats(chain):
    """Print summary statistics for a dependency chain and save its zero-dependency packages.

    The chain is walked recursively and every entry is handled by type:

    * ``dict`` -- its first key is taken as the package name and the value
      under that key is walked as the package's dependencies;
    * ``str`` ending in ``'...'`` -- counted as a circular reference and not
      recorded as a node;
    * any other ``str`` -- recorded as a package with zero dependencies;
    * anything else -- walked as a nested collection of entries.

    Entries that sit directly in ``chain`` are the roots.

    Side effects: prints the report to stdout and writes two JSON files,
    ``../data/zero<R>.json`` (zero-dependency roots) and
    ``../data/zero_on_chain<R>.json`` (unique zero-dependency packages),
    where ``<R>`` is the number of roots found.

    Args:
        chain: sized iterable of entries as described above.

    Returns:
        dict with the keys ``roots``, ``root_zeros``, ``nodes``, ``zeros``
        and ``unique_zeros`` (lists of package names, ``unique_zeros`` in
        first-seen order) and ``unique_nodes`` (a ``Counter`` of how often
        each package appears in ``nodes``).
    """
    roots = []
    root_zeros = []
    zeros = []
    nodes = []
    circulars = []

    def walk_chain(entries, depth):
        # depth is 1 for the entries of the chain itself, i.e. the roots.
        is_root = depth == 1
        for entry in entries:
            if isinstance(entry, dict):
                node = next(iter(entry))
                nodes.append(node)
                if is_root:
                    roots.append(node)
                walk_chain(entry[node], depth + 1)
            elif isinstance(entry, str):
                if entry.endswith('...'):
                    # Circular marker: counted separately, never as a node.
                    circulars.append(entry)
                    continue
                zeros.append(entry)
                if is_root:
                    root_zeros.append(entry)
                    roots.append(entry)
                nodes.append(entry)
            else:
                walk_chain(entry, depth + 1)

    walk_chain(chain, 1)
    print('Root nodes:', len(roots))
    print('Size of chain:', len(nodes))

    unique_nodes = Counter(nodes)
    print('Unique packages on chain:', len(unique_nodes))

    # Guard the ratios so a chain without roots/nodes reports 0.0 / None
    # instead of raising ZeroDivisionError or IndexError.
    average_dependency_count = (len(nodes) - len(roots)) / len(roots) if roots else 0.0
    print("Average dependency chain for root nodes:", average_dependency_count)

    most_frequent = unique_nodes.most_common(1)[0] if unique_nodes else None
    print('Most frequent on chain:', most_frequent)

    print('Circular dependencies?:', len(circulars))

    print("Zero dependencies in chain:", len(zeros), '/', len(nodes))

    # dict.fromkeys de-duplicates while keeping first-seen order.
    unique_zeros = list(dict.fromkeys(zeros))
    print("Unique zero dependencies in chain:", len(unique_zeros), '/', len(unique_nodes))

    zero_root_share = len(root_zeros) / len(chain) if len(chain) else 0.0
    print("Zero dependency roots:", len(root_zeros), "/", len(chain), '(', zero_root_share, ')')

    print("\nPopular zeros:", json.dumps(root_zeros, indent=1))

    # Write through context managers so both files are flushed and closed
    # before the function returns.
    root_count = str(len(roots))
    with open('../data/zero' + root_count + '.json', 'w', encoding='utf-8') as out_file:
        json.dump(root_zeros, out_file, indent=1)

    with open('../data/zero_on_chain' + root_count + '.json', 'w', encoding='utf-8') as out_file:
        json.dump(unique_zeros, out_file, indent=1)

    return {
        'roots' : roots,
        'root_zeros' : root_zeros,
        'nodes': nodes,
        'unique_nodes': unique_nodes,
        'zeros': zeros,
        'unique_zeros': unique_zeros,
    }

# Run the statistics on the top-10 chain. Besides printing its report,
# chain_stats also writes the zero-dependency lists to JSON files under ../data/.
chain_stats_10 = chain_stats(dependency_chain_10)

Root nodes: 10
Size of chain: 504
Unique packages on chain: 216
Average dependency chain for root nodes: 49.4
Most frequent on chain: ('@webassemblyjs/helper-wasm-bytecode', 19)
Circular dependencies?: 0
Zero dependencies in chain: 304 / 504
Unique zero dependencies in chain: 115 / 216
Zero dependency roots: 5 / 10 ( 0.5 )

Popular zeros: [
 "typescript",
 "eslint-plugin-react-hooks",
 "moment",
 "prettier",
 "@types/lodash"
]


### Top 100

For a larger sample, let's run the same statistics on the dependency chain of the top 100 libraries as well.

In [67]:
# Load the dependency chain of the top-100 roots (the context manager closes
# the file handle after parsing), then run the same statistics on it.
with open(dependency_chain_100_path, encoding='utf-8') as chain_file:
    dependency_chain_100 = json.load(chain_file)
chain_stats_100 = chain_stats(dependency_chain_100)

Root nodes: 100
Size of chain: 333517
Unique packages on chain: 1475
Average dependency chain for root nodes: 3334.17
Most frequent on chain: ('function-bind', 42526)
Circular dependencies?: 283
Zero dependencies in chain: 196082 / 333517
Unique zero dependencies in chain: 595 / 1475
Zero dependency roots: 23 / 100 ( 0.23 )

Popular zeros: [
 "typescript",
 "eslint-plugin-react-hooks",
 "moment",
 "prettier",
 "@types/lodash",
 "@types/chai",
 "@types/mocha",
 "async",
 "commander",
 "core-js",
 "uuid",
 "react-is",
 "bootstrap",
 "redux",
 "date-fns",
 "chalk",
 "bluebird",
 "classnames",
 "eslint-config-prettier",
 "style-loader",
 "@angular/language-service",
 "jquery",
 "lodash.debounce"
]


## How many have GitHub repositories?

In [93]:

# Classify every unique zero-dependency package of the top-100 chain by where
# its repository lives: GitHub, somewhere else, or not declared at all.
repos = []
no_repo = []
other = []
unique_zeros_100 = chain_stats_100['unique_zeros']
for package_name in unique_zeros_100:
    repo = registry_map[package_name]["repository"]
    if repo is None:
        no_repo.append(package_name)
        continue
    # The repository value is used directly unless it is a mapping carrying a
    # 'url' entry. The isinstance check matters: on a plain string,
    # `'url' in repo` is a substring test, so a URL that merely contains
    # "url" would otherwise be indexed with repo['url'] and raise TypeError.
    url = repo['url'] if isinstance(repo, dict) and 'url' in repo else repo
    if isinstance(url, str) and 'github.com/' in url:
        repos.append(url)
    else:
        other.append(url)

total = len(unique_zeros_100)
for label, bucket in (
    ("With GitHub:", repos),
    ("With Other (GitLab):", other),
    ("With None:", no_repo),
):
    # Report a share of 0.0 rather than dividing by zero on an empty list.
    share = len(bucket) / total if total else 0.0
    print(label, len(bucket), "/", total, "(%s)" % share)
print(no_repo)


With GitHub: 586 / 595 (0.984873949579832)
With Other (GitLab): 3 / 595 (0.005042016806722689)
With None: 6 / 595 (0.010084033613445379)
['string-width-cjs', 'strip-ansi-cjs', 'wrap-ansi-cjs', '@types/mime', 'boolbase', '@types/cookie']
