In [1]:
import pandas as pd
import pypistats
import json
import numpy as np
import pyvis
import palettable as palette
from pyvis.network import Network


# Generate data

In [2]:
# Load edges             
from ecosystem_edges import edges
# Build the edge table and drop repeated rows in a single chained step
edges = pd.DataFrame(edges).drop_duplicates()

In [3]:
# Extract node list and assign URLs to PyPI
# The unique package names are sorted so that the node order (and therefore
# the DataFrame index) is the same on every run; iterating a plain set of
# strings can yield a different order in each interpreter session.
pkgs = sorted(set(edges["from"].to_list() + edges["to"].to_list()))
node_list = [{"id": pkg, "category": None, "url": f"https://pypi.org/project/{pkg}/"} for pkg in pkgs]
nodes = pd.DataFrame(node_list)

In [4]:
# Assign categories
# ------------------

# Load categories
from ecosystem_categories import categories, no_distro, generic_core, gis_core, generic_visuals, not_active, pygis_core, fundamental_core, generic_visuals

# Names left out of the graph further down in this notebook: everything in
# `no_distro` (which is also excluded from the download lookup) followed by
# everything in `not_active`.
skipped = no_distro + not_active

def get_subcategory(name, category, categories):
    """Return the subcategory of ``name`` inside ``categories[category]``.

    Parameters
    ----------
    name : str
        Package name to look up.
    category : hashable
        Key into ``categories``; may be ``None`` for an uncategorised package.
    categories : mapping
        Maps a category to a mapping with "vector", "raster" and "generic"
        entries, each a container of package names.

    Returns
    -------
    str or None
        "vector", "raster" or "generic" (checked in that order), or ``None``
        when the name is in none of them or the lookup fails.

    Notes
    -----
    A failed lookup (unknown category, missing subcategory entry, or an entry
    that does not support ``in``) prints ``name`` and returns ``None`` instead
    of raising. Only lookup errors are handled, so an interrupt or an
    unrelated bug is no longer silently swallowed.
    """
    try:
        groups = categories[category]
        for subcategory in ("vector", "raster", "generic"):
            if name in groups[subcategory]:
                return subcategory
    except (KeyError, TypeError):
        # Best effort: report the package that could not be classified
        print(name)
    return None
    
# Tag every node with the category whose package list contains it
for category, data in categories.items():
    in_category = nodes["id"].isin(data["pkgs"])
    nodes.loc[in_category, "category"] = category

nodes["downloads"] = None

# Fill in the subcategory and the last-month download count, node by node
for idx, row in nodes.iterrows():
    name, category = row["id"], row["category"]
    nodes.loc[idx, "subcategory"] = get_subcategory(name, category, categories)

    # Names listed in no_distro get no download lookup
    if name not in no_distro:
        response = pypistats.recent(name, "month", format="json")
        nodes.loc[idx, "downloads"] = json.loads(response)["data"]["last_month"]

# Log-scaled download counts (base 10 and base 2)
downloads_as_float = nodes["downloads"].astype(float)
nodes["log10_downloads"] = np.log10(downloads_as_float)
nodes["log2_downloads"] = np.log2(downloads_as_float)

GEOS


# Visualize with Pyvis

In [23]:
# Colors: palette whose first four entries are used by get_color() below
color_palette = palette.wesanderson.GrandBudapest5_5.hex_colors

# Options snippet for node fonts (HTML multi-line labels, bold font).
# NOTE(review): nothing in the visible cells passes `options` to the network,
# so it appears to be unused — confirm before relying on it.
options = """
var options = {
    "nodes": {
        "font": {
            "multi": "html",
            "bold": "16px arial black"
        }
    }
}
"""

def get_color(category):
    """Return the palette entry assigned to ``category``.

    The four known categories are paired, in the order listed below, with
    entries 0-3 of the module-level ``color_palette``. Any other category
    raises ``KeyError``.
    """
    category_order = (
        "analysis / modelling",
        "core / data structures",
        "visualization",
        "data extraction / processing",
    )
    colors = {
        label: color_palette[position]
        for position, label in enumerate(category_order)
    }
    return colors[category]

def get_shape(name, nodes_df):
    """Return the node shape used for package ``name``.

    Names in ``gis_core`` or ``fundamental_core`` are drawn as "star", names
    in ``generic_core`` or ``generic_visuals`` as "diamond". Every other name
    is looked up in ``nodes_df`` and mapped from its "subcategory" value
    ("vector", "raster" or "generic"), all of which map to "dot".
    """
    if any(name in group for group in (gis_core, fundamental_core)):
        return "star"

    if any(name in group for group in (generic_core, generic_visuals)):
        return "diamond"

    # First matching row decides; an unknown subcategory raises KeyError
    subcategory = nodes_df.loc[nodes_df["id"] == name, "subcategory"].values[0]
    shapes = dict.fromkeys(("vector", "raster", "generic"), "dot")
    return shapes[subcategory]

def get_node_color(nodes, name):
    """Return the category color for node ``name``, or ``None``.

    Parameters
    ----------
    nodes : pandas.DataFrame
        Node table with at least the "id" and "category" columns.
    name : str
        Package name to look up.

    Returns
    -------
    The value of ``get_color`` for the node's category, or ``None`` when the
    name is in ``gis_core`` / ``fundamental_core`` or when no color can be
    resolved.

    Notes
    -----
    A failed lookup (name not in ``nodes``, missing column, or a category
    that ``get_color`` does not know) prints ``name`` and returns ``None``.
    Only those lookup errors are handled; any other exception propagates
    instead of being hidden.
    """
    if name in gis_core or name in fundamental_core:
        return None
    try:
        category = nodes.loc[nodes["id"] == name, "category"].values[0]
        return get_color(category)
    except (KeyError, IndexError):
        # Best effort: report the node that has no resolvable color
        print(name)
        return None
        

def get_node_size(nodes, name, size_column):
    """Return the ``size_column`` value of the first row whose "id" is ``name``.

    Raises ``IndexError`` when no row matches and ``KeyError`` when the
    column does not exist.
    """
    is_match = nodes["id"] == name
    sizes = nodes.loc[is_match, size_column]
    return sizes.values[0]


# Directed dependency graph. For a dark theme, additionally pass
# bgcolor='#222222' and font_color='white' to Network.
nt = Network('1000px', '1500px', notebook=True, directed=True)
nt.force_atlas_2based(central_gravity=0.02, overlap=0.3)

# Package nodes: one per package that is not skipped
for idx, node in nodes.iterrows():
    name = node["id"]
    if name not in skipped:
        url = node["url"]
        nt.add_node(
            n_id=name,
            label=name,
            title=f'<a href="{url}" target="_blank">{name}</a>',
            size=get_node_size(nodes, name, "log2_downloads"),
            color=get_node_color(nodes, name=name),
            font={"size":24},
            shape=get_shape(name, nodes),
        )

# Dependency edges; an edge is dropped when either endpoint is skipped
for idx, edge in edges.iterrows():
    source, target = edge["from"], edge["to"]
    if source not in skipped and target not in skipped:
        nt.add_edge(source, target, weight=2, color="grey")

# Legend: fixed-position nodes (physics=False) stacked at a common x
step = 80
x = -1900
y = 200

# One colored box per category
for i, key in enumerate(categories):
    nt.add_node(
        n_id=i, group=key, label=key, size=150, physics=False,
        x=x, y=f"{y + i*step}px",
        shape="box", widthConstraint=300,
        font={"size":30, "color": "white"},
        color=get_color(key),
    )

# Symbol legend, continuing the id / position counter left by the loop above
i += 1
nt.add_node(n_id=i, label="Python GIS library", size=20, physics=False,
            x=x, y=f"{y + i*step}px",
            shape="dot", color="grey", font={"size":30})
i += 1
nt.add_node(n_id=i, label="Fundamental underlying\nC/C++ library", size=20, physics=False,
            x=x, y=f"{y + i*step*1.05}px",
            shape="star", color="grey", font={"size":30})
i += 1
nt.add_node(n_id=i, label="Generic library\nnot specific to GIS", size=20, physics=False,
            x=x, y=f"{y + i*(step*1.15)}px",
            shape="diamond", color="grey", font={"size":30})



In [26]:
# Number of package nodes that are not skipped. Counting the ids directly
# stays correct even if `skipped` contains duplicates or names that never
# appear in `nodes`, which a plain len(nodes) - len(skipped) would miscount.
int((~nodes["id"].isin(skipped)).sum())

123

In [24]:
# Optional (disabled): call show_buttons with the 'physics' filter before showing
#nt.show_buttons(filter_=['physics'])
# Render the network built above, using "Python-ecosystem.html" as the output name
nt.show("Python-ecosystem.html")
