# Learning from networks - Stonks

In [1]:
import networkx as nx
import extended_networkx as ex

## Load graph and compute market capitalization

First, let's load the graph file and compute the market capitalization of every node.

In [2]:
# Read the graph from the GML file "out_graph.gml" (path is relative to the working directory).
G = nx.read_gml("out_graph.gml")

def compute_capitalization(G: nx.DiGraph):
    """
    Adds the 'capitalization' attribute to every node, which is the sum of the weights of its incoming edges.

    The graph is modified in place and nothing is returned. Nodes without incoming edges get a
    capitalization of 0.

    :param G: directed graph whose edges all carry a 'weight' attribute.
    :raises KeyError: if an incoming edge has no 'weight' attribute.
    """
    for node in G.nodes():
        # data=True yields (source, target, attributes) triples, so each weight is read directly
        # from the edge being visited; this also counts every parallel edge of a multigraph.
        G.nodes[node]["capitalization"] = sum(
            attributes["weight"] for _, _, attributes in G.in_edges(node, data=True)
        )

# Annotate the nodes of G in place; the function stores the result on the graph and returns nothing.
compute_capitalization(G)

Now let's print the 20 nodes with the highest capitalization.

In [3]:
# Number of top-ranked nodes to show; later cells reuse this value.
k = 20
top_capitalization_nodes = ex.max_k_nodes(G, k, "capitalization")
print(f"Top {k} nodes with highest capitalization: {top_capitalization_nodes}")

Top 20 nodes with highest capitalization: ['CPIN', 'AAPL', 'MSFT', 'AMZN', 'ADRO', 'GOOGL', 'FB', 'GOOG', 'TSLA', 'NVDA', 'JPM', 'UNVR', 'JNJ', 'V', 'UNH', 'HD', 'PG', 'BAC', 'MA', 'PYPL']


In [4]:
# Draw a connected subgraph with k nodes and list the number of incoming edges of each node.
sub_G = ex.connected_random_subgraph(G, k)
print(sub_G)
for node, incoming_count in sub_G.in_degree():
    print(f"{node}: {incoming_count}")

There are 3 components with more than 20 nodes.
DiGraph with 20 nodes and 19 edges
BBGUSDUSDTSXSNR3: 1
BBGUSDUSD59HJP87: 1
BBGUSDUSD9TUSD3SB1: 1
BBGUSDUSDFFFM1V5: 1
BBGUSDUSDPYN2GP1: 1
BBGUSDUSDSUSD3MDJ8: 1
BBGUSD11YPMQY4: 1
BBGUSDUSDP5DR9F5: 1
BBGUSDUSDZSK9LY4: 1
PFXF: 0
BBGUSDUSDCCPD5X2: 1
BBGUSDUSD5ZV53B7: 1
BBGUSDUSDJPYXQD8: 1
BBGUSDUSDHMSCSD7: 1
BBGUSD11XUSD4LV4: 1
BBGUSDUSDJ2JZLQ1: 1
BBGUSDUSDJ87QZB6: 1
BBGUSDUSDWMLKP91: 1
BBGUSD12JBUSDSD3: 1
BBGUSDUSDNQ59KWUSD: 1


In [5]:
b_centralities = ex.betweenness_centrality_percent(G, percentage=0.02)
# Rank the (node, centrality) pairs from highest to lowest score and show the first k.
ranked_by_betweenness = sorted(
    b_centralities.items(), key=lambda item: item[1], reverse=True
)
print(ranked_by_betweenness[:k])

[('CPI', 0.0007963181678068081), ('VWO', 0.0006475716508979087), ('RALS', 0.00018315628557242039), ('EEM', 0.0001461055865825796), ('BBCA', 9.134511952212314e-05), ('FM', 6.384837435985138e-05), ('FAB', 6.314930456759022e-05), ('IGM', 3.774976878210191e-05), ('EPP', 1.97293030260368e-05), ('CEY', 1.9030233233775655e-05), ('JKH', 1.8253489020152158e-05), ('HART', 1.5612558693832272e-05), ('DSI', 1.4059070266585279e-05), ('ECOW', 1.390372142386058e-05), ('THD', 1.2971628367512384e-05), ('CSB', 1.0097674777105448e-05), ('ACES', 8.00046540032201e-06), ('BFIT', 5.747907180813871e-06), ('JUST', 4.505116439016277e-06), ('CWEB', 4.116744332204529e-06)]


In [8]:
def clustering_coefficient(G):
    """
    Returns the result of networkx's clustering computation on G, using the 'weight' edge
    attribute as the edge weight.
    """
    coefficients = nx.clustering(G, weight="weight")
    return coefficients

clustering_coeff = clustering_coefficient(G)
# Rank the (node, coefficient) pairs from highest to lowest value and show the first k.
ranked_by_clustering = sorted(
    clustering_coeff.items(), key=lambda item: item[1], reverse=True
)
print(ranked_by_clustering[:k])

[('RWVG', 2.220278602733271e-05), ('UNA', 1.9880340687835788e-05), ('SSI', 1.5219529225139582e-05), ('HPG', 1.2501223578956097e-05), ('PDR', 1.2427193087025836e-05), ('CRHl', 1.2137344349100604e-05), ('VIC', 1.1967758246053812e-05), ('KRZ', 1.1909451311225256e-05), ('BCM', 1.1672895376409549e-05), ('RWGV', 1.1043518322844037e-05), ('VHM', 1.0180368464746611e-05), ('VCI', 8.455830606685144e-06), ('NVL', 8.304115970363736e-06), ('HSG', 8.276992510958094e-06), ('GEX', 7.798399073501852e-06), ('EXK', 7.639429907146357e-06), ('VCB', 7.093919710666592e-06), ('NOVO', 6.784985924541269e-06), ('JETl', 6.676593093953429e-06), ('DGC', 6.654628072496844e-06)]
