# Learning from networks - Stonks

In [17]:
import networkx as nx
import extended_networkx as ex
import scipy
import numpy as np

## Load graph and compute market capitalization

First of all, let's load the graph file and compute the market capitalization of every node.

In [10]:
# Load the graph from its GML file; the cells below call G.in_edges, so a directed graph is expected.
G = nx.read_gml("out_graph.gml")

def compute_capitalization(G: "nx.DiGraph"):
    """
    Adds the 'capitalization' attribute to every node, which is the sum of the incoming edges weights.

    The graph is modified in place and nothing is returned. A node without
    incoming edges gets a capitalization of 0.

    :param G: directed graph (``in_edges`` is not available on undirected
        graphs) whose edges all carry a 'weight' attribute.
    :raises KeyError: if an incoming edge has no 'weight' attribute.
    """
    for node in G.nodes():
        # data=True yields the attribute dict of every incoming edge (parallel
        # edges of a multigraph included), so no per-edge lookup is needed.
        G.nodes[node]["capitalization"] = sum(
            attrs["weight"] for _, _, attrs in G.in_edges(node, data=True)
        )

# Annotate G in place: every node gets a 'capitalization' attribute.
compute_capitalization(G)

Now let's print the 20 nodes with the highest capitalization.

In [11]:
# k is the list length here; later cells reuse it as the subgraph size and as the top-k cut-off.
k = 20
# presumably ex.max_k_nodes returns the k node names ranked by the given attribute — verify in extended_networkx
print(f"Top {k} nodes with highest capitalization: {ex.max_k_nodes(G, k, 'capitalization')}")

Top 20 nodes with highest capitalization: ['CPIN', 'AAPL', 'MSFT', 'AMZN', 'ADRO', 'GOOGL', 'FB', 'GOOG', 'TSLA', 'NVDA', 'JPM', 'UNVR', 'JNJ', 'V', 'UNH', 'HD', 'PG', 'BAC', 'MA', 'PYPL']


In [12]:
# Build a subgraph of G through the project helper, passing k as its size argument.
# NOTE(review): the helper name suggests random sampling and no seed is set in the
# visible cells, so the printed subgraph presumably changes between runs — confirm.
sub_G = ex.connected_random_subgraph(G, k)
print(sub_G)
# Print every node of the subgraph together with its number of incoming edges.
for node in sub_G.nodes():
    print(f"{node}: {len(sub_G.in_edges(node))}")

There are 3 components with more than 20 nodes.
DiGraph with 20 nodes and 27 edges
288: 1
FII: 2
TINS: 2
4543: 1
6USD3588: 2
4547: 2
VSS: 0
LU: 1
92USD1: 1
MGAM: 2
2541: 2
VT: 0
ALNY: 1
1711: 2
TGR: 2
3USDUSD296: 1
UWL: 2
SNGSP: 1
669: 1
6USD1288: 1


In [13]:
# presumably an approximate betweenness centrality computed from 2% of the nodes — confirm in extended_networkx
b_centralities = ex.betweenness_centrality_percent(G, percentage=0.02)
# Show the k entries with the largest values, highest first.
print(sorted(b_centralities.items(), key=lambda t: t[1], reverse=True)[:k])

[('VXF', 0.0002176437286573036), ('EZU', 4.8391164508743804e-05), ('CEY', 3.8992559523899505e-05), ('VHT', 3.456511750624558e-05), ('HART', 2.8972559168156405e-05), ('VGT', 2.617627999911182e-05), ('IGM', 2.5088838100038925e-05), ('VOO', 2.2913954301893136e-05), ('SMIN', 2.0195349554210896e-05), ('VOE', 1.530186100838287e-05), ('FVL', 1.4292093530672327e-05), ('BMED', 1.3515349317048832e-05), ('EDEN', 1.3204651631599433e-05), ('BFIT', 1.0253023619830148e-05), ('PFFD', 9.553953827569001e-06), ('VOX', 8.466511928496107e-06), ('IG', 4.505116439016277e-06), ('HYDR', 3.8837210681174805e-06), ('IPO', 3.806046646755131e-06), ('CTEC', 2.951628011769285e-06)]


In [14]:
def clustering_coefficient(G):
    """
    Clustering coefficient of the nodes of G, using the 'weight' edge attribute.

    Thin wrapper around ``nx.clustering``; whatever that call returns is
    returned unchanged (the notebook treats it as a dict via ``.items()``).

    :param G: graph forwarded as-is to ``nx.clustering``.
    :return: the result of ``nx.clustering(G, weight="weight")``.
    """
    coefficients = nx.clustering(G, weight="weight")
    return coefficients

# Compute the weighted clustering coefficients on the full graph G.
clustering_coeff = clustering_coefficient(G)
# Show the k entries with the largest values, highest first.
print(sorted(clustering_coeff.items(), key=lambda t: t[1], reverse=True)[:k])

[('RWVG', 2.220278602733271e-05), ('UNA', 1.9880340687835788e-05), ('SSI', 1.5219529225139582e-05), ('HPG', 1.2501223578956097e-05), ('PDR', 1.2427193087025836e-05), ('CRHl', 1.2137344349100604e-05), ('VIC', 1.1967758246053812e-05), ('KRZ', 1.1909451311225256e-05), ('BCM', 1.1672895376409549e-05), ('RWGV', 1.1043518322844037e-05), ('VHM', 1.0180368464746611e-05), ('VCI', 8.455830606685144e-06), ('NVL', 8.304115970363736e-06), ('HSG', 8.276992510958094e-06), ('GEX', 7.798399073501852e-06), ('EXK', 7.639429907146357e-06), ('VCB', 7.093919710666592e-06), ('NOVO', 6.784985924541269e-06), ('JETl', 6.676593093953429e-06), ('DGC', 6.654628072496844e-06)]


In [18]:
def closeness_centrality_matrix(G):
    """
    Closeness centrality of every node, derived from an all-pairs distance matrix.

    The adjacency matrix of G is handed to SciPy's Floyd-Warshall routine with
    ``directed=False`` and ``unweighted=False``, i.e. edge direction is ignored
    and the matrix entries are used as distances. For a node that reaches ``m``
    other nodes at a total distance ``total``, the score is
    ``(m / total) * (m / (n - 1))`` where ``n`` is the number of rows of the
    distance matrix; a node whose total distance is not positive scores 0.0.

    :param G: graph accepted by ``nx.adjacency_matrix``.
    :return: dict mapping each matrix row index (not the node label) to its
        score. Presumably row ``i`` corresponds to the ``i``-th node of
        ``G.nodes()``; verify against ``nx.adjacency_matrix``.
    """
    adjacency = nx.adjacency_matrix(G).tolil()  # sparse matrix in list-of-lists form
    distances = scipy.sparse.csgraph.floyd_warshall(adjacency, directed=False, unweighted=False)

    n = distances.shape[0]
    centralities = {}
    for row_index, row in enumerate(distances):
        # Drop the entries at infinite distance; the node itself stays in, at distance 0.
        reachable = [distance for distance in row if distance != np.inf]
        total = sum(reachable)
        n_reached = len(reachable) - 1.0  # do not count the node itself

        if total > 0.0 and n > 1:
            # Average closeness, scaled by the fraction of the graph that is reachable.
            centralities[row_index] = (n_reached / total) * (n_reached / (n - 1))
        else:
            centralities[row_index] = 0.0
    return centralities

# Closeness is computed on the small subgraph sub_G, not on the full graph G.
c_centralities = closeness_centrality_matrix(sub_G)
# The dictionary is keyed by matrix row index rather than by node name.
print(sorted(c_centralities.items(), key=lambda t: t[1], reverse=True)[:k])

[(11, 4.871653647037567e-07), (1, 4.772611838345939e-07), (10, 4.721475281192233e-07), (15, 4.684478678249572e-07), (13, 4.650478864276933e-07), (2, 4.6375752084695123e-07), (4, 4.602320213531264e-07), (14, 4.521128784735396e-07), (5, 4.4944384015364705e-07), (9, 3.973707305133562e-07), (6, 3.887290621220763e-07), (16, 3.631035323648015e-07), (8, 3.0958707516038663e-07), (17, 2.9401120910160723e-07), (7, 2.9237211648718743e-07), (19, 2.867771489070786e-07), (0, 2.812883305557203e-07), (12, 1.0059935669163293e-07), (18, 9.29467138296442e-08), (3, 7.680938787226027e-08)]
