In [4]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import csv 
import edgar
import os
import pickle
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import plotly.graph_objects as go


In [None]:
# Load the pickled holdings table. The context manager closes the file handle
# deterministically (the bare open() used before left it to the garbage collector).
# NOTE: pickle.load can execute arbitrary code -- only load files you created yourself.
with open('df_All', "rb") as f:
    df = pickle.load(f)

In [None]:
# Distinct lower-cased entity names, used as labels.
# dict.fromkeys de-duplicates while keeping first-occurrence order, so the list is
# identical on every run (iterating a set of strings is not reproducible across
# interpreter sessions because of hash randomisation).
labels = list(dict.fromkeys(df['Entity name'].str.lower()))


In [3]:
def Adj_weight(df):
    """Build the weighted overlap matrix between CIKs from a holdings table.

    Reads the columns 'CIK', 'CUSIP' and '(x$1000)' of ``df``.

    Returns
    -------
    v : numpy.ndarray
        Strictly lower-triangular matrix whose entry (i, j), i > j, is the dot
        product of the per-CUSIP value vectors of the i-th and j-th CIK
        (CIKs in the sorted order produced by ``groupby``).
    weights : pandas.DataFrame
        One row per CIK, one column per CUSIP, holding the summed '(x$1000)' values.
    """
    position_values = np.asarray(df['(x$1000)']).astype(float)
    # One-hot encode the security of every row, then scale each row by its value.
    one_hot = pd.get_dummies(df['CUSIP']).astype(float)
    valued = one_hot.mul(position_values, axis=0)
    # Attach the filer id and collapse all rows of the same filer into one vector.
    per_cik = pd.concat([df[['CIK']], valued], axis=1).groupby(['CIK']).sum()
    # Pairwise dot products; keep only the part below the diagonal so that every
    # pair appears once and self-overlap is dropped.
    overlap = np.tril(np.dot(per_cik, per_cik.T), -1)
    return overlap, per_cik

In [None]:
# Compute the overlap matrix and the per-CIK weights, then cache both in one pickle.
adj, weights = Adj_weight(df)
q1 = [adj, weights]
# NOTE(review): hard-coded absolute, machine-specific path -- consider a configurable
# data directory so the notebook runs on other machines.
with open("C:/Users/michi/Desktop/MDA/2019_1", "wb") as f:
    pickle.dump(q1, f)
# No explicit f.close(): the with-block has already closed the file.

In [6]:
# Load a previously cached [adjacency, weights] pair. The context manager closes
# the file handle deterministically (the bare open() used before did not).
# NOTE: pickle.load can execute arbitrary code -- only load files you created yourself.
with open('q2', "rb") as f:
    q2 = pickle.load(f)

In [8]:
def centrality_attr(G):
    """Store betweenness, closeness and degree centrality on every node of ``G``.

    The graph is modified in place: each node receives the attributes
    'betweenness', 'closeness' and 'degree'. Nothing is returned.
    """
    betweenness = nx.betweenness_centrality(G)
    closeness = nx.closeness_centrality(G)
    degree = nx.degree_centrality(G)

    per_node = {}
    for node in G.nodes:
        per_node[node] = {
            'betweenness': betweenness[node],
            'closeness': closeness[node],
            'degree': degree[node],
        }
    nx.set_node_attributes(G, per_node)

    

In [17]:
# TODO: show company names instead of CIK numbers
_LAYOUT_SEED = 3          # fixed seed so the spring layout is reproducible
_NODE_SIZE_SCALE = 20     # centrality values are small; enlarge them for marker sizes
_EDGE_MIN_WIDTH = 0.05    # width of the lightest edge
_EDGE_WIDTH_RANGE = 4     # extra width added for the heaviest edge


def _summary_texts(G):
    """Return the HTML strings for the three annotation boxes next to the plot.

    Returns (top_central_txt, bottom_central_txt, connectivity).
    """
    ranking = sorted(nx.degree_centrality(G).items(), key=lambda item: item[1], reverse=True)

    # Slice instead of indexing [0], [1], [2] so graphs with fewer than three nodes work.
    top_lines = ['CIK ' + str(cik) + ' : ' + str(score) for cik, score in ranking[:3]]
    top_central_txt = '<b>Most central companies</b>' + '<br>' + '<br> '.join(top_lines)

    least_cik, least_score = ranking[-1]
    bottom_central_txt = ('<b>Most decentral company</b>' + '<br>'
                          + 'CIK ' + str(least_cik) + ' : ' + str(np.round(least_score, 4)))

    # nx.diameter raises NetworkXError when the graph is not connected; report that
    # in the box instead of aborting the whole plot.
    try:
        diameter_txt = str(nx.diameter(G))
    except nx.NetworkXError:
        diameter_txt = 'n/a (graph is not connected)'
    connectivity = ('<b>Connectivity: </b>' + '<br>'
                    + 'Diameter: ' + diameter_txt + '<br>'
                    + 'Node connectivity: ' + str(nx.node_connectivity(G)) + '<br>'
                    + 'Edge connectivity: ' + str(nx.edge_connectivity(G)))
    return top_central_txt, bottom_central_txt, connectivity


def _edge_traces(G, pos):
    """Return one plotly line trace (as a dict) per edge, with width scaled by edge weight."""
    edge_weights = [data['weight'] for _, _, data in G.edges(data=True)]
    if not edge_weights:
        return []

    # Min/max are computed once here (previously they were recomputed for every edge).
    lightest = np.min(edge_weights)
    span = np.max(edge_weights) - lightest

    traces = []
    for u, v, data in G.edges(data=True):
        # Scale weights to 0-1 so absolute differences do not dominate the widths.
        # When all weights are equal there is nothing to scale: use a medium width
        # rather than dividing by zero.
        scaled = (data['weight'] - lightest) / span if span > 0 else 0.5
        traces.append(dict(type='scatter',
                           x=[pos[u][0], pos[v][0]],
                           y=[pos[u][1], pos[v][1]],
                           mode='lines',
                           hoverinfo='skip',
                           text=data['weight'],
                           line=dict(width=scaled * _EDGE_WIDTH_RANGE + _EDGE_MIN_WIDTH,
                                     color='blue')))
    return traces


def plot_network(adj,weight,centrality_metric):
    """Plot the network described by an adjacency matrix as an interactive plotly figure.

    Parameters
    ----------
    adj : array-like
        Square adjacency matrix; non-zero entries become weighted edges.
    weight : object with an ``index``
        ``weight.index[i]`` is used as the label of node ``i`` (the code treats these
        labels as CIK numbers).
    centrality_metric : str
        Name of the node attribute used for marker size and hover text. It must be one
        of the attributes set by ``centrality_attr`` ('betweenness', 'closeness',
        'degree'); any other value raises ``KeyError``.

    Raises
    ------
    ValueError
        If the adjacency matrix yields a graph without nodes.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # from_numpy_matrix was removed in networkx 3.0; from_numpy_array is its replacement.
    G = nx.from_numpy_array(np.asarray(adj))
    if G.number_of_nodes() == 0:
        raise ValueError('adjacency matrix is empty; nothing to plot')

    # labels are CIK numbers
    labels = {i: weight.index[i] for i in G.nodes}
    G = nx.relabel_nodes(G, labels)

    # calculate centrality metrics + assign to node attributes
    centrality_attr(G)

    top_central_txt, bottom_central_txt, connectivity = _summary_texts(G)

    # Positions of nodes according to the spring algorithm. The seed is passed to the
    # layout directly so the global numpy random state is left untouched.
    pos = nx.spring_layout(G, dim=2, seed=_LAYOUT_SEED)

    # Plotly needs the x and y coordinates as separate sequences.
    x_nodes = [pos[node][0] for node in G.nodes]
    y_nodes = [pos[node][1] for node in G.nodes]

    # hover info for node
    node_info = ['CIK: ' + str(node) + '<br>' + str(centrality_metric) + ' centrality :'
                 + str(np.round(G.nodes[node][centrality_metric], 4))
                 for node in G.nodes]

    edges_list = _edge_traces(G, pos)

    # Trace for nodes: marker size follows the chosen centrality metric.
    # The outline (`line`) belongs inside `marker`; it used to be passed to Scatter
    # itself, where it styles connecting lines that are not drawn in 'markers' mode.
    # NOTE(review): `colorscale` only takes effect when `marker.color` is a numeric
    # array, which is not set here -- confirm whether nodes should be coloured.
    trace_nodes = go.Scatter(x=x_nodes,
                             y=y_nodes,
                             mode='markers',
                             marker=dict(symbol='circle',
                                         size=[G.nodes[node][centrality_metric] * _NODE_SIZE_SCALE
                                               for node in G.nodes],
                                         colorscale=['lightgreen', 'magenta'],
                                         line=dict(color='red', width=0.5)),
                             text=node_info,
                             hoverinfo='text')

    # Three boxed text annotations stacked to the right of the plot area.
    annotations = [
        go.layout.Annotation(text=text,
                             align='left',
                             showarrow=False,
                             xref='paper',
                             yref='paper',
                             x=1.1,
                             y=y_pos,
                             bordercolor='black',
                             borderwidth=1)
        for text, y_pos in zip((top_central_txt, bottom_central_txt, connectivity),
                               (1, 0.85, 0.76))
    ]

    # layout for the plot
    layout = go.Layout(title="Network with shared positions of investment companies",
                       width=650,
                       height=625,
                       showlegend=False,
                       xaxis=dict(autorange=True, showgrid=False, ticks='', showticklabels=False),
                       yaxis=dict(autorange=True, showgrid=False, ticks='', showticklabels=False),
                       margin=dict(t=100),
                       hovermode='x',
                       paper_bgcolor='rgba(0,0,0,0)',
                       plot_bgcolor='rgba(0,0,0,0)',
                       annotations=annotations)

    # Include the traces, create a figure
    data = edges_list + [trace_nodes]
    fig = go.Figure(data=data, layout=layout)

    fig.show()



In [19]:
# q2 holds the cached pair: adjacency matrix first, per-CIK weights second.
adj_q2, weights_q2 = q2[0], q2[1]
plot_network(adj_q2, weights_q2, 'closeness')