In [1]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

In [2]:
df = pd.read_csv("data/edgelist.csv")

In [3]:
network = df[["Person 1", "Person 2"]]

In [4]:
names = np.unique(np.append((network["Person 1"]), network["Person 2"]))
names

array(['Admin.1', 'Admin.2', 'Admin.3', 'Admin.4', 'Admin.5', 'Admin.6',
       'Admin.7', 'Admin.8', 'Admin.Dir', 'Admin.mgr1', 'Admin.mgr2',
       'Admin.mgr3', 'CEO', 'Production.1', 'Production.10',
       'Production.11', 'Production.12', 'Production.13', 'Production.14',
       'Production.2', 'Production.3', 'Production.4', 'Production.5',
       'Production.6', 'Production.7', 'Production.8', 'Production.9',
       'Production.Dir', 'Production.mgr1', 'Production.mgr2',
       'Production.mgr3', 'Production.mgr4', 'Sales.1', 'Sales.10',
       'Sales.2', 'Sales.3', 'Sales.4', 'Sales.5', 'Sales.6', 'Sales.7',
       'Sales.8', 'Sales.9', 'Sales.Dir', 'Sales.mgr1', 'Sales.mgr2',
       'Sales.mgr3'], dtype=object)

In [5]:
# Create adjacency matrix
new_style = pd.DataFrame(index=names, columns=names, data=np.zeros((len(names), len(names))))
for i, pair in network.iterrows():
    new_style.loc[pair[0], pair[1]] = 1
new_style

Unnamed: 0,Admin.1,Admin.2,Admin.3,Admin.4,Admin.5,Admin.6,Admin.7,Admin.8,Admin.Dir,Admin.mgr1,...,Sales.4,Sales.5,Sales.6,Sales.7,Sales.8,Sales.9,Sales.Dir,Sales.mgr1,Sales.mgr2,Sales.mgr3
Admin.1,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Admin.2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Admin.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Admin.4,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Admin.5,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Admin.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Admin.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Admin.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Admin.Dir,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Admin.mgr1,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
G = nx.from_numpy_array(new_style.to_numpy())

labels = {i: name for i, name in enumerate(names)}
nx.set_node_attributes(G, labels, "label")
nx.strongly_connected_components(G)


NetworkXNotImplemented: not implemented for undirected type

## Ex 1: Count the number of connections between departments

In [6]:
df = pd.read_csv("data/edgelist.csv")
network = df[["Person 1", "Person 2"]]
names = np.unique(np.append((network["Person 1"]), network["Person 2"]))
all_attributes = []
for name in names:
    attributes = {}

    if name.__contains__("Sales"):
        attributes["category"] = "Sales"
        attributes["color"] = "salmon"
    elif name.__contains__("Admin"):
        attributes["category"] = "Admin"
        attributes["color"] = "aqua"
    elif name.__contains__("Production"):
        attributes["category"] = "Production"
        attributes["color"] = "lime"
    else:
        attributes["category"] = "CEO"
        attributes["color"] = "grey"
    
    if name.__contains__(".mgr"):
        attributes["seniority"] = "Manager"
    elif name.__contains__(".Dir"):
        attributes["seniority"] = "Director"
    elif name.__contains__("CEO"):
        attributes["seniority"] = "CEO"
    else:
        attributes["seniority"] = "Employee"
            
    all_attributes.append((
            name, attributes))
        
edges = [(pair[0], pair[1]) for _, pair in network.iterrows()]
G = nx.Graph()
G.add_nodes_from(all_attributes)
G.add_edges_from(edges)


In [7]:
from collections import Counter
connections = [(G.nodes[u].get("category", None), G.nodes[v].get("category", None)) for u, v in G.edges()]
dict(Counter(connections))

{('Admin', 'Admin'): 22,
 ('Admin', 'Production'): 4,
 ('Admin', 'Sales'): 1,
 ('Admin', 'CEO'): 2,
 ('CEO', 'Production'): 2,
 ('CEO', 'Sales'): 4,
 ('Production', 'Production'): 46,
 ('Production', 'Sales'): 2,
 ('Sales', 'Sales'): 57}

Admin - Sales --> 1 connection

Production - Sales --> 2 Connections

Admin - Production --> 4 Connections

# EX 2: The connections inside Sales Department

In [8]:
G_sales = nx.Graph()

# Iterate through the nodes in the original graph
for node, attributes in G.nodes(data=True):
    # Check if the node meets the attribute criteria
    if attributes.get("category") == "Sales":
        # Add the node and its attributes to the filtered graph
        G_sales.add_node(node, **attributes)

# Filter edges
sales_connections = network.apply(lambda x: ("Sales" in x[0]) & ("Sales" in x[1]), axis=1)
sales_edges = [(pair[0], pair[1]) for _, pair in network[sales_connections].iterrows()]
# Copy edges from the original graph to the filtered graph
G_sales.add_edges_from(sales_edges)

In [9]:
network

Unnamed: 0,Person 1,Person 2
0,Admin.mgr1,Admin.2
1,Sales.mgr3,Sales.8
2,Production.2,Production.3
3,Production.2,Production.7
4,Sales.1,Sales.2
...,...,...
135,Production.mgr1,Production.2
136,Sales.Dir,Sales.6
137,Admin.4,Production.11
138,Admin.mgr1,Admin.3


## Total Number of Connections

In [32]:
total_edges = G_sales.number_of_edges()
total_nodes = G_sales.number_of_nodes()
print("total employees in sales: ", total_nodes)
print("total connections in sales: ", total_edges)
Connection_ratio_sales = total_edges/total_nodes
print("connections per employee",Connection_ratio_sales)

total employees in sales:  14
total connections in sales:  57
connections per employee 4.071428571428571


## number of connections between employees by seniority

In [11]:
connections_by_seniority = [(G_sales.nodes[u].get("seniority", None), G_sales.nodes[v].get("seniority", None)) for u, v in G_sales.edges()]
dict(Counter(connections_by_seniority))

{('Employee', 'Employee'): 23,
 ('Employee', 'Director'): 10,
 ('Employee', 'Manager'): 18,
 ('Director', 'Manager'): 3,
 ('Manager', 'Manager'): 3}

### Comparing density of departments

In [15]:
G_production = nx.Graph()

# Iterate through the nodes in the original graph
for node, attributes in G.nodes(data=True):
    # Check if the node meets the attribute criteria
    if attributes.get("category") == "Production":
        # Add the node and its attributes to the filtered graph
        G_production.add_node(node, **attributes)

# Filter edges
Production_connections = network.apply(lambda x: ("Production" in x[0]) & ("Production" in x[1]), axis=1)
Production_edges = [(pair[0], pair[1]) for _, pair in network[Production_connections].iterrows()]
# Copy edges from the original graph to the filtered graph
G_production.add_edges_from(Production_edges)

In [16]:
G_Admin = nx.Graph()

# Iterate through the nodes in the original graph
for node, attributes in G.nodes(data=True):
    # Check if the node meets the attribute criteria
    if attributes.get("category") == "Admin":
        # Add the node and its attributes to the filtered graph
        G_Admin.add_node(node, **attributes)

# Filter edges
Admin_connections = network.apply(lambda x: ("Admin" in x[0]) & ("Admin" in x[1]), axis=1)
Admin_edges = [(pair[0], pair[1]) for _, pair in network[Admin_connections].iterrows()]
# Copy edges from the original graph to the filtered graph
G_Admin.add_edges_from(Admin_edges)

In [17]:
print(nx.density(G))
print(nx.density(G_sales))
print(nx.density(G_production))
print(nx.density(G_Admin))

0.13526570048309178
0.6263736263736264
0.26900584795321636
0.3333333333333333


In [25]:
betweenness_centrality = nx.betweenness_centrality(G, normalized=True, endpoints=False, weight=None)
specific_nodes = ["Admin.1", "Admin.4", "Admin.5","Production.7","Production.11","Production.3","Production.mgr1","Sales.1","Sales.mgr1"]
betweenness_centrality_specific_nodes = {node: centrality for node, centrality in betweenness_centrality.items() if node in specific_nodes}


In [26]:
betweenness_centrality_specific_nodes

{'Admin.1': 0.05824089490756158,
 'Admin.4': 0.14519046786989367,
 'Admin.5': 0.02805293471960139,
 'Production.11': 0.10322161220064398,
 'Production.3': 0.016265462932129597,
 'Production.7': 0.08082456249521637,
 'Production.mgr1': 0.2530712635708413,
 'Sales.1': 0.10829056822090877,
 'Sales.mgr1': 0.09387367114405298}

In [27]:
betweenness_centrality

{'Admin.1': 0.05824089490756158,
 'Admin.2': 0.001936026936026936,
 'Admin.3': 0.013053835230868721,
 'Admin.4': 0.14519046786989367,
 'Admin.5': 0.02805293471960139,
 'Admin.6': 0.04079869477796112,
 'Admin.7': 0.0,
 'Admin.8': 0.010471380471380468,
 'Admin.Dir': 0.11056486310074826,
 'Admin.mgr1': 0.03559632360669044,
 'Admin.mgr2': 0.008960237293570629,
 'Admin.mgr3': 0.047397269574303076,
 'CEO': 0.32304711434718936,
 'Production.1': 0.0135022385022385,
 'Production.10': 0.02195920604120247,
 'Production.11': 0.10322161220064398,
 'Production.12': 0.0,
 'Production.13': 0.0,
 'Production.14': 0.06396619126565652,
 'Production.2': 0.0027396985730319067,
 'Production.3': 0.016265462932129597,
 'Production.4': 0.000677409010742344,
 'Production.5': 0.0,
 'Production.6': 0.0,
 'Production.7': 0.08082456249521637,
 'Production.8': 0.0,
 'Production.9': 0.011348474757565669,
 'Production.Dir': 0.07651408025317397,
 'Production.mgr1': 0.2530712635708413,
 'Production.mgr2': 0.034381760362

In [33]:
total_edges = G_production.number_of_edges()
total_nodes = G_production.number_of_nodes()
print("total employees in sales: ", total_nodes)
print("total connections in sales: ", total_edges)
Connection_ratio_prod = total_edges/total_nodes
print("connections per employee",Connection_ratio_prod)

total employees in sales:  19
total connections in sales:  46
connections per employee 2.4210526315789473


In [34]:
total_edges = G_Admin.number_of_edges()
total_nodes = G_Admin.number_of_nodes()
print("total employees in sales: ", total_nodes)
print("total connections in sales: ", total_edges)
Connection_ratio_adm = total_edges/total_nodes
print("connections per employee",Connection_ratio_adm)

total employees in sales:  12
total connections in sales:  22
connections per employee 1.8333333333333333
