## 1. Reading a network from a file and converting it into different formats.

In [None]:
# Import libraries
import pandas as pd

import networkx as nx
import matplotlib.pyplot as plt

import numpy as np

import operator
import sys

The “HW2_ who_talks_to_whom.xlsx” contains all the messages from all kinds of communications between the 2017/18 MSc Business Analytics class, for a particular week. The Excel file is converted to a CSV file in this exercise. For the sake of simplicity, we only use the sheet "sent".

In [None]:
# Directory prefix that is prepended to the data file names (current directory)
place_holder = './'
# Name of the CSV file that holds the "sent" sheet of the original workbook
csvfile = "HW2_who_talks_to_whom_sent.csv"

### 1.1 We read from the file and start doing some preprocessing.

In [None]:
# Load the CSV into a DataFrame; the first column of the file becomes the row index
csv_path = place_holder + csvfile
sent = pd.read_csv(csv_path, index_col=0)
sent

We then drop the irrelevant columns.

In [None]:
# Columns at positions 81 to 85 are the ones the notebook treats as irrelevant
# (axis=1 means "drop columns", not rows).
col_names = sent.columns[81:86]

sent = (
    sent.drop(col_names, axis=1)   # returns a new frame instead of mutating in place
    .fillna(0)                     # missing values count as zero
    .replace("-", 0)               # "-" placeholders count as zero
    .reset_index(drop=True)        # row labels become 0 .. n-1
)

# Column names arrive as strings; convert them to int and shift by one so that
# they line up with the 0-based row index.
sent.columns = [int(x) - 1 for x in sent.columns]

# Make every cell an integer. The original called sent.apply(pd.to_numeric)
# without keeping the result, so that conversion never took effect; here it is
# applied first and then cast to int.
sent = sent.apply(pd.to_numeric).astype(int)
sent

### 1.2 Next, we print the network using different formats: an edge list, adjacency list and adjacency matrix.

To do that, we have to rely on Networkx.

In [None]:
# Interpret the DataFrame as an adjacency matrix and build a directed NetworkX graph
dG_sent = nx.from_pandas_adjacency(sent, create_using=nx.DiGraph)

In [None]:
# Read a space-separated edge list that has no header row
file = 'edgelist1.txt'
data = pd.read_csv(file, sep=' ', header=None)

# Append a running row number as an extra column, then name all four columns
data = data.assign(hello=range(len(data)))
data.columns = ['source', 'target', 'weight', 'hello']

# Build a graph from the 'source'/'target' columns; edge_attr=True attaches
# every remaining column to the edges as attributes
g = nx.from_pandas_edgelist(data, edge_attr=True)

g.edges(data=True)

#### A. Output as edge list

In [None]:
# Print one "source target weight" line per edge.
# nx.write_weighted_edgelist writes encoded bytes, which a text-mode sys.stdout
# rejects on Python 3; generate_edgelist yields the same lines as str.
for line in nx.generate_edgelist(dG_sent, data=["weight"]):
    print(line)

In [None]:
# We can also use pandas to do the conversion: the result is a DataFrame with
# one row per edge of dG_sent
nx.to_pandas_edgelist(dG_sent)

#### B. Output as adjacency list

In [None]:
# Print one line per node: the node followed by its neighbours.
# nx.write_adjlist writes encoded bytes, which a text-mode sys.stdout rejects
# on Python 3; generate_adjlist yields the adjacency lines as str.
for line in nx.generate_adjlist(dG_sent):
    print(line)

#### C. Output as adjacency matrix. This relies on numpy.

In [None]:
# nx.to_numpy_matrix was removed in NetworkX 3.0; to_numpy_array returns the
# same adjacency values as a plain 2-D ndarray
nx.to_numpy_array(dG_sent)

Details about how the conversion between libraries can be done can be found: https://networkx.github.io/documentation/stable/reference/convert.html

Note that conversion to/from an adjacency list using NumPy or pandas is not supported.

### 1.3 Additional Exercise: Read a node-edge incidence matrix from a file as an undirected graph.

The file used in this example is 'incmat1.txt'. After loading the matrix the information about the graph will be displayed.

In [None]:
# Load the node-edge incidence matrix. ndmin=2 keeps the result two-dimensional
# even when the file holds a single row or a single column.
G0_txt = np.loadtxt(place_holder + 'incmat1.txt', ndmin=2)
# Plain integer ndarray (np.matrix is discouraged by NumPy)
M0 = G0_txt.astype(int)

# Convert the incidence matrix to an adjacency matrix by multiplying it with
# its transpose: an off-diagonal entry is positive when two nodes share an edge
AM0 = ((M0 @ M0.T) > 0).astype(int)

# The diagonal of the product is not an adjacency relation, so set it to 0
np.fill_diagonal(AM0, 0)
# nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array replaces it
G0 = nx.from_numpy_array(AM0)

# Print the graph information, one "u v weight" line per edge.
# generate_edgelist yields str lines; write_weighted_edgelist would write bytes,
# which a text-mode sys.stdout rejects on Python 3.
print("\nPrinting G0")
for line in nx.generate_edgelist(G0, data=["weight"]):
    print(line)

BUT WHY?

For example, given the following incidence matrix, in which each row corresponds to a node (A, B, C) and each column to an edge (1, 2):

$$\begin{bmatrix} 
A1 & A2 \\ 
B1 & B2 \\ 
C1 & C2 \end{bmatrix}$$

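We obtain the corresponding adjacency matrix by multiplying the incidence matrix with its transpose. An off-diagonal entry of the product, e.g. $A1B1 + A2B2$, is non-zero exactly when the two nodes appear together in at least one edge; the diagonal entries only count the edges touching each node, which is why the code resets them to 0: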

$$\begin{bmatrix} 
A1 & A2 \\ 
B1 & B2 \\ 
C1 & C2  \end{bmatrix}
\begin{bmatrix} 
A1 & B1 & C1 \\ 
A2 & B2 & C2 \end{bmatrix}
= 
\begin{bmatrix} 
A1A1+A2A2 & A1B1 + A2B2 & A1C1 + A2C2\\ 
B1A1+B2A2 & B1B1 + B2B2 & B1C1 + B2C2\\  
C1A1+C2A2 & C1B1 + C2B2 & C1C1 + C2C2\\\end{bmatrix}$$

## 2. Plotting the network.

### 2.1 Try out different layouts

In [None]:
# Draw the directed graph with a force-directed (spring) layout on a 10x10 inch figure
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_title("Spring Layout")
nx.draw_spring(
    dG_sent,
    ax=ax,
    with_labels=True,
    node_color='skyblue',
    node_size=200,
    edge_color='black',
)

In [None]:
# Draw the directed graph with all nodes placed on a circle
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_title("Circular Layout")
nx.draw_circular(
    dG_sent,
    ax=ax,
    with_labels=True,
    node_color='skyblue',
    node_size=200,
    edge_color='black',
)

In [None]:
# Draw the directed graph with nodes placed at random positions
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_title("Random Layout")
nx.draw_random(
    dG_sent,
    ax=ax,
    with_labels=True,
    node_color='skyblue',
    node_size=200,
    edge_color='black',
)

Note that the labels above are off by 1.

### 2.2 Change the node size and color based on the communication

First, we need to rename the node so that it starts from 1 instead of 0. We use the lambda function here.

A **lambda function** is a small *anonymous* function taking any number of arguments but only one expression. It is useful as an anonymous function inside another function without being defined separately.

In [None]:
# Shift every node label up by one so that the labels start from 1 instead of 0.
# The relabelling mutates dG_sent in place, so it is guarded: once node 0 no
# longer exists the shift has already been applied, and re-running this cell
# must not shift the labels a second time.
if 0 in dG_sent:
    nx.relabel_nodes(dG_sent, lambda x: x + 1, copy=False)

We adjust the node size and thickness of the edge based on the communication count.

In [None]:
# Out-degree of every node, used below to scale the node sizes
d_out = dict(dG_sent.out_degree())

# Tag each edge with a colour: green when its weight is above 5, blue otherwise
for _, _, attrs in dG_sent.edges(data=True):
    attrs['color'] = 'green' if attrs['weight'] > 5 else 'blue'

# Per-node and per-edge drawing attributes, in the graph's own iteration order
node_sizes = [50 * out_deg for out_deg in d_out.values()]
edge_widths = [weight / 10 for _, _, weight in dG_sent.edges(data='weight')]
edge_colors = [color for _, _, color in dG_sent.edges(data='color')]

plt.figure(figsize=(15, 10))

# Nodes with more outgoing edges are drawn larger, edges with a larger
# 'weight' are drawn thicker, and alpha sets the transparency
nx.draw(
    dG_sent,
    pos=nx.random_layout(dG_sent),
    with_labels=True,
    node_color='red',
    node_size=node_sizes,
    width=edge_widths,
    edge_color=edge_colors,
    linewidths=5,
    alpha=0.5,
    font_size=10,
)

## 3. Centrality measures

Let's compute centrality measures using built-in functions.

In [None]:
# Read a comma-separated edge list into an undirected graph with string node labels
G1 = nx.read_edgelist('social_network.txt', nodetype=str, delimiter=',')

# Give every edge the same weight of 2
nx.set_edge_attributes(G1, 2, 'weight')

In [None]:
# Draw G1 with a spring layout and annotate every edge with its weight
plt.figure(figsize=(10, 10))

# The layout is stored in `pos` so that later cells can reuse the same positions
pos = nx.spring_layout(G1)
labels = nx.get_edge_attributes(G1, 'weight')

nx.draw(G1, pos=pos, with_labels=True)
nx.draw_networkx_edge_labels(G1, pos=pos, edge_labels=labels)

In [None]:
# Colour each node by its betweenness centrality
centrality = nx.betweenness_centrality(G1)
node_colors = [centrality[node] for node in G1.nodes()]
nx.draw(G1, pos=pos, node_color=node_colors)

In [None]:
# Colour each node by its degree centrality
centrality = nx.degree_centrality(G1)
node_colors = [centrality[node] for node in G1.nodes()]
nx.draw(G1, pos=pos, node_color=node_colors)

In [None]:
# Colour each node by its eigenvector centrality
centrality = nx.eigenvector_centrality(G1)
node_colors = [centrality[node] for node in G1.nodes()]
nx.draw(G1, pos=pos, node_color=node_colors)

## 4. Community detection 

In [None]:
# girvan_newman yields successively finer partitions; keep only the first one
partition = nx.community.girvan_newman(G1)
first_split = next(partition)

# Store each community as a sorted list of its nodes
comms = tuple(sorted(members) for members in first_split)

In [None]:
# One colour letter per community. The index wraps around the palette so that
# the loop cannot fail with an IndexError if there are more communities than
# colours. (The leftover debug print of the loop index has been removed.)
colors = 'rgb'
for i, nodes in enumerate(comms):
    nx.draw_networkx_nodes(G1, pos=pos, nodelist=nodes,
                           node_color=colors[i % len(colors)])

# Draw the edges on top, using the same node positions
nx.draw_networkx_edges(G1, pos=pos)

## 5. Additional Exercises

We need the community library from Networkx.

In [None]:
from networkx.algorithms import community

### 5.1 Using your judgement and based on the various exercises we did in class, represent the network the best way possible.

In [None]:
# Load the Davis Southern women graph that ships with NetworkX as a built-in generator
G_social = nx.davis_southern_women_graph()


We then set the size of each node based on its degree (the graph is undirected, so there is no separate out-degree); the edge widths are left at their default in the plot below.

In [None]:

# Node sizes for plotting: 100 times each node's degree, in the graph's node order
degree = [100 * node_degree for _, node_degree in G_social.degree()]


Plotting the network.

In [None]:
# Large canvas so that the node labels stay readable
fig, ax = plt.subplots(figsize=(20, 20))

# Spring layout; node sizes come from the `degree` list computed earlier
pos = nx.spring_layout(G_social)
nx.draw(
    G_social,
    pos=pos,
    ax=ax,
    font_size=12,
    with_labels=True,
    node_size=degree,
    edge_color='grey',
    node_color='purple',
)

ax.axis("off")

plt.show()

### 5.2 Choose and run an appropriate community detection algorithm to identify clusters.

In [None]:
# One colour per half of the bisection
color_list = ['r', 'y']

# Kernighan-Lin bisection splits the graph into two node sets. When no initial
# partition is given it starts from a random one, so the seed is fixed to make
# the detected communities reproducible across kernel restarts.
communities = community.kernighan_lin_bisection(G_social, max_iter=100, seed=42)



In [None]:
# Base drawing: labels and edges, with small white point markers for the nodes
pos = nx.spring_layout(G_social)
nx.draw(G_social, pos, with_labels=True, node_size=100, node_color='w', node_shape='.')

# Overlay each community in its own colour
for members, member_color in zip(communities, color_list):
    nx.draw_networkx_nodes(G_social, pos, nodelist=members, node_color=member_color)

Plotting the Network with the communities in color purple and yellow.

In [None]:
# Communities: one colour per node, in the graph's node order.
# Members of the first community are yellow, every other node is purple.
nodes = list(G_social.nodes)

color_map = [
    'yellow' if node in communities[0] else 'purple'
    for node in nodes
]

In [None]:
# Same drawing as before, but with the nodes coloured by community
fig, ax = plt.subplots(figsize=(20, 20))

nx.draw(
    G_social,
    pos=pos,
    ax=ax,
    font_size=12,
    with_labels=True,
    node_size=degree,
    edge_color='grey',
    node_color=color_map,
)

ax.axis("off")

plt.show()