<a href="https://colab.research.google.com/github/SDS-AAU/SDS-master/blob/master/M2/notebooks/M2_Bipartite_graphs_in_Python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Similarity and Bipartite Networks with Python and NetworkX
This notebook is an introduction to the concepts and syntax around similarity and bipartite networks.

For a more detailed introduction to advanced network structures, see Daniel Hain's videos (in R):
* [Similarity networks and more](https://www.loom.com/share/7668a71c95f941a1a17148e45ba83689)
* [Multimodal networks](https://www.loom.com/share/2fdf16a87a9d4eac81d50cef0b55ae3b)


In [None]:
# Packaging

import pandas as pd
import seaborn as sns
import networkx as nx
import numpy as np

import matplotlib.pyplot as plt

from sklearn.metrics.pairwise import cosine_distances

# Plot defaults: enable seaborn's colour codes and use a 10x8 default figure size
sns.set(color_codes=True, rc={'figure.figsize':(10,8)})

## Similarity networks

Similarity networks can be constructed by mapping the similarity between all pairs of observations.
Here we are going to use cosine distances.

In [None]:
# load cars data (mtcars.csv, read straight from a GitHub gist)
MTCARS_URL = 'https://gist.githubusercontent.com/ZeccaLehn/4e06d2575eb9589dbe8c365d61cb056c/raw/64f1660f38ef523b2a1a13be77b002b98665cdfe/mtcars.csv'
data = pd.read_csv(MTCARS_URL)

In [None]:
# Preview the first rows of the loaded table
data.head()

In [None]:
# Keep every column except the first one; the first column is used further
# down as the 'model' label of each node.
# NOTE(review): assumes all remaining columns are numeric - confirm.
data_num = data.iloc[:,1:]

In [None]:
# Calculate distances into a square matrix
dist = cosine_distances(data_num,data_num)

In [None]:
# calculate a cutoff (for a less crowded network)
perc = np.percentile(dist, 60)

In [None]:
# create NW
G = nx.from_numpy_array(dist)

In [None]:
# add names: map each row index (= node id) to the value in the first column
# and store it on the nodes as the 'model' attribute
model_names = data.iloc[:, 0]
attributes_dict = model_names.to_dict()
nx.set_node_attributes(G, attributes_dict, 'model')

In [None]:
# Peek at the first few weighted edges instead of dumping the full edge list
list(G.edges(data=True))[:5]

In [None]:
# nx.info() was removed in NetworkX 3.0, so report the graph size directly
print(f"Graph with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

In [None]:
# Keep only the edges whose weight lies above the cutoff `perc`;
# nodes without any remaining edge are not part of the resulting subgraph
strong_edges = [(u, v) for u, v, weight in G.edges(data='weight') if weight > perc]
G_sub = nx.edge_subgraph(G, strong_edges)

In [None]:
# nx.info() was removed in NetworkX 3.0, so report the graph size directly
print(f"Graph with {G_sub.number_of_nodes()} nodes and {G_sub.number_of_edges()} edges")

In [None]:
# identify communities (optional)
# `community` is the import name of the python-louvain package
import community as community_louvain

# Louvain is a randomised heuristic: fixing random_state makes the resulting
# partition (and every plot coloured by it) reproducible across re-runs.
partition = community_louvain.best_partition(G_sub, random_state=42)

# Store the community id of every node as the 'partition' node attribute
nx.set_node_attributes(G_sub, partition, 'partition')

In [None]:
# Build colours and labels by iterating over the nodes of G_sub itself:
# - colours are then guaranteed to be in the same order as the drawn nodes
#   (instead of relying on the ordering of the `partition` dict), and
# - labels only cover nodes that are actually in the subgraph; passing a label
#   for a node without a position makes the label drawing fail with KeyError.
drawn_nodes = list(G_sub.nodes())
nx.draw_kamada_kawai(G_sub,
               node_color=[partition[node] for node in drawn_nodes],
               with_labels=True,
               labels={node: attributes_dict[node] for node in drawn_nodes},
               font_color='r')

In [None]:
# For visualization
!pip install -U bokeh
!pip install -q holoviews

In [None]:
# Import the libraries and link to the bokeh backend
import holoviews as hv
from holoviews import opts
hv.extension('bokeh')
from bokeh.plotting import show

# Setting the default figure size a bit larger and hiding both axes;
# the same defaults are applied to edge paths, graphs and nodes
defaults = {
    'width': 750,
    'height': 750,
    'padding': 0.1,
    'xaxis': None,
    'yaxis': None,
}
element_defaults = [
    opts.EdgePaths(**defaults),
    opts.Graph(**defaults),
    opts.Nodes(**defaults),
]
hv.opts.defaults(*element_defaults)

In [None]:
# Interactive version of the network: force-directed layout, hover tool,
# nodes coloured by their 'partition' attribute
graph_options = dict(
    tools=['hover'],
    edge_alpha=0.2,
    node_color='partition', cmap='Set1',
    legend_position='right',
)
graph = hv.Graph.from_networkx(G_sub, nx.layout.fruchterman_reingold_layout).opts(**graph_options)

# Text labels at the node coordinates, showing the 'model' node attribute
labels = hv.Labels(graph.nodes, ['x', 'y'], 'model')
styled_labels = labels.opts(text_font_size='8pt', text_color='black', bgcolor='white')

# Overlay graph and labels, render to a bokeh figure and display it
show(hv.render(graph * styled_labels))

## Multi-modal networks
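In this example we are going to look at bipartite networks, i.e. networks with two kinds of nodes (here: people and places) in which edges only connect nodes of different kinds.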

In [None]:
# The two node sets (modes) of the bipartite example network
people = [
    'Jesper', 'Pernille', 'Morten', 'Lise', 'Christian',
    'Mette', 'Casper', 'Dorte', 'Jacob', 'Helle',
]
places = [
    'Yoga House',
    'Crossfit',
    'Jazz Club',
    'Jomfru Anne Gade',
]

In [None]:
# some more imports that will be useful
from networkx.algorithms import bipartite
import itertools
import random

In [None]:
# Creating a random bipartite network of people and places:
# enumerate every possible (person, place) pair and draw 15 of them.
combinations = list(itertools.product(people, places))

# A dedicated, seeded generator keeps the sampled network identical across
# re-runs without touching the state of the global `random` module.
connections = random.Random(42).sample(combinations, 15)

In [None]:
# Distinct first elements (people) and second elements (places) of the
# sampled connections - only nodes that actually received an edge
c0 = {person for person, _ in connections}
c1 = {place for _, place in connections}

In [None]:
# Empty undirected graph that will hold the two-mode (people/places) network
B = nx.Graph()

In [None]:
# add nodes and edges in their modes:
# members of c0 get bipartite=0, members of c1 get bipartite=1
for mode, members in enumerate((c0, c1)):
    B.add_nodes_from(members, bipartite=mode)

# every sampled pair becomes an edge between the two modes
B.add_edges_from(connections)

In [None]:
# very clunky visualization of 2-mode networks (unfortunately)
# Split the nodes by their `bipartite` attribute rather than calling
# bipartite.sets(B): sets() raises AmbiguousSolution when the graph is
# disconnected, which can happen with randomly sampled edges.
l = {node for node, mode in B.nodes(data='bipartite') if mode == 0}
r = set(B) - l
pos = {}

# Update position for node from each group: mode 0 in the column x=1,
# mode 1 in the column x=2. Sorting makes the vertical order stable.
pos.update((node, (1, index)) for index, node in enumerate(sorted(l)))
pos.update((node, (2, index)) for index, node in enumerate(sorted(r)))

nx.draw(B, pos=pos, with_labels=True)
plt.show()

In [None]:
# projecting onto people: one-mode weighted graph over the nodes in c0
B_people = bipartite.weighted_projected_graph(B, nodes=c0)

In [None]:
# projecting onto places: one-mode weighted graph over the nodes in c1
B_places = bipartite.weighted_projected_graph(B, nodes=c1)

In [None]:
# Draw the people projection with node labels
nx.draw(B_people, with_labels=True)

In [None]:
# Draw the places projection with node labels
nx.draw(B_places, with_labels=True)

In [None]:
# pull edges of the bipartite graph into a DataFrame (one row per edge)
# NOTE(review): the cells below read the `source` and `target` columns and
# presumably expect people in `source` and places in `target`; B is
# undirected, so confirm the orientation before relying on it.
edges_df = nx.to_pandas_edgelist(B)

In [None]:
# create matrix from edges: rows = distinct `source` values,
# columns = distinct `target` values, cells = number of edges between them
adj_df = pd.crosstab(index=edges_df['source'], columns=edges_df['target'])

In [None]:
# Projecting with dot-product as alternative:
# multiplying the matrix with its transpose gives a rows-by-rows table where
# cell (i, j) counts the columns shared by rows i and j (the diagonal holds
# each row's own number of connections)
adj_df.dot(adj_df.T)

In [None]:
# Adjacency matrix of the NetworkX people projection, for comparison with the
# dot-product table.
# NOTE(review): off-diagonal weights are expected to match; the row/column
# order and the diagonal may differ - confirm.
nx.to_pandas_adjacency(B_people)