In [54]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx

import warnings
warnings.filterwarnings('ignore')
from scipy.spatial import distance
from sklearn.metrics import pairwise_distances
from scipy.sparse import csr_matrix

### Read network

In [55]:
# Load data
# First column is User ID while second column is Venue ID 
G_checkin = nx.read_edgelist('NYC_restaurant_checkin/edges.csv', delimiter=",")

In [56]:
#check if it is bipartite > it is
nx.is_bipartite(G_checkin)

True

### Connected components in the network

In [57]:
# List connected components
[len(c) for c in sorted(nx.connected_components(G_checkin), key=len, reverse=True)]

[4906, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]

We have one very large component and a few small. We choose to continue with the largest connected component.

In [58]:
largest_cc = max(nx.connected_components(G_checkin), key=len)
G = G_checkin.subgraph(max(nx.connected_components(G_checkin), key=len)).copy()

### Vectorized Projection

In [59]:
# Identify the bipartite sets
nodes = nx.algorithms.bipartite.basic.sets(G)
userID = sorted(list(nodes[0]))
venueID = sorted(list(nodes[1]))


In [60]:
# We want to project along venueID
len(venueID)

2861

In [61]:
# Non-square adjacency matrix of our biparitite network
#entries is people, 0 not been there, 1 visited?
adjmat = nx.algorithms.bipartite.matrix.biadjacency_matrix(G, row_order=sorted(venueID))
adjmat.toarray()


array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [62]:
#calculates a matrix of pairwise cosine distances between the rows of the adjmat matrix
#to convert distances to similarities we subtract from 1
#cosine similarity is 1.0 when two vectors are identical and 0.0 when they are orthogonal.
c_dist = 1.0 - pairwise_distances(adjmat, metric="cosine", n_jobs=-1)
cosine_dist = c_dist.copy()  # Create a copy of c_dist
np.fill_diagonal(cosine_dist, 0)



In [63]:
# We make the code from the previous question into its own function
def simple(network, nodes):
   T = nx.algorithms.bipartite.matrix.biadjacency_matrix(network, row_order = nodes)
   U = T * T.T
   U.setdiag(0)
   U.eliminate_zeros()
   G = nx.from_scipy_sparse_matrix(U)
   return nx.relabel_nodes(G, {i: nodes[i] for i in range(len(nodes))})

In [64]:
def cosine(network, nodes):
   T = nx.algorithms.bipartite.matrix.biadjacency_matrix(network, row_order = nodes)
   # Since this is a distance, we take the inverse to make it into a similarity.
   # Maximum possible cosine distance is 1, so we take one minus it. If the two nodes
   # were identical, this measure equals to 1, if they were the maximum posible far
   # apart, this measure is zero.
   j_dist = 1.0 - pairwise_distances(T, metric = "cosine", n_jobs = -1)
   np.fill_diagonal(j_dist, 0)
   G = nx.from_numpy_matrix(j_dist)
   return nx.relabel_nodes(G, {i: nodes[i] for i in range(len(nodes))})


In [65]:
G_cosine = cosine(G, nodes)
len(G_cosine.edges)

TypeError: unhashable type: 'set'