In [36]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx

import warnings
warnings.filterwarnings('ignore')
from scipy.spatial import distance
from sklearn.metrics import pairwise_distances

### Read network

In [2]:
# Load data
# First column is User ID while second column is Venue ID 
G_checkin = nx.read_edgelist('NYC_restaurant_checkin/edges.csv', delimiter=",")

In [3]:
#check if it is bipartite > it is
nx.is_bipartite(G_checkin)

True

### Connected components in the network

In [4]:
# List connected components
[len(c) for c in sorted(nx.connected_components(G_checkin), key=len, reverse=True)]

[4906, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]

We have one very large component and a few small. We choose to continue with the largest connected component.

In [62]:
largest_cc = max(nx.connected_components(G_checkin), key=len)
G = G_checkin.subgraph(max(nx.connected_components(G_checkin), key=len)).copy()

### Vectorized Projection

In [64]:
# Identify the bipartite sets
nodes = nx.algorithms.bipartite.basic.sets(G)
userID = sorted(list(nodes[0]))
venueID = sorted(list(nodes[1]))


In [7]:
# We want to project along venueID
len(venueID)

2861

In [25]:
# Non-square adjacency matrix of our biparitite network
#entries is people, 0 not been there, 1 visited?
adjmat = nx.algorithms.bipartite.matrix.biadjacency_matrix(G, row_order=sorted(venueID))
adjmat.toarray()


array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [54]:
#calculates a matrix of pairwise cosine distances between the rows of the adjmat matrix
#to convert distances to similarities we subtract from 1
#cosine similarity is 1.0 when two vectors are identical and 0.0 when they are orthogonal.
c_dist = 1.0 - pairwise_distances(adjmat, metric = "cosine", n_jobs = -1)
cosine_dist  = np.fill_diagonal(c_dist, 0)




EdgeView([(0, 23), (0, 26), (0, 34), (0, 51), (0, 57), (0, 62), (0, 63), (0, 67), (0, 73), (0, 76), (0, 88), (0, 91), (0, 92), (0, 95), (0, 110), (0, 124), (0, 133), (0, 136), (0, 137), (0, 145), (0, 147), (0, 153), (0, 174), (0, 182), (0, 189), (0, 195), (0, 207), (0, 214), (0, 242), (0, 245), (0, 248), (0, 252), (0, 256), (0, 259), (0, 260), (0, 265), (0, 266), (0, 273), (0, 295), (0, 300), (0, 319), (0, 325), (0, 332), (0, 353), (0, 372), (0, 373), (0, 383), (0, 384), (0, 410), (0, 412), (0, 419), (0, 438), (0, 449), (0, 450), (0, 456), (0, 498), (0, 509), (0, 513), (0, 528), (0, 530), (0, 573), (0, 585), (0, 595), (0, 598), (0, 605), (0, 622), (0, 638), (0, 650), (0, 651), (0, 658), (0, 660), (0, 669), (0, 682), (0, 747), (0, 756), (0, 792), (0, 793), (0, 805), (0, 813), (0, 819), (0, 820), (0, 824), (0, 834), (0, 836), (0, 840), (0, 864), (0, 872), (0, 878), (0, 894), (0, 896), (0, 903), (0, 918), (0, 943), (0, 947), (0, 959), (0, 967), (0, 987), (0, 994), (0, 1002), (0, 1010), (0

In [74]:
# We get the projected graph back. We relabel to recover the original node IDs
G_cosine= nx.from_numpy_matrix(cosine_dist)
#G_cosine = nx.relabel_nodes(G_cosine, {i: nodes[i] for i in range(len(nodes))})



AttributeError: 'NoneType' object has no attribute 'ndim'