In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx

import warnings
warnings.filterwarnings('ignore')

### Read network

In [4]:
# Load data
# First column is User ID while second column is Venue ID 
G_checkin = nx.read_edgelist('NYC_restaurant_checkin/edges.csv', delimiter=",", create_using=nx.MultiGraph)

In [5]:
nx.is_bipartite(G_checkin)

True

### Connected components in the network

In [6]:
# List connected components
[len(c) for c in sorted(nx.connected_components(G_checkin), key=len, reverse=True)]

[4906, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]

We have one very large component and a few small. We choose to continue with the largest connected component.

In [7]:
largest_cc = max(nx.connected_components(G_checkin), key=len)
G = G_checkin.subgraph(largest_cc).copy()

### Projection

In [8]:
# Identify the bipartite sets
nodes = nx.algorithms.bipartite.basic.sets(G)
userID = nodes[0]
venueID = nodes[1]

In [9]:
# We want to project along venueID
len(venueID)

2861

In [12]:
# Non-square adjacency matrix
adjmat = nx.algorithms.bipartite.matrix.biadjacency_matrix(G_checkin, row_order=sorted(venueID))
print(adjmat.shape)

# Project along smaller axis
if adjmat.shape[0] == 2861:
   adjmat_proj = adjmat.dot(adjmat.T)
else:
   adjmat_proj = adjmat.T.dot(adjmat)


(2861, 2075)


In [13]:
# Export to csv
G_proj = nx.from_numpy_matrix(adjmat_proj)
nx.write_edgelist(G_proj, 'edgelist_simpleweights_proj.csv', comments='# source, target')