In [1]:
import pandas as pd

# Load the checkins and edges text files
checkins_df = pd.read_csv('loc-gowalla_totalCheckins.txt\Gowalla_totalCheckins.txt', delimiter='\t')  # assuming tab-separated
edges_df = pd.read_csv('loc-gowalla_edges.txt\Gowalla_edges.txt', delimiter='\t')  # assuming tab-separated

In [2]:
checkins_df.columns = ['user', 'timestamp', 'latitude', 'longitude', 'location_id']
edges_df.columns = ['user_id_1', 'user_id_2']

In [3]:
checkins_df.head()

Unnamed: 0,user,timestamp,latitude,longitude,location_id
0,0,2010-10-18T22:17:43Z,30.269103,-97.749395,420315
1,0,2010-10-17T23:42:03Z,30.255731,-97.763386,316637
2,0,2010-10-17T19:26:05Z,30.263418,-97.757597,16516
3,0,2010-10-16T18:50:42Z,30.274292,-97.740523,5535878
4,0,2010-10-12T23:58:03Z,30.261599,-97.758581,15372


In [None]:
len(checkins_df['user'].unique()), len(edges_df['user_id_1'].unique()), len(edges_df['user_id_2'].unique()) 

(107092, 196591, 196591)

In [4]:
edges_df.head()

Unnamed: 0,user_id_1,user_id_2
0,0,2
1,0,3
2,0,4
3,0,5
4,0,6


In [10]:
import pandas as pd
import numpy as np
import torch
from scipy.sparse import coo_matrix


# Drop NaN values (if any)
edges_df.dropna(inplace=True)

In [12]:
# Remap user IDs to a contiguous zero-based range
all_users = np.concatenate([edges_df['user_id_1'].values, edges_df['user_id_2'].values])
unique_users, user_map = np.unique(all_users, return_inverse=True)

In [14]:
user_map

array([     0,      0,      0, ..., 196540, 196547, 196561], dtype=int64)

In [15]:
# Assign new user IDs
user_1 = user_map[: len(edges_df)]
user_2 = user_map[len(edges_df) :]

# Ensure undirected graph (add reverse edges)
user_1 = np.concatenate([user_1, user_2])
user_2 = np.concatenate([user_2, user_1[: len(user_2)]])  # FIXED HERE

# Compute the correct number of users
num_users = len(unique_users)

# Create adjacency matrix
data = np.ones(len(user_1))  # Each interaction has weight=1

# Check for mismatched lengths
assert len(user_1) == len(user_2) == len(data), f"Mismatch in lengths: {len(user_1)}, {len(user_2)}, {len(data)}"

# Create the sparse adjacency matrix
adj_matrix = coo_matrix((data, (user_1, user_2)), shape=(num_users, num_users))

# Convert to PyTorch sparse tensor
adj_matrix_tensor = torch.sparse_coo_tensor(
    torch.LongTensor(np.vstack((adj_matrix.row, adj_matrix.col))),
    torch.FloatTensor(adj_matrix.data),
    adj_matrix.shape
)

print(f"Adjacency Matrix Shape: {adj_matrix_tensor.shape}")

Adjacency Matrix Shape: torch.Size([196591, 196591])


In [16]:
# Compute the degree matrix D
degree_matrix = np.array(adj_matrix.sum(axis=1)).flatten()

# D^{-1/2}
degree_inv_sqrt = np.power(degree_matrix, -0.5)

# Apply normalization
norm_adj_matrix = adj_matrix.multiply(degree_inv_sqrt).multiply(degree_inv_sqrt[:, np.newaxis])

# Convert to tensor
norm_adj_matrix_tensor = torch.sparse_coo_tensor(torch.LongTensor([norm_adj_matrix.row, norm_adj_matrix.col]), torch.FloatTensor(norm_adj_matrix.data), norm_adj_matrix.shape)

print(f"Normalized Adjacency Matrix Shape: {norm_adj_matrix_tensor.shape}")


Normalized Adjacency Matrix Shape: torch.Size([196591, 196591])


  norm_adj_matrix_tensor = torch.sparse_coo_tensor(torch.LongTensor([norm_adj_matrix.row, norm_adj_matrix.col]), torch.FloatTensor(norm_adj_matrix.data), norm_adj_matrix.shape)
