In [3]:
# Core data analysis packages
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns

from scipy.sparse import csr_matrix 
from sklearn.neighbors import NearestNeighbors
import networkx as nx
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
# import our files
user_artists = pd.read_csv('data/user_artists.dat',sep='\t')
artists = pd.read_csv('data/artists.dat',sep='\t',usecols=['id','name'])
tags = pd.read_csv('data/tags.dat', sep='\t',encoding='latin-1')
uta = pd.read_csv('data/user_taggedartists.dat', sep='\t')
utat = pd.read_csv('data/user_taggedartists-timestamps.dat', sep="\t")
friends = pd.read_csv('data/user_friends.dat', sep='\t')

## Step 2: Exploratory Data Analysis (EDA) and Preprocessing

In [6]:
num_users = user_artists['userID'].nunique()
num_artists = user_artists['artistID'].nunique()
print(f"Total users: {num_users}, Total artists: {num_artists}")

Total users: 1892, Total artists: 17632


In [7]:
#Create a binary version: 1 if a user has played the artist at least once. We will use this later to decide the commun neighbors.
user_artists['played'] = (user_artists['weight'] > 0).astype(int)

In [8]:
user_artists.head()

Unnamed: 0,userID,artistID,weight,played
0,2,51,13883,1
1,2,52,11690,1
2,2,53,11351,1
3,2,54,10300,1
4,2,55,8983,1


In [14]:
artists

Unnamed: 0,id,name
0,1,MALICE MIZER
1,2,Diary of Dreams
2,3,Carpathian Forest
3,4,Moi dix Mois
4,5,Bella Morte
...,...,...
17627,18741,Diamanda Galás
17628,18742,Aya RL
17629,18743,Coptic Rain
17630,18744,Oz Alchemist


## Step 3: Building the Graph Representation

In [10]:
B = nx.Graph()

# Add user nodes
users = user_artists['userID'].unique()
B.add_nodes_from(users, bipartite='users')

# Add artist nodes (as strings or keep as integers, just be consistent)
artists_nodes = user_artists['artistID'].unique()
B.add_nodes_from(artists_nodes, bipartite='artists')

# Add edges with weight
edges = list(user_artists[['userID', 'artistID', 'weight']].itertuples(index=False, name=None))
B.add_weighted_edges_from(edges)

In [11]:
from networkx.algorithms import bipartite

# Get the user nodes (the projection is done over the 'users' set)
user_nodes = [n for n, d in B.nodes(data=True) if d['bipartite'] == 'users']
user_graph = bipartite.weighted_projected_graph(B, user_nodes)

print("Number of nodes in user graph:", user_graph.number_of_nodes())
print("Number of edges in user graph:", user_graph.number_of_edges())

Number of nodes in user graph: 6282
Number of edges in user graph: 22038


In [None]:
def common_neighbors_score(u, v, G):
    return len(list(nx.common_neighbors(G, u, v)))

# Example: score between user 1 and user 2 (if they exist in the graph)
u, v = 1, 2
if u in user_graph and v in user_graph:
    score = common_neighbors_score(u, v, user_graph)
    print(f"Common Neighbors between {u} and {v}: {score}")