# Retrieve Graph

#### Node Dictionaries
+ track_dict
+ playlist_dict

#### Edge Dictionaries
+ shared_album_edges
+ shared_artist_edges
+ shared_genre_edges
+ cosine_similarity_edges
+ contains_edges

In [1]:
from py2neo import Graph, Node, Relationship
import numpy as np
from collections import defaultdict

uri = "..."
username = "..."
password = "..."
graph = Graph(uri, auth=(username, password))

# Retrieve track nodes and their properties
track_query = '''
MATCH (t:Track)
RETURN t.acousticness, t.album_id, t.artist_ids, t.danceability, t.duration_ms, t.energy, t.explicit,
       t.genre, t.id, t.key, t.liveness, t.loudness, t.mode, t.popularity, t.speechiness, t.tempo, t.valence
'''

track_result = graph.run(track_query).data()

# Create dictionary for tracks
track_dict = {row['t.id']: {
    'acousticness': row['t.acousticness'],
    'album_id': row['t.album_id'],
    'artist_ids': row['t.artist_ids'],
    'danceability': row['t.danceability'],
    'duration_ms': row['t.duration_ms'],
    'energy': row['t.energy'],
    'explicit': row['t.explicit'],
    'genre': row['t.genre'],
    'key': row['t.key'],
    'liveness': row['t.liveness'],
    'loudness': row['t.loudness'],
    'mode': row['t.mode'],
    'popularity': row['t.popularity'],
    'speechiness': row['t.speechiness'],
    'tempo': row['t.tempo'],
    'valence': row['t.valence'],
} for row in track_result}

# Retrieve playlist nodes and their properties
playlist_query = '''
MATCH (p:Playlist)
RETURN p.playlist_id, p.tracklist
'''

playlist_result = graph.run(playlist_query).data()

# Create dictionary playlists
playlist_dict = {row['p.playlist_id']: row['p.tracklist'] for row in playlist_result}

# Create defaultdicts for relationships
shared_album_edges = defaultdict(list)
shared_artist_edges = defaultdict(list)
shared_genre_edges = defaultdict(list)
cosine_similarity_edges = defaultdict(list)
contains_edges = defaultdict(list)

# Retrieve SHARED_ALBUM relationships
shared_album_query = '''
MATCH (t1:Track)-[:SHARED_ALBUM]->(t2:Track)
RETURN t1.id, t2.id
'''
shared_album_result = graph.run(shared_album_query).data()

for row in shared_album_result:
    track1_id, track2_id = row['t1.id'], row['t2.id']
    shared_album_edges[track1_id].append(track2_id)
    shared_album_edges[track2_id].append(track1_id)

# Retrieve SHARED_ARTIST relationships
shared_artist_query = '''
MATCH (t1:Track)-[:SHARED_ARTIST]->(t2:Track)
RETURN t1.id, t2.id
'''
shared_artist_result = graph.run(shared_artist_query).data()

for row in shared_artist_result:
    track1_id, track2_id = row['t1.id'], row['t2.id']
    shared_artist_edges[track1_id].append(track2_id)
    shared_artist_edges[track2_id].append(track1_id)

# Retrieve SHARED_GENRE relationships
shared_genre_query = '''
MATCH (t1:Track)-[:SHARED_GENRE]->(t2:Track)
RETURN t1.id, t2.id
'''
shared_genre_result = graph.run(shared_genre_query).data()

for row in shared_genre_result:
    track1_id, track2_id = row['t1.id'], row['t2.id']
    shared_genre_edges[track1_id].append(track2_id)
    shared_genre_edges[track2_id].append(track1_id)
    
# Retrieve COSINE_SIMILARITY relationships
cosine_similarity_query = '''
MATCH (t1:Track)-[r:COSINE_SIMILARITY]->(t2:Track)
RETURN t1.id, t2.id, r.value
'''
cosine_similarity_result = graph.run(cosine_similarity_query).data()

for row in cosine_similarity_result:
    track1_id, track2_id, similarity = row['t1.id'], row['t2.id'], row['r.value']
    cosine_similarity_edges[track1_id].append(track2_id)
    cosine_similarity_edges[track2_id].append(track1_id)

# Retrieve CONTAINS relationships
contains_query = '''
MATCH (p:Playlist)-[:CONTAINS]->(t:Track)
RETURN p.playlist_id, t.id
'''
contains_result = graph.run(contains_query).data()

for row in contains_result:
    playlist_id, track_id = row['p.playlist_id'], row['t.id']
    contains_edges[playlist_id].append(track_id)

# Creating Adjacency Matrices

In [2]:
from scipy.sparse import lil_matrix

# Get unique track IDs
unique_track_ids = list(track_dict.keys())

# Create a track ID to index mapping
track_id_to_index = {track_id: index for index, track_id in enumerate(unique_track_ids)}

#print(track_id_to_index)

# Get unique playlist IDs
unique_playlist_ids = list(playlist_dict.keys())

# Create a playlist ID to index mapping
playlist_id_to_index = {playlist_id: index for index, playlist_id in enumerate(unique_playlist_ids)}

#print(playlist_id_to_index)

# Create adjacency matrices for each relationship
n_tracks = len(unique_track_ids)
shared_album_adj = lil_matrix((n_tracks, n_tracks), dtype=np.float32)
shared_artist_adj = lil_matrix((n_tracks, n_tracks), dtype=np.float32)
shared_genre_adj = lil_matrix((n_tracks, n_tracks), dtype=np.float32)
cosine_similarity_adj = lil_matrix((n_tracks, n_tracks), dtype=np.float32)

# Fill in the shared_album_adj matrix
for track_id, connected_track_ids in shared_album_edges.items():
    track_index = track_id_to_index[track_id]
    for connected_track_id in connected_track_ids:
        connected_track_index = track_id_to_index[connected_track_id]
        shared_album_adj[track_index, connected_track_index] = .25

# Fill in the shared_artist_adj matrix
for track_id, connected_track_ids in shared_artist_edges.items():
    track_index = track_id_to_index[track_id]
    for connected_track_id in connected_track_ids:
        connected_track_index = track_id_to_index[connected_track_id]
        shared_artist_adj[track_index, connected_track_index] = .25

# Fill in the shared_genre_adj matrix
for track_id, connected_track_ids in shared_genre_edges.items():
    track_index = track_id_to_index[track_id]
    for connected_track_id in connected_track_ids:
        connected_track_index = track_id_to_index[connected_track_id]
        shared_genre_adj[track_index, connected_track_index] = .5

# Fill in the cosine_similarity_adj matrix
for track_id, connected_track_tuples in cosine_similarity_edges.items():
    track_index = track_id_to_index[track_id]
    for connected_track_id in connected_track_tuples:
        connected_track_index = track_id_to_index[connected_track_id]
        cosine_similarity_adj[track_index, connected_track_index] = .95


# KGAT Model Implementation: PyTorch

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import math

class KGATLayer(nn.Module):
    def __init__(self, in_features, out_features):
        super(KGATLayer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        #self.weight = nn.Parameter(torch.FloatTensor(in_features, out_features))
        self.weight = nn.Parameter(torch.randn(in_features, out_features) * 0.1)  # multiply by 0.1 to increase values
        self.reset_parameters()

#    def reset_parameters(self):
#        gain = nn.init.calculate_gain('relu') * math.sqrt(3)
#        nn.init.uniform_(self.weight, -10, 10)
#        self.weight.data.mul_(gain)

    def reset_parameters(self):
        #print(self.weight)
        #nn.Parameter(torch.randn(11, out_features) * 0.1)  # multiply by 0.1 to increase values
        nn.init.kaiming_uniform_(self.weight, mode='fan_in', nonlinearity='relu')
        #print(self.weight)

    def forward(self, adjacency_matrix, input_features):
        output_features = torch.mm(input_features, self.weight)
        return torch.mm(adjacency_matrix, output_features)

class KGAT(nn.Module):
    def __init__(self, in_features, hidden_features, out_features, num_layers):
        super(KGAT, self).__init__()
        self.input_bn = nn.BatchNorm1d(hidden_features)
        self.hidden_bns = nn.ModuleList([nn.BatchNorm1d(hidden_features) for _ in range(num_layers - 1)])
        self.in_features = in_features
        self.hidden_features = hidden_features
        self.out_features = out_features
        self.num_layers = num_layers

        # Define the input layer
        self.input_layer = KGATLayer(in_features, hidden_features)

        # Define the hidden layers
        self.hidden_layers = nn.ModuleList()
        for _ in range(num_layers - 1):
            self.hidden_layers.append(KGATLayer(hidden_features, hidden_features))

        # Define the output layer
        self.output_layer = KGATLayer(hidden_features, out_features)

    def forward(self, adjacency_matrix, input_features):
        x = self.input_layer(adjacency_matrix, input_features)
        x = x.unsqueeze(2)  # Add an extra dimension for Batch Normalization
        x = F.relu(self.input_bn(x))
        x = x.squeeze(2)  # Remove the extra dimension
        for i, layer in enumerate(self.hidden_layers):
            x = layer(adjacency_matrix, x)
            x = x.unsqueeze(2)  # Add an extra dimension for Batch Normalization
            x = F.relu(self.hidden_bns[i](x))
            x = x.squeeze(2)  # Remove the extra dimension

        # Pass through the output layer
        x = self.output_layer(adjacency_matrix, x)

        return x        

hidden_features = 64  # Number of hidden features in the KGAT model
out_features = 1  # Number of output features in the KGAT model
num_layers = 2  # Number of layers in the KGAT model

numeric_keys = [
    'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit',
    'liveness', 'loudness', 'popularity', 'speechiness', 'tempo', 'valence'
]

In [4]:
loaded_model = KGAT(num_layers=num_layers, in_features=len(numeric_keys), hidden_features=hidden_features, out_features=out_features)
loaded_model.load_state_dict(torch.load('trained_kgat_model_v100.pth'))


<All keys matched successfully>

In [5]:
from pymongo import MongoClient

# connect to MongoDB
username = "..."
password = "..."
cluster_name = "..."
dbname = "tune-playlists"
client = MongoClient(f"mongodb+srv://{username}:{password}@{cluster_name}.mongodb.net/{dbname}?retryWrites=true&w=majority")
db = client[dbname]
collection = db["tune-users"]

# define the query
query = { "user_id": "..." }

result = collection.find(query, { "top_tracks": 1, "_id": 0 })

track_ids = []

for document in result:
    for track in document['top_tracks']:
        track_ids.append(track['id'])

In [6]:
required_features = [
    't.acousticness', 't.danceability', 't.duration_ms', 't.energy', 't.explicit',
    't.liveness', 't.loudness', 't.popularity', 't.speechiness', 't.tempo', 't.valence'
]
track_features = []
for track in track_result:
    x_features = [track[feature] for feature in required_features]
    track_features.append(x_features)
track_features_tensor = torch.FloatTensor(track_features)

In [7]:
# Convert adjacency matrices to PyTorch tensors
shared_album_adj_tensor = torch.FloatTensor(shared_album_adj.toarray())
shared_artist_adj_tensor = torch.FloatTensor(shared_artist_adj.toarray())
shared_genre_adj_tensor = torch.FloatTensor(shared_genre_adj.toarray())
cosine_similarity_adj_tensor = torch.FloatTensor(cosine_similarity_adj.toarray())

In [24]:
cosine_similarity_adj_tensor = torch.FloatTensor(cosine_similarity_adj.toarray())
def get_ranked_recommendations(user_songs, model, unique_track_ids, track_features):
    # Move the model to the evaluation mode
    model.eval()

    # Create an input tensor containing user's input songs' features
    user_song_indices = [unique_track_ids.index(track_id) for track_id in user_songs if track_id in unique_track_ids]
    user_song_features = track_features[user_song_indices]

    # Calculate the embeddings of the user's input songs using the trained model
    user_song_embeddings = model(cosine_similarity_adj_tensor[:len(user_song_indices), :len(user_song_indices)], user_song_features)
    user_song_embeddings += model(shared_album_adj_tensor[:len(user_song_indices), :len(user_song_indices)], user_song_features)
    user_song_embeddings += model(shared_artist_adj_tensor[:len(user_song_indices), :len(user_song_indices)], user_song_features)
    user_song_embeddings += model(shared_genre_adj_tensor[:len(user_song_indices), :len(user_song_indices)], user_song_features)
    
    recommendations = []
    print(len(user_song_embeddings))
    for tx_embedding in user_song_embeddings:
        print(tx_embedding)
        print(tx_embedding.size())
        similarity_scores = torch.mm(track_embeddings, tx_embedding.t()).squeeze()
        sorted_indices = torch.argsort(similarity_scores, descending=True)
        ranked_track_ids = [unique_track_ids[idx] for idx in sorted_indices.tolist()]
        recommendations.append(ranked_track_ids[0])
        
    
    # Calculate the average of the input songs' embeddings
    #avg_embedding = torch.mean(user_song_embeddings, dim=0, keepdim=True)

    # Calculate the similarity score between the average embedding and all tracks in the dataset
    #similarity_scores = torch.mm(track_embeddings, avg_embedding.t()).squeeze()

    # Sort similarity scores in descending order and get the corresponding indices
    #sorted_indices = torch.argsort(similarity_scores, descending=True)

    # Convert the sorted indices to track IDs
    #ranked_track_ids = [unique_track_ids[idx] for idx in sorted_indices.tolist()]

    # Remove the user's input songs from the ranked recommendations
    #recommendations = [track_id for track_id in ranked_track_ids if track_id not in user_songs]

    return recommendations


In [26]:
def get_ranked_recommendations(user_songs, model, unique_track_ids, track_features):
    # Move the model to the evaluation mode
    model.eval()

    # Create an input tensor containing user's input songs' features
    user_song_indices = [unique_track_ids.index(track_id) for track_id in user_songs if track_id in unique_track_ids]
    user_song_features = track_features[user_song_indices]

    # Calculate the embeddings of the user's input songs using the trained model
    user_song_embeddings = model(cosine_similarity_adj_tensor[:len(user_song_indices), :len(user_song_indices)], user_song_features)
    user_song_embeddings += model(shared_album_adj_tensor[:len(user_song_indices), :len(user_song_indices)], user_song_features)
    user_song_embeddings += model(shared_artist_adj_tensor[:len(user_song_indices), :len(user_song_indices)], user_song_features)
    user_song_embeddings += model(shared_genre_adj_tensor[:len(user_song_indices), :len(user_song_indices)], user_song_features)  
    
    # Calculate the average of the input songs' embeddings
    avg_embedding = torch.mean(user_song_embeddings, dim=0, keepdim=True)

    # Calculate the similarity score between the average embedding and all tracks in the dataset
    similarity_scores = torch.mm(track_embeddings, avg_embedding.t()).squeeze()

    # Sort similarity scores in descending order and get the corresponding indices
    sorted_indices = torch.argsort(similarity_scores, descending=True)

    # Convert the sorted indices to track IDs
    ranked_track_ids = [unique_track_ids[idx] for idx in sorted_indices.tolist()]

    # Remove the user's input songs from the ranked recommendations
    recommendations = [track_id for track_id in ranked_track_ids if track_id not in user_songs]

    return recommendations


In [10]:
def get_ranked_recommendations(user_songs, model, unique_track_ids, track_features):
    # Move the model to the evaluation mode
    model.eval()

    all_recommendations = []
    
    for user_song in user_songs:
        # Create an input tensor containing one of user's songs' features
        user_song_index = unique_track_ids.index(user_song)
        user_song_feature = track_features[user_song_index].unsqueeze(0)  # Add an extra dimension to match the input shape
        
        songX_tensor = torch.FloatTensor(user_song_feature)
        
        print(songX_tensor.size())
        
        # Calculate the embedding of the user's song using the trained model
        user_song_embedding = model(cosine_similarity_adj_tensor, songX_tensor)
        user_song_embedding += model(shared_album_adj_tensor[user_song_index], user_song_feature)
        user_song_embedding += model(shared_artist_adj_tensor[user_song_index], user_song_feature)
        user_song_embedding += model(shared_genre_adj_tensor[user_song_index], user_song_feature)
        
        # Calculate the similarity score between the user's song embedding and all tracks in the dataset
        similarity_scores = torch.mm(track_embeddings, user_song_embedding.t()).squeeze()

        # Sort similarity scores in descending order and get the corresponding indices
        sorted_indices = torch.argsort(similarity_scores, descending=True)

        # Convert the sorted indices to track IDs
        ranked_track_ids = [unique_track_ids[idx] for idx in sorted_indices.tolist()]

        # Remove the user's song from the ranked recommendations
        recommendations = [track_id for track_id in ranked_track_ids if track_id not in user_songs]
        
        all_recommendations.append(recommendations)

    return all_recommendations


In [79]:
import torch

# Convert the adjacency matrices to PyTorch tensors
shared_album_adj = torch.from_numpy(shared_album_adj.toarray())
shared_artist_adj = torch.from_numpy(shared_artist_adj.toarray())
shared_genre_adj = torch.from_numpy(shared_genre_adj.toarray())
cosine_similarity_adj = torch.from_umpy(cosine_similarity_adj.toarray())

# Add a new dimension to each tensor to represent the number of matrices
shared_album_adj = shared_album_adj.unsqueeze(0)
shared_artist_adj = shared_artist_adj.unsqueeze(0)
shared_genre_adj = shared_genre_adj.unsqueeze(0)
cosine_similarity_adj = cosine_similarity_adj.unsqueeze(0)

# Stack the adjacency matrices into a single tensor
adj_matrix = torch.cat([shared_album_adj, shared_artist_adj, shared_genre_adj, cosine_similarity_adj], dim=0)

# Reshape the tensor to match the input shape of the KGAT model
adj_matrix = adj_matrix.unsqueeze(0)  # Add extra dimension for batch size


AttributeError: 'Tensor' object has no attribute 'toarray'

In [71]:
print(shared_album_adj_tensor.size())
print()
print(track_features_tensor.size())

torch.Size([26817, 26817])

torch.Size([26817, 11])


In [66]:
track_embeddings.size()

torch.Size([26817, 1])

In [11]:
recommendations = get_ranked_recommendations(cosine_similarity_adj_tensor, track_ids, loaded_model, unique_track_ids, track_features_tensor)[0]

torch.Size([1, 11])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (26852x26852 and 1x64)

In [47]:
recommendations[-50:]

['3B3A4mqiRhspnA3VCyfEbb',
 '36cG8YmyzibJ4WKfEwAn9d',
 '41fe96o3cxCfvINxcxEe4R',
 '72GQG5AyjaZBDlbWUOw7Xb',
 '1xrSV0FyGg77IuJGYF3siH',
 '7BDFu1LVCAe0yZDWcF5d7P',
 '52PCi5DlwcHFknaMyEE9cD',
 '5kNbpvJ5b6R9Zqfm6c9sIX',
 '2acK24b60RQD2zBpW0Zsrw',
 '2kTRzrl2zO2ghx1EBRT9lP',
 '1vNNfTgHsrpOXeiaXQBlH7',
 '6dgKHNwngP42Iww8HOsSH6',
 '4qGK21F2n8zYRNsllinNaS',
 '45XgvZ2ggPKibnPGtduIGY',
 '1jss21gv4BPA8FZv2VNDuV',
 '5Dvd2vfqvPg0RCXQ9DpKee',
 '00bq71I9OZ87M6jZqQFV6I',
 '3em4Dn6hxzdgjKpLgMZSy1',
 '6btkdvumrTmcFzy3oFpZqS',
 '1Gz3r6XFSqYdqBhyYaHtto',
 '0SrLz2zZUMYvYuiKaMaq1I',
 '7tQyxRUBcy0XVZbATedxBU',
 '1nV6VafLPuRSsXgbDY3i6L',
 '5LghBGmNUUwI2BvX2boIdK',
 '3Cl8IGVb7E63EDIXTn5Pk7',
 '7fj9sa5dPVayB0YtsAX9Kb',
 '4BIuY0oEopXizyP3WvkNrT',
 '175Xa0mLfc90AlFMHMfE9O',
 '08DqG1RqpxnICWOdOp2PLV',
 '0SPAmRhrRUcBvDV4uctdcx',
 '5osHRqrBmK2Am2FhoNm2FL',
 '53hco57B1r9b2HPHGek4qu',
 '7MA8T4pKN6VeBIEGE9hggN',
 '5OWt21Fcte8qlNuf9r0fVI',
 '0tAFy8LVdMQgW6A6Nq7olS',
 '4FKRT4uPFx2L4exy9DCs1o',
 '1unMIRKio7PpOxPWRDMtRc',
 

In [48]:
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy

client_credentials_manager = SpotifyClientCredentials(client_id='...', client_secret="...")
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

def get_track_info(track_id):
    track_info = sp.track(track_id)
    track_name = track_info['name']
    artist_info = track_info['artists']
    artist_names = [artist['name'] for artist in artist_info]
    return track_name, artist_names

for track_id in recommendations[-50:]:
    track_name, artist_names = get_track_info(track_id)
    print(f'Track name: {track_name}')
    print(f'Artist(s): {", ".join(artist_names)}')
    print()

Track name: Acapella
Artist(s): Kelis

Track name: Sunworshipper
Artist(s): Mylo

Track name: Anything You Want (Not That)
Artist(s): Belleruche

Track name: Cannot Contain This - Radio Edit
Artist(s): Moloko

Track name: At Night
Artist(s): Shakedown

Track name: Groovejet
Artist(s): Spiller

Track name: Scratched
Artist(s): Étienne de Crécy

Track name: Strict Machine
Artist(s): Goldfrapp

Track name: Blind - Radio Edit
Artist(s): Hercules & Love Affair

Track name: Summer's Here
Artist(s): Magnus

Track name: Safe From Harm - 2012 Mix/Master
Artist(s): Massive Attack

Track name: Central Reservation - Spiritual Life - Ibadan Edit
Artist(s): Beth Orton, Joaquin "Joe" Claussell, Jerome Sydenham

Track name: I Feel Better
Artist(s): Hot Chip

Track name: Over & Over
Artist(s): Moloko

Track name: Invisible Light
Artist(s): Scissor Sisters

Track name: just wanna luv
Artist(s): Conro

Track name: You and Me
Artist(s): The Magician

Track name: Without You - Mesto Remix
Artist(s): Mike W