In [1]:
#Loading Tracks and Playlists

from py2neo import Graph, Node, Relationship
import numpy as np
from collections import defaultdict

uri = "..."
username = "..."
password = "..."
graph = Graph(uri, auth=(username, password))

# Retrieve track nodes and their properties
track_query = '''
MATCH (t:Track)
RETURN t.acousticness, t.album_id, t.artist_ids, t.danceability, t.duration_ms, t.energy, t.explicit,
       t.genre, t.id, t.key, t.liveness, t.loudness, t.mode, t.popularity, t.speechiness, t.tempo, t.valence
'''

track_result = graph.run(track_query).data()

# Create dict for tracks
track_dict = {row['t.id']: {
    'index': idx,
    'acousticness': row['t.acousticness'],
    'album_id': row['t.album_id'],
    'artist_ids': row['t.artist_ids'],
    'danceability': row['t.danceability'],
    'duration_ms': row['t.duration_ms'],
    'energy': row['t.energy'],
    'explicit': row['t.explicit'],
    'genre': row['t.genre'],
    'liveness': row['t.liveness'],
    'loudness': row['t.loudness'],
    'popularity': row['t.popularity'],
    'speechiness': row['t.speechiness'],
    'tempo': row['t.tempo'],
    'valence': row['t.valence'],
} for idx, row in enumerate(track_result)}


# Retrieve playlist nodes and their properties
playlist_query = '''
MATCH (p:Playlist)-[:CONTAINS]->(t:Track)
WITH p.playlist_id AS playlist_id, collect(t.id) AS tracklist
RETURN playlist_id, tracklist
'''

playlist_result = graph.run(playlist_query).data()

# Create playlists dictionary
playlist_dict = {row['playlist_id']: {
    'index': idx,
    'track_ids': row['tracklist']
} for idx, row in enumerate(playlist_result)}

# Grabbing Relationship Data

n_playlists = len(playlist_dict)
n_tracks = len(track_dict)

# CONTAINS relationship (playlist-track)
contains_query = '''
MATCH (p:Playlist)-[:CONTAINS]->(t:Track)
RETURN p.playlist_id AS playlist_id, t.id AS track_id
'''
contains_result = graph.run(contains_query).data()

# Create CONTAINS list (Should be playlist_id and track_id pairs)
contains_list = [(record['playlist_id'], record['track_id']) for record in contains_result]

# SHARED_ALBUM, SHARED_ARTIST, and SHARED_GENRE lists (tuple array should be track1_id and track2_id)
album_query = '''
    MATCH (t1:Track)-[:SHARED_ALBUM]->(t2:Track)
    RETURN t1.id AS track1_id, t2.id AS track2_id
    '''
album_result = graph.run(album_query).data()
album_list = [(record['track1_id'], record['track2_id']) for record in album_result]

artist_query = '''
    MATCH (t1:Track)-[:SHARED_ARTIST]->(t2:Track)
    RETURN t1.id AS track1_id, t2.id AS track2_id
    '''
artist_result = graph.run(album_query).data()
artist_list = [(record['track1_id'], record['track2_id']) for record in artist_result]

genre_query = '''
    MATCH (t1:Track)-[:SHARED_GENRE]->(t2:Track)
    RETURN t1.id AS track1_id, t2.id AS track2_id
    '''
genre_result = graph.run(album_query).data()
genre_list = [(record['track1_id'], record['track2_id']) for record in genre_result]


# COSINE_SIMILARITY relationship (track-track)
# tuple array will be (track1_id, track2_id, similarity_value)
cosine_similarity_query = '''
MATCH (t1:Track)-[r:COSINE_SIMILARITY]->(t2:Track)
RETURN t1.id AS track1_id, t2.id AS track2_id, r.value AS similarity_value
'''
cosine_similarity_result = graph.run(cosine_similarity_query).data()
cosine_list = [(record['track1_id'], record['track2_id'], record['similarity_value']) for record in cosine_similarity_result]

track_similarity_matrix = np.zeros((n_tracks, n_tracks))

for track1_id, track2_id, similarity_value in cosine_list:
    track1_idx = track_dict[track1_id]['index']
    track2_idx = track_dict[track2_id]['index']
    track_similarity_matrix[track1_idx][track2_idx] = similarity_value/2
    track_similarity_matrix[track2_idx][track1_idx] = similarity_value/2
    
for track1_id, track2_id in album_list:
    track1_idx = track_dict[track1_id]['index']
    track2_idx = track_dict[track2_id]['index']
    track_similarity_matrix[track1_idx][track2_idx] += 0.50
    track_similarity_matrix[track2_idx][track1_idx] += 0.50
    
for track1_id, track2_id in artist_list:
    track1_idx = track_dict[track1_id]['index']
    track2_idx = track_dict[track2_id]['index']
    track_similarity_matrix[track1_idx][track2_idx] += 0.75
    track_similarity_matrix[track2_idx][track1_idx] += 0.75
    
for track1_id, track2_id in genre_list:
    track1_idx = track_dict[track1_id]['index']
    track2_idx = track_dict[track2_id]['index']
    track_similarity_matrix[track1_idx][track2_idx] += 0.75
    track_similarity_matrix[track2_idx][track1_idx] += 0.75

In [2]:
import random

train_playlists = {}
test_playlists = {}

for playlist_id, playlist_data in playlist_dict.items():
    track_ids = list(playlist_data["track_ids"])
    random.shuffle(track_ids)

    train_size = int(0.8 * len(track_ids))
    train_track_ids = track_ids[:train_size]
    test_track_ids = track_ids[train_size:]

    train_playlists[playlist_id] = {"tracks": set(train_track_ids)}
    test_playlists[playlist_id] = {"tracks": set(test_track_ids)}

In [9]:
import heapq
from collections import defaultdict

def playlist_vector(playlist_id):
    track_ids = playlist_dict[playlist_id]['track_ids']
    return np.mean([track_similarity_matrix[track_dict[track_id]['index']] for track_id in track_ids], axis=0)

def find_similar_playlists(target_playlist_id):
    target_playlist_vector = playlist_vector(target_playlist_id)
    similarities = []

    for other_playlist_id, other_playlist_data in playlist_dict.items():
        if other_playlist_id == playlist_id:
            continue

        other_playlist_vector = playlist_vector(other_playlist_id)
        similarity = np.dot(target_playlist_vector, other_playlist_vector)
        similarities.append((other_playlist_id, similarity))

    similar_playlists = heapq.nlargest(len(similarities), similarities, key=lambda x: x[1])
    return similar_playlists

def cf_recommendation(target_playlist_id, k=10):
    similar_playlists = find_similar_playlists(target_playlist_id)
    track_scores = defaultdict(float)

    for playlist_id, similarity in similar_playlists:
        for track_id in playlist_dict[playlist_id]['track_ids']:
            track_scores[track_id] += similarity

    sorted_scores = heapq.nlargest(k, track_scores.items(), key=lambda x: x[1])
    recommended_track_ids = [[track_id, score] for track_id, score in sorted_scores if track_id not in playlist_dict[playlist_id]['track_ids']]
    return recommended_track_ids

def cbf_recommendation(target_playlist_id, track_similarity_matrix, k=10):
    track_ids = playlist_dict[target_playlist_id]['track_ids']
    playlist_track_matrix = track_similarity_matrix[[track_dict[track_id]['index'] for track_id in track_ids]]
    mean_similarity = np.mean(playlist_track_matrix, axis=0)
    top_indices = heapq.nlargest(k+len(track_ids), range(len(mean_similarity)), key=lambda i: mean_similarity[i])
    recommended_indices = [i for i in top_indices if i not in [track_dict[track_id]['index'] for track_id in track_ids]][:k]
    recommended_tracks = [(track_result[i]['t.id'], mean_similarity[i]) for i in recommended_indices]
    return recommended_tracks

def hybrid_recommendation(target_playlist_id, track_similarity_matrix, k=10, cf_weight=0.5):
    cf_recommendations = cf_recommendation(target_playlist_id, k)
    cbf_recommendations = cbf_recommendation(target_playlist_id, track_similarity_matrix, k)
    
    track_scores = defaultdict(float)
    for track_id, score in cf_recommendations:
        track_scores[track_id] += cf_weight * score

    for track_id, score in cbf_recommendations:
        track_scores[track_id] += (1 - cf_weight) * score

    sorted_scores = heapq.nlargest(k, track_scores.items(), key=lambda x: x[1])
    recommended_track_ids = [track_id for track_id, score in sorted_scores]
    return recommended_track_ids

In [10]:
# Example usage
#target_playlist_id = "Mikes Playlist 2"
#target_playlist_id = "Jules Playlist"
#target_playlist_id = "Johns Playlist"
target_playlist_id = "Connors Playlist"

recommended_tracks = hybrid_recommendation(target_playlist_id, track_similarity_matrix, k=50, cf_weight=0.3)

In [11]:
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy

client_credentials_manager = SpotifyClientCredentials(client_id='...', client_secret="...") 
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

def get_track_info(track_id):
    track_info = sp.track(track_id)
    track_name = track_info['name']
    artist_info = track_info['artists']
    artist_names = [artist['name'] for artist in artist_info]
    return track_name, artist_names

for track_id in recommended_tracks:
    track_name, artist_names = get_track_info(track_id)
    print(f'Track name: {track_name}')
    print(f'Artist(s): {", ".join(artist_names)}')
    print()

Track name: Best Part Missing
Artist(s): Mike Williams

Track name: Elenore
Artist(s): Sultan + Shepard, Andrew Belle

Track name: Dive
Artist(s): Win and Woo, Luxtides

Track name: Beam Me Up (VIP)
Artist(s): Win and Woo, Cosmos & Creature

Track name: Lucky Ones
Artist(s): yetep, SABAI, Leslie Powell

Track name: Gratitude
Artist(s): Above & Beyond, anamē, Marty Longstaff

Track name: Move On
Artist(s): Ekko City, Movada, Mokita

Track name: BACK ONLINE
Artist(s): MEMBA, pluko, EVAN GIIA, Biicla

Track name: I Lived
Artist(s): OneRepublic

Track name: Hopeful
Artist(s): ODESZA

Track name: I Go Dancing (feat. Ella Henderson)
Artist(s): Frank Walker, Ella Henderson

Track name: Losing Ground
Artist(s): Sultan + Shepard, Tishmal

Track name: Falling Fast (feat. GG Magree)
Artist(s): Seven Lions, GG Magree

Track name: My Heart’s on Fire
Artist(s): Vicetone, Qvckslvr

Track name: The Dark
Artist(s): WHIPPED CREAM, Jasiah, Crimson Child

Track name: Making Time
Artist(s): Sultan + Shepar