In [1]:
import pandas as pd
import neo4j as neo

In [2]:
spotify = pd.read_csv('spotify/spotify.csv')

In [3]:
feats = ['popularity', 'duration_ms', 'explicit', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness','acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature']

for feat in feats:
    # Standardize the feature
    spotify[feat] = (spotify[feat] - spotify[feat].mean())/spotify[feat].std() 

In [4]:
strokes = spotify.loc[(spotify['artists'] == 'The Strokes') & (spotify['album_name'] == 'Is This It')]
spotify = spotify.loc[(spotify['artists'] != 'The Strokes') & (spotify['track_genre'].isin(list(strokes['track_genre'].unique())))]

In [5]:
strokes = dict(strokes.T)

In [6]:
spotify = spotify.sample(1000)

In [7]:
spotify = dict(spotify.T)

In [8]:
password = 'j4f^q3WrwZYw7u3gx&JsgknBnTkUJPX'

# Connect to neo4j
neo4j = neo.GraphDatabase.driver('neo4j://localhost:7687', auth=('neo4j', password))


def cosine_similarity(v1, v2):
    return sum([a*b for a,b in zip(v1, v2)]) / (sum([a**2 for a in v1])**0.5 * sum([b**2 for b in v2])**0.5)

with neo4j.session() as session:
    session.run('MATCH (n) DETACH DELETE n')
    for i, track in spotify.items():
        session.run('CREATE (t:Track {name: $name, artists: $artists, genre: $genre, popularity: $popularity, duration: $duration, explicit: $explicit, danceability: $danceability, energy: $energy, key: $key, loudness: $loudness, mode: $mode, speechiness: $speechiness, acousticness: $acousticness, instrumentalness: $instrumentalness, liveness: $liveness, valence: $valence, tempo: $tempo, time_signature: $time_signature})', name=track['track_name'], artists=track['artists'], genre=track['track_genre'], popularity=track['popularity'], duration=track['duration_ms'], explicit=track['explicit'], danceability=track['danceability'], energy=track['energy'], key=track['key'], loudness=track['loudness'], mode=track['mode'], speechiness=track['speechiness'], acousticness=track['acousticness'], instrumentalness=track['instrumentalness'], liveness=track['liveness'], valence=track['valence'], tempo=track['tempo'], time_signature=track['time_signature'])
        for j, other_track in spotify.items():
            if i != j:
                similarity = cosine_similarity([track['danceability'], track['energy'], track['key'], track['loudness'], track['mode'], track['speechiness'], track['acousticness'], track['instrumentalness'], track['liveness'], track['valence'], track['tempo'], track['time_signature']], [other_track['danceability'], other_track['energy'], other_track['key'], other_track['loudness'], other_track['mode'], other_track['speechiness'], other_track['acousticness'], other_track['instrumentalness'], other_track['liveness'], other_track['valence'], other_track['tempo'], other_track['time_signature']])
                if (similarity > 0.9) and (track['track_genre'] == other_track['track_genre']):
                    session.run('MATCH (t1:Track {name: $name1}), (t2:Track {name: $name2}) CREATE (t1)-[:SIMILAR_TO {similarity: $similarity}]->(t2)', name1=track['track_name'], name2=other_track['track_name'], similarity=similarity)
                    
    for i, track in strokes.items():
        session.run('CREATE (t:Strokes {name: $name, artists: $artists, genre: $genre, popularity: $popularity, duration: $duration, explicit: $explicit, danceability: $danceability, energy: $energy, key: $key, loudness: $loudness, mode: $mode, speechiness: $speechiness, acousticness: $acousticness, instrumentalness: $instrumentalness, liveness: $liveness, valence: $valence, tempo: $tempo, time_signature: $time_signature})', name=track['track_name'], artists=track['artists'], genre=track['track_genre'], popularity=track['popularity'], duration=track['duration_ms'], explicit=track['explicit'], danceability=track['danceability'], energy=track['energy'], key=track['key'], loudness=track['loudness'], mode=track['mode'], speechiness=track['speechiness'], acousticness=track['acousticness'],instrumentalness=track['instrumentalness'], liveness=track['liveness'], valence=track['valence'], tempo=track['tempo'], time_signature=track['time_signature'])
        for j, other_track in spotify.items():
            if i != j:
                similarity = cosine_similarity([track['danceability'], track['energy'], track['key'], track['loudness'], track['mode'], track['speechiness'], track['acousticness'], track['instrumentalness'], track['liveness'], track['valence'], track['tempo'], track['time_signature']], [other_track['danceability'], other_track['energy'], other_track['key'], other_track['loudness'], other_track['mode'], other_track['speechiness'], other_track['acousticness'], other_track['instrumentalness'], other_track['liveness'], other_track['valence'], other_track['tempo'], other_track['time_signature']])
                if (similarity > 0.9) and (track['track_genre'] == other_track['track_genre']):
                    session.run('MATCH (t1:Strokes {name: $name1}), (t2:Track {name: $name2}) CREATE (t1)-[:SIMILAR_TO {similarity: $similarity}]->(t2)', name1=track['track_name'], name2=other_track['track_name'], similarity=similarity) 

In [19]:
# Find the most similar tracks to The Strokes
with neo4j.session() as session:
    result = session.run('MATCH (s:Strokes)-[:SIMILAR_TO]->(t:Track) RETURN t.name, t.artists, t.genre ORDER BY t.similarity  DESC LIMIT 9')
    for record in result:
        print(record)

<Record t.name='Figure It Out' t.artists='Royal Blood' t.genre='garage'>
<Record t.name='Figure It Out' t.artists='Royal Blood' t.genre='alt-rock'>
<Record t.name='Figure It Out' t.artists='Royal Blood' t.genre='alt-rock'>
<Record t.name='Old Yellow Bricks' t.artists='Arctic Monkeys' t.genre='garage'>
<Record t.name='Waiting My Whole Life' t.artists='The Grogans' t.genre='garage'>
<Record t.name='Cut You Deep' t.artists='Dear Seattle' t.genre='garage'>
<Record t.name='Mary, Don’t Go' t.artists='Shannon & The Clams' t.genre='garage'>
<Record t.name='Tattooed Smiles' t.artists='Black Box Revelation' t.genre='garage'>
<Record t.name='Figure It Out' t.artists='Royal Blood' t.genre='garage'>
