# Spotify MPD Collaborative Filtering
- Idé: Rekommendera ett namn för givna spellistan genom att ge den samma namn som den närmaste playlisten.
- Skapa en funktion som konverterar spellistorna att ha bara artisten och namnet i sig.

In [1]:
import json
import os
import numpy as np
import math
from typing import List, Tuple
from collections import defaultdict
import random

playlists = []
DATA_PATH = "mpd/data_samples"
REWRITE_DATA = True # Set to True to not use the stored pickled vectors 

num_files = len(os.listdir(DATA_PATH))

# Load in every json file from the mpd/data_samples directory
for filename in os.listdir(DATA_PATH):
    with open(os.path.join(DATA_PATH, filename), "rt", encoding="utf-8") as f:
        playlists.extend(json.load(f)["playlists"])

playlists = playlists[:200]

## Förklara och visualisera datan
lorem ipsum

## Bygg rekommenderaren

In [2]:
unique_songs = sorted({f"{song['artist_name']} - {song['track_name']}"
                           for playlist in playlists
                           for i, song in enumerate(playlist["tracks"])})
random.shuffle(unique_songs)
print("Number of jsons: ", num_files)
print("Unique songs:", len(unique_songs))

Number of jsons:  1
Unique songs: 9641


In [3]:
# Sample of the song names
[f"{s['artist_name']} - {s['track_name']}" for i, s in enumerate(playlists[69]["tracks"][:5])]

['The Civil Wars - From This Valley',
 'Josh Garrels - Farther Along',
 'Ethan Pierce - Dark Skies',
 'Paramore - We Are Broken',
 "Brandi Carlile - That Wasn't Me"]

In [4]:
def to_track_names(playlists):
    return [[f"{s['artist_name']} - {s['track_name']}" for i, s in enumerate(playlist["tracks"])] for playlist in playlists]

def make_playlist_vector(playlist: dict) -> List[int]:
    """
    Given a list of interests, produce a vector whose ith element is 1
    if unique_interests[i] is in the list, 0 otherwise
    """
    return [
        1
        if song
        in [f"{s['artist_name']} - {s['track_name']}" for i, s in enumerate(playlist["tracks"])]
        else 0
        for song in unique_songs #[:100] for testing
    ]

def make_item_based_vectors(playlist_vectors, unique_songs):
    return [[playlist_vector[i]
     for playlist_vector in playlist_vectors]
    for i, _ in enumerate(unique_songs)]


In [5]:
import time
import pickle

t0 = time.time()

try:
    if REWRITE_DATA:
        print("Rewriting data: Generating vectors and dumping them")
        playlist_vectors = [make_playlist_vector(playlist) for playlist in playlists] # [:100] for testing
        f = open("playlist_vectors.pickle", "wb")
        pickle.dump(playlist_vectors, f)
    else:
        f = open("playlist_vectors.pickle", "rb")
        print("Pickled data found.")
        playlist_vectors = pickle.load(f)
except(FileNotFoundError):
    print("Pickled data not found, generating vectors...")
    playlist_vectors = [make_playlist_vector(playlist) for playlist in playlists] # [:100] for testing
    f = open("playlist_vectors.pickle", "wb")
    pickle.dump(playlist_vectors, f)
finally:
    f.close()

t1 = time.time()
print(f"Time taken: {round(t1-t0, 2)} s")
print("vectors length:", len(playlist_vectors))
 
print("Sum of the vectors(useless metric):", sum(np.array(playlist_vectors).flatten()))

Rewriting data: Generating vectors and dumping them
Time taken: 14.43 s
vectors length: 200
Sum of the vectors(useless metric): 12664


In [6]:
Vector = List[float]
pelle_id = 23
pelles_interests = playlist_vectors[pelle_id]

def dot(v: Vector, w: Vector) -> float:
    """Computes v_1 * w_1 + ... + v_n * w_n"""
    assert len(v) == len(w), "vectors must be same length"

    return sum(v_i * w_i for v_i, w_i in zip(v, w))

def cosine_similarity(v1: Vector, v2: Vector) -> float:
    return dot(v1, v2) / math.sqrt(dot(v1, v1) * dot(v2, v2))

pelles_similarities = [cosine_similarity(pelles_interests, playlist_vector_i)
                     for playlist_vector_i in playlist_vectors]


In [7]:
def sort_similar_playlists(similarities, user_id):
    id_similarity = [(id, similarity) for id, similarity in enumerate(similarities) if id != user_id and similarity > 0]
    return sorted(id_similarity,                              
                  key=lambda pair: pair[-1],                  
                  reverse=True)

sort_similar_playlists(pelles_similarities, pelle_id)
playlists_with_tracknames = to_track_names(playlists)

In [8]:
def user_based_suggestions(similarities, user_id: int, 
                           include_current_interests: bool = False,
                          max_suggestions: int=10): 
    # Sum up the similarities 
    suggestions: Dict[str, float] = defaultdict(float) 
    for other_user_id, similarity in sort_similar_playlists(similarities, user_id): 
        for song in playlists_with_tracknames[other_user_id]: 
            suggestions[song] += similarity 
 
    # Convert them to a sorted list 
    suggestions = sorted(suggestions.items(), 
                         key=lambda pair: pair[-1],  # weight 
                         reverse=True) 
 
    # And (maybe) exclude already interests 
    if include_current_interests: 
        return suggestions 
    else: 
        return [(suggestion, weight) 
                for suggestion, weight in suggestions 
                if suggestion not in playlists_with_tracknames[user_id]][:max_suggestions]
    


In [9]:
user_based_suggestions(pelles_similarities, pelle_id, False, 12)

[('Avenged Sevenfold - Shepherd of Fire', 0.08038483366636998),
 ('Iron Maiden - Run to the Hills - 1998 Remastered Version',
  0.08038483366636998),
 ('Metallica - Enter Sandman', 0.08038483366636998),
 ('Metallica - Master Of Puppets', 0.08038483366636998),
 ('Slipknot - The Devil In I', 0.08038483366636998),
 ('Scorpions - Rock You Like A Hurricane', 0.08038483366636998),
 ('50 Cent - Many Men (Wish Death)', 0.05175491695067657),
 ('I Prevail - Blank Space', 0.05175491695067657),
 ('I Prevail - Scars', 0.05175491695067657),
 ('Lil Dicky - $ave Dat Money (feat. Fetty Wap & Rich Homie Quan)',
  0.05175491695067657),
 ('Lil Dicky - Professional Rapper (feat. Snoop Dogg)', 0.05175491695067657),
 ('Halestorm - I Miss The Misery', 0.05175491695067657)]

In [10]:
playlists_with_tracknames[pelle_id]

['Avenged Sevenfold - Natural Born Killer',
 'Deftones - Diamond Eyes',
 'Killswitch Engage - Rose Of Sharyn',
 "Avenged Sevenfold - I Won't See You Tonight Part 2",
 'Slipknot - Before I Forget',
 'Avenged Sevenfold - The Stage',
 'Chevelle - The Red',
 'Five Finger Death Punch - If I Fall',
 'Five Finger Death Punch - Wrong Side Of Heaven',
 'Killswitch Engage - My Curse',
 'Killswitch Engage - Always',
 'Deftones - Kimdracula',
 'Killswitch Engage - Eye of the Storm',
 'Avenged Sevenfold - Beast and the Harlot',
 'Metallica - The Unforgiven II',
 'Slipknot - Psychosocial',
 'Slipknot - Duality',
 'Avenged Sevenfold - And All Things Will End',
 'Avenged Sevenfold - Planets',
 'Avenged Sevenfold - Requiem']

### Item based collaborative filtering
1k playlists would take 18 hours to calculate on a ryzen 3800x......

In [11]:
song_playlist_matrix = np.array(make_item_based_vectors(playlist_vectors, unique_songs))

print("Matrix shape (unique_songs, playlists):",song_playlist_matrix.shape)

Matrix shape (unique_songs, playlists): (9641, 200)


In [12]:
def time_taken(t0, t1, dec=2):
    return round(t1-t0, dec)
t0 = time.time()
song_similarities = (cosine_similarity(song_playlist_matrix[1000], pl_vector_j)
                    for pl_vector_j in song_playlist_matrix)

t1 = time.time()
print("Time taken:", time_taken(t0, t1))

Time taken: 0.0


In [13]:
sum(song_similarities)

63.0

In [14]:
w = (asd for asd in [4,6,7,3,2,4,45,56])
for i, j in enumerate(w):
    print(i, j)

0 4
1 6
2 7
3 3
4 2
5 4
6 45
7 56


In [15]:
import multiprocessing as mp 
bigboi = np.empty(shape=(song_playlist_matrix.shape[0],song_playlist_matrix.shape[0]))

def do_print(s):
    foo = tuple((cosine_similarity(s[1], pl_vector_j)
                    for pl_vector_j in song_playlist_matrix))
    print("s", s[0], "done")
    return (s[0], foo)


t0 = time.time()
p=mp.Pool(14)
items_user_tuple = p.map(do_print,enumerate(song_playlist_matrix))
t1 = time.time()
print("Time taken:", time_taken(t0, t1))
p.close()
p.join()

s 2076 done
s 346 done
s 0 dones 519
 sdone
 865 done
s s1211 done
 s 17301384  donedone

s 2249 done
s 692s s done173
 done
 s 15571038  done
done
s 1903 done
s 347 done
s 2077 done
s 1 done
s 520 done
s 693 done
s 1212 done
s 1385 done
s 1039 dones
 1731 done
s 866 done
s 1558 done
s 2250 done
s 174 done
s 1904 done
s 348 done
s 2078 done
s 2 done
s 175 done
s 867 sdone 
694 done
s 521 done
s1905  dones
 s1213 1386  donessdone 
 
 1732s1040s  2251 done
 1559 
donedone
done
s 349 done
s 3 done
s 2079 done
s 176 done
s 1041 done
s 695 done
s 522 done
s 1733 sdone 
868 done
s 1906 done
s s1214 done
 1387 done
s 2252s  1560done done

s 350 done
s 4 done
s 2080 done
s 177 done
s 1388 done
s 869 done
s 696 done
s 1042 done
s 523 done
s 1734 done
s 1561 done
s 1907 done
s 1215 done
s 2253 done
s 351 done
s 5 done
s 178 done
s 1389 done
ss 2254  done2081 
done
s 697 done
s 1043 done
s 870 done
s 524 done
s 1908 done
s 1735 done
s 1562s  sdone1216 
 352 done
done
s 6 done
s 1909s done
 525 do

s 1311 done
s 1484 done
s 448 done
s 794 done
s 2004 done
s 1832 done
s 275 done
s 967 done
s 1140 done
s 621 done
s 2177 done
s 1659 done
s 2351 done
s 105 done
s 1485 done
s 1312 done
s 449 done
s 1833 done
s 2005 done
s 795 done
s 276 done
s 1141 done
s 968 done
s 1660 done
s 2178 done
s 622 done
s 106 done
s 2352 done
s 796 done
s 1834 done
s 450 done
s 1486 done
s 1313 done
s 2006 done
s 277 done
ss  1142969 done 
done
s 1661 done
s s 107 done
2179 done
s 623 done
s 1487 done
s 797 done
s 1835 done
s 1314 done
s 2353 done
s 451 done
s 2007 done
s 278 done
s 1143 done
s 970 done
s 2354 done
s 1662 done
s 108 done
s 2180 done
s 624 done
s 1488 done
s 798 done
s 1836 done
s 1315 done
s 452 done
s 279 done
s 2008 done
s 1144 done
s 1663 done
ss 971  2355 done
ss  109 donedone

2181 done
s 625 done
s 1489 done
s 799 done
s 1316 done
s 1837 done
s 453 done
ss  2009280 done
 done
s 1145 done
s 1664 done
s 972 done
s 2356 done
s 1490 dones 110 done

s 2182 done
s 626
 dones 1317 done
s 80

s 3661 done
s 4179 done
s 4697 done
s 4352 done
s 2798 done
s 3489 done
s 4006 done
s 3144 done
s 3316 done
s 2971 done
s 2626 done
s 2453 done
s s 4525 done
2799 done
s 3834 done
s 4180 done
s 3662 done
s 4698 done
s 4353 done
s 3490 done
s 4007 done
s 2627 done
s 2800 dones
 3145 done
s 3317 done
s 2972 done
s 2454 done
s s4526  3835done done

s 4181 done
s 3663 done
s 4699 done
s 4354 done
s 3491 done
s 2628 done
s 4008 done
s 2801 done
s 3146 done
s 3318 done
s 2973 done
s 4182 done
s 2455 done
s 4527 done
s 3836 done
s 3664 done
s 4700 done
ss  43553492 done
 done
s 4009 done
s 2629 done
s 3319 done
s 2802 done
s 3147 done
s 2974 done
s 2456 done
s 4183 done
s 4701 done
s 4528 done
s 3665 done
s 3837 done
s 4356 done
s 3493 done
s 4010 done
s 3320 done
s 2630 done
s 2803 done
s 2457 done
s 3148 done
s 4529 done
s 2975 done
s 3666 done
s 4184 done
s 4702 done
s 3838 done
ss 3494  4357done 
done
s 4011 done
s 3321 done
s 2631 done
s 2804 done
s 3149 done
s 2458 done
s 3839 done
s 45

s s3586  4276 donedone

s 4622 done
s 3241 done
s 3758 done
s 4104 done
s 3069 done
s 2724 done
s 3415 done
s 4794 done
s 4448 done
s 2549 done
s 2897 done
s 4623 sdone 
4277 done
s 3931 done
s 3587 done
s 3242 done
s 3759 done
s 4105 done
s 3070 done
s 2725 done
s 3416 done
s 2550 done
s 4795 done
s 4449 done
s 2898 done
s 3932 done
s 4624 done
s 4278 done
s 3588 done
s 3243 done
s 3760 done
s 4106 done
s 3071 done
s 2551 done
s 2726 done
s 3417 done
s 4796 done
s 4450 done
s 3933 done
s 2899 done
s 4625 done
s 4279 done
s 3589 done
s 3244 sdone
 3761 done
s 3072 done
s 2552 done
s 4107 done
s 2727 done
s 3418 done
s 4797 done
s 2900 done
s 4451 done
s 3934 done
s 4626 done
s 3245 done
s 4280 done
s s 37623590  done
done
s 2553 done
s 3073 done
s 4108 done
s 2728 dones
 3419 done
s 2901 done
s 4798 done
s 3935 done
s 4452 done
s 4627 done
s 3246 done
s 4281 done
s 3763 done
s 3591 done
s 3074 done
s 4109 done
s 2554 done
s 3420 done
s 2729 done
s 2902 done
s 4628 done
s 4799 done
s 44

64505587  donedone

s 5932 done
s 5069 done
s 6278 done
s 5242 done
s 6623 done
s 5414 done
s 6104 done
s 6795 done
s 7142 done
s 5761 done
s 6968 doness  
50706624  donedone

s 4898 done
s 6279 done
ss 5588 done
 6451s 5933 done
 done
s 5243 done
s 5415 done
s 6105 done
s 6625 done
s 7143 done
s 6796 done
s 5762 done
s 6280 done
ss  4899 done5934
s done
 5071 done
s 6969 done
s 5589 done
s 6452 done
s 5244 done
s 5416 done
s 6106 done
s 6626 done
s 5072s  done
7144 done
s 6970 done
s 4900 done
s 6797 done
s 5763 done
s 6281 done
s 5935 done
s 5590 done
s 6453 done
s 5245 done
s 5417 done
s 6107 done
s 4901 done
s 6282 done
s 6627 done
s 6971 done
s 7145 done
s s6798 5073  done
done
s 5764 done
s 6454 done
s 5591 done
s 5936 done
s 5418 done
s 5246 done
s 4902 done
s 6108 done
s 6628 done
s 6283 done
s 6972 done
s 5074s done 7146 done

s 6799 done
s 5765 done
s 6455 done
s 5937 done
s 5592 done
s 5419 done
s 5247 done
s 4903 done
s 6109 done
s 6629 done
s 6284 done
s 6456 done
s 6973 d

s 6375 done
s 7064 done
s 6548 done
s 6891 done
s 6028 done
s 6722 done
s 4995 done
s 5339 done
s 7241 done
s 6204 done
s 5167 done
s 5513 done
s 5684 done
s 5857 done
s 6376 done
s 7065 done
s 6549 done
s 7242 done
s 6892 done
s 6029 done
s 6723 done
ss 4996 done
 5340 done
s 6205 done
s 5168 done
s 5858 done
s 5514 done
s 5685 done
s 6377 done
s 7066 done
s 6550 done
s 4997 done
s 7243 done
s 6030 done
s 6893 done
s 5859 done
s 6724 done
s 5341 done
s 5169 done
s 6206 done
s 5515 done
s 5686 done
s 6378 done
s 7067 done
s 7244 done
s 6551 done
s 4998 done
s 5860 done
s 6031 done
s 6894 done
s 6725 done
s 5342 done
s 5170 done
s 6207 done
s 5516 done
s 5687 done
s 6379 done
s 7245 done
s 6552 done
s 7068 done
s 5861 done
s 6032 done
s 4999 done
s 6895 done
s 6726 done
s 5517 done
s 5343 done
s 5171 done
s 6208 done
s 6380 done
s 5688 done
s 6553s  done7246
 done
s 7069 done
s 5862 done
s 6033 done
s 5000 sdone 6896
 done
s 5518 done
s 6727 done
s 5172 done
s 5344 done
s 6209 done
s 63

s 9240 done
s 8548 done
s 8376 done
s 7686 done
s 9410 done
s s8721  7343done
 done
s 8204 done
s 8895 done
s 7514 done
s 7858 done
s 9241 done
s 9067 done
s 8031 done
s 9585 done
s 8549 done
s 8377 done
s 9411 done
s 7687 done
s 7344 done
s 8722 done
s 8205 done
s 7515 done
s 8896 done
s 7859 done
s 9242 done
s 9068 done
s 8378 done
s 9586 done
s 7345 done
s 8032 done
s 8550 done
s 9412 done
s 7688 done
s 8723 done
s 8206 done
s 7516 done
s 8897 done
s s8551  done7860 done

s 9243 done
s 9069 done
s 9587 done
s 7346 done
s 8379 done
s 8033 done
s 9413 done
s 7689 done
s 8724 done
s 8207 done
s 7517 done
s 8898 done
s 7347 done
s 7861 done
s 9070 done
s 8552 done
s 9244 done
s 9588 done
s 8034 done
s 8380 dones 
7690s  done
9414 done
s 8725 done
s 7518 done
s 8208 done
s 8899 done
s 7348 done
s 7862 done
s 8035 done
s 8553 done
s 9245 done
s 9071 done
s 7691 done
s 9589 done
s 8381 done
s 9415 done
s 8209 done
s 7519 done
s 8726 done
s 8900 done
s 7349 done
s 7692 done
s 7863 done
s 85

In [16]:
sum(items_user_tuple[0][1])

128.1526042549165

In [17]:
unique_songs[:100]

['Lostboycrow - Where It All Goes',
 'EXO - CALL ME BABY',
 'Midnight Star - Curious',
 'The New Life Community Choir - I Believe - Live',
 'Japandroids - Younger Us',
 'Relient K - In Like A Lion (Always Winter)',
 'First Aid Kit - Master Pretender',
 'Lil Wayne - President Carter',
 'April March - Theme for the Lime Cafe',
 'Lil Pump - Iced Out (feat. 2 Chainz)',
 'Britt Nicole - Amazing Life',
 'Sabrepulse - A Girl I Know',
 'Jimmy Wayne - I Love You This Much',
 'PnB Rock - Selfish',
 'Twenty One Pilots - Truce',
 'J. Blackfoot - Taxi',
 'Chris Brown - Run It!',
 'DJ Cam - Show Your Love',
 'Mat Kearney - Heartbreak Dreamer',
 'Cosculluela - Te Busco (feat. Nicky Jam)',
 'Jose Luis Reyes - Esta Cayendo',
 'Frank Ocean - Solo',
 'Juan Luis Guerra 4.40 - La Bilirrubina - Live',
 'Plan B - Fanática Sensual',
 'Kanye West - Wolves',
 'Caleborate - Options',
 'Sylvan Esso - Wolf',
 'Metallica - Nothing Else Matters',
 '5 Seconds of Summer - The Girl Who Cried Wolf',
 'Tenth Avenue North

In [None]:
f = open("items_user_tuple.pickle", "wb")
pickle.dump(items_user_tuple, f)