# Spotify MPD Collaborative Filtering
- Idé: Rekommendera ett namn för den givna spellistan genom att ge den samma namn som den mest lika spellistan.
- Skapa en funktion som konverterar spellistorna så att de bara innehåller artisten och låtnamnet.

In [1]:
import json
import os
import numpy as np
import math
from typing import List, Tuple, Union
from collections import defaultdict
import random
import time
import pickle


class Collaborative_Recommender:
    Vector = List[int]

    def __init__(self, path: str, rewrite_data: bool = False, playlist_limit: int = 0) -> None:
        self.playlists = []
        self.path = path
        self.rewrite_data = rewrite_data
        self.load_data(path, playlist_limit)
        # Shuffle playlist as to reduce possible bias
        random.shuffle(self.playlists)

    def load_data(self, data_path: str, limit: int = 0):
        try:
            for filename in os.listdir(data_path):
                with open(os.path.join(data_path, filename), "rt", encoding="utf-8") as f:
                    self.playlists.extend(json.load(f)["playlists"])
            print(f"Loaded {len(os.listdir(data_path))} jsons")

            if limit > 0:
                self.playlists = self.playlists[:limit]
        except FileNotFoundError as e:
            print("File not found:", e)

    def extract_unique_songs(self):
        self.unique_songs = sorted(
            {
                f"{song['artist_name']} - {song['track_name']}"
                for playlist in self.playlists
                for i, song in enumerate(playlist["tracks"])
            }
        )
        print(f"Found {len(self.unique_songs)} unique songs")

    def extract_playlist_names(self):
        self.playlist_names = [playlist["name"] for playlist in self.playlists]

    def sample_tracks(self, pos: int, num: int):
        for s in self.playlists[pos]["tracks"][:num]:
            print(f"{s['artist_name']} - {s['track_name']}")

    def to_track_names(self, playlists):
        return [
            [f"{s['artist_name']} - {s['track_name']}" for i, s in enumerate(playlist["tracks"])]
            for playlist in playlists
        ]

    def make_playlist_vector(self, playlist: Union[dict, List[str]]) -> List[int]:
        """
        Given a list of interests, produce a vector whose ith element is 1
        if unique_songs[i] is in the list, 0 otherwise
        """
        playlist_ = []
        if isinstance(playlist, tuple):
            _, playlist_ = playlist
        else:
            playlist_ = playlist
        # Checking whether it's a dict or list, since one can supply a list of song names too
        if isinstance(playlist_, dict):
            return [
                1
                if song in [f"{s['artist_name']} - {s['track_name']}" for s in playlist_["tracks"]]
                else 0
                for song in self.unique_songs
            ]
        elif isinstance(playlist_, List):
            return [1 if song in playlist_ else 0 for song in self.unique_songs]

    def make_user_based_vectors(self):
        import multiprocessing as mp

        p = mp.Pool(14)
        t0 = time.time()

        try:
            if self.rewrite_data:
                print("Rewriting data: Generating vectors and dumping them")
                self.extract_unique_songs()
                self.playlist_vectors = p.map(self.make_playlist_vector, enumerate(self.playlists))
                vector_file = open("playlist_vectors.pickle", "wb")
                playlist_file = open("playlists.pickle", "wb")
                print("Pickling...")
                pickle.dump(self.playlist_vectors, vector_file)
                pickle.dump(self.playlists, playlist_file)
            else:
                vector_file = open("playlist_vectors.pickle", "rb")
                playlist_file = open("playlists.pickle", "rb")

                print("Pickled data found.")
                self.playlist_vectors = pickle.load(vector_file)
                self.playlists = pickle.load(playlist_file)
                self.extract_unique_songs()
        except (FileNotFoundError):
            print("Pickled data not found, generating vectors...")
            self.extract_unique_songs()
            self.playlist_vectors = [
                self.make_playlist_vector(playlist) for playlist in self.playlists
            ]
            vector_file = open("playlist_vectors.pickle", "wb")
            playlist_file = open("playlists.pickle", "wb")
            print("Pickling...")
            pickle.dump(self.playlist_vectors, vector_file)
            pickle.dump(self.playlists, playlist_file)
        finally:
            vector_file.close()
            playlist_file.close()
            self.named_playlists = self.to_track_names(self.playlists)

        t1 = time.time()
        print(f"Time taken: {round(t1-t0, 2)} s")
        print("playlist_vectors length:", len(self.playlist_vectors))
        print("Sum of the vectors(useless metric):", sum(np.array(self.playlist_vectors).flatten()))

    def make_item_based_vectors(self):
        self.song_playlist_matrix: List[List[int]] = [
            [playlist_vector[i] for playlist_vector in self.playlist_vectors]
            for i, _ in enumerate(self.unique_songs)
        ]

    def dot(self, v: Vector, w: Vector) -> float:
        """Computes v_1 * w_1 + ... + v_n * w_n"""
        assert len(v) == len(w), "vectors must be same length"

        return sum(v_i * w_i for v_i, w_i in zip(v, w))

    def cosine_similarity(self, v1: Vector, v2: Vector) -> float:
        return self.dot(v1, v2) / math.sqrt(self.dot(v1, v1) * self.dot(v2, v2))

    def compute_similarities(self, pv) -> List[float]:
        return [self.cosine_similarity(pv, pv_i) for pv_i in self.playlist_vectors]

    def sort_similar_playlists(self, similarities: List[float], user_id: int):
        id_similarity = [
            (id, similarity)
            for id, similarity in enumerate(similarities)
            if id != user_id and similarity > 0
        ]
        return sorted(id_similarity, key=lambda pair: pair[-1], reverse=True)

    def suggest_name(self, similarities: List[Tuple[int, float]], length: int = 4):
        suggested_name = ""
        for z in range(length):
            # print(f"Most similar playlist{[z]} name:", self.playlist_names[similarities[z][0]])
            suggested_name = " ".join([suggested_name, self.playlist_names[similarities[z][0]].split(" ")[0]])
        return suggested_name.strip()

    def user_based_suggestions(self, similarities, max_suggestions: int = 10):
        # Sum up the similarities
        suggestions: Dict[str, float] = defaultdict(float)
        sorted_similarities = self.sort_similar_playlists(similarities, self.user_id)
        suggested_name = self.suggest_name(sorted_similarities)
        print("Suggested name to this playlist is", suggested_name)
        
        for other_user_id, similarity in sorted_similarities:
            for song in self.named_playlists[other_user_id]:
                suggestions[song] += similarity

        # Convert them to a sorted list
        suggestions = sorted(suggestions.items(), key=lambda pair: pair[-1], reverse=True)

        # Exclude the user_id's supplied songs
        return [
            (suggestion, weight)
            for suggestion, weight in suggestions
            if suggestion not in self.named_playlists[self.user_id]
        ][:max_suggestions], suggested_name

    def user_based_recommendation(
        self, playlist: Union[dict, List[str]], limit: int = 10, user_id: int = -1
    ):
        self.user_id = user_id
        similarities = self.compute_similarities(self.make_playlist_vector(playlist))
        suggestions, name = self.user_based_suggestions(similarities=similarities, max_suggestions=limit)
        return suggestions, name

    def compute_song_similarities(self, song_id: int) -> List[float]:
        return [
            self.cosine_similarity(self.song_playlist_matrix[song_id], pl_vector_j)
            for pl_vector_j in self.song_playlist_matrix
        ]

    def most_similar_songs_to(self, song_id: int) -> List[Tuple[str, float]]:
        similarities = self.compute_song_similarities(song_id)
        song_similarity_pairs = [
            (self.unique_songs[other_song_id], similarity)
            for other_song_id, similarity in enumerate(similarities)
            if song_id != other_song_id and similarity > 0
        ]
        return sorted(song_similarity_pairs, key=lambda pair: pair[-1], reverse=True)

    def item_based_suggestions(self, playlist_vector: List[int], max_suggestions: int = 10):
        suggestions = defaultdict(float)
        for song_id, is_interested in enumerate(playlist_vector):
            if is_interested == 1:
                similar_songs = self.most_similar_songs_to(song_id)
                for song, similarity in similar_songs:
                    suggestions[song] += similarity

        suggestions = sorted(suggestions.items(), key=lambda pair: pair[-1], reverse=True)

        return [
            (suggestion, weight)
            for suggestion, weight in suggestions
            if suggestion not in self.named_playlists[self.user_id]
        ][:max_suggestions]

    def item_based_recommendation(
        self, playlist: Union[dict, List[str]], limit: int = 10, user_id: int = -1
    ):
        self.user_id = user_id
        playlist_vector = self.make_playlist_vector(playlist)
        suggestions = self.item_based_suggestions(
            playlist_vector=playlist_vector, max_suggestions=limit
        )
        return suggestions


In [2]:
recommender = Collaborative_Recommender("mpd/data_samples", rewrite_data=True)
# Builds unique_songs, playlist_vectors and named_playlists, and pickles the data
recommender.make_user_based_vectors()
# make_user_based_vectors() already calls extract_unique_songs() on every path,
# so the song list is not recomputed here
recommender.extract_playlist_names()
print("Sample tracks::")
recommender.sample_tracks(69, 5)
# Requires playlist_vectors and unique_songs from the step above
recommender.make_item_based_vectors()

Loaded 1 jsons
Rewriting data: Generating vectors and dumping them
Found 34250 unique songs
Pickling...
Time taken: 31.67 s
playlist_vectors length: 1000
Sum of the vectors(useless metric): 66666
Found 34250 unique songs
Sample tracks::
Madeleine Peyroux - La Vie En Rose
Cœur De Pirate - Drapeau blanc
Françoise Hardy - Comment te dire adieu - It Hurts To Say Goodbye
Cœur De Pirate - Comme des enfants
Cœur De Pirate - Crier tout bas


## Recommend songs to Pelle

Pelle's playlist happens to be in the database already:

In [3]:
# Index of Pelle's playlist within recommender.playlists / recommender.named_playlists.
# NOTE(review): the constructor shuffles the playlists without a fixed seed, so with
# rewrite_data=True this index presumably points at a different playlist on every fresh run.
pelle_id = 69
print("Pelles own playlist:")
# Last expression of the cell: shows the "artist - track" labels of that playlist
recommender.named_playlists[pelle_id]

Pelles own playlist:


['Madeleine Peyroux - La Vie En Rose',
 'Cœur De Pirate - Drapeau blanc',
 'Françoise Hardy - Comment te dire adieu - It Hurts To Say Goodbye',
 'Cœur De Pirate - Comme des enfants',
 'Cœur De Pirate - Crier tout bas',
 'Bénabar - Je suis de celles',
 "Carla Bruni - J'Arrive A Toi",
 'Camille - Au port',
 'Louane - Jeune',
 'Julien Doré - Paris-Seychelles',
 'Stacey Kent - La Vénus du mélo',
 'Stacey Kent - Chanson légère',
 "Vincent Scotto - J'Ai Deux Amours",
 'Yves Montand - Rue Saint-Vincent (Rose blanche)',
 "Yves Montand - Quand on s'balade",
 'Astrud Gilberto - Agua De Beber',
 'Joe Dassin - Les Champs-Elysées',
 'Pink Martini - Sympathique',
 "Stacey Kent - Jardin d'hiver",
 'Stan Getz - Corcovado',
 'Astrud Gilberto - Solo el Fin (For All We Know)',
 'Toshiyuki Yasuda Feat. Fernanda Takai - Águas de Março (feat. Fernanda Takai & Moreno Veloso)',
 'Isabelle Antena - Le Poisson Des Mers Du Sud',
 'Stacey Kent - Samba Saravah',
 'Jill Barber - Petite fleur',
 'Stacey Kent - Ces p

### User based recommendation
Give the recommender a playlist in the same form as the Spotify data

In [4]:
# Full playlist dict (with its "tracks" list) as loaded from the dataset
pelles_playlist = recommender.playlists[pelle_id]
# Ask for 12 suggestions; passing pelle_id keeps the playlist from matching itself
# and filters its own songs out of the result
sug, sug_name = recommender.user_based_recommendation(pelles_playlist, 12, pelle_id)
print("Suggestions:")
display(sug)

Suggested name to this playlist is Reception Wes Mad Easy
Suggestions:


[('Faces - Ooh La La', 0.09759000729485333),
 ('Frankie Lymon & The Teenagers - Why Do Fools Fall in Love',
  0.055131784641997125),
 ('Marvin Gaye - How Sweet It Is (To Be Loved By You)', 0.055131784641997125),
 ("Stevie Wonder - Signed, Sealed, Delivered (I'm Yours)",
  0.055131784641997125),
 ('Stevie Wonder - For Once In My Life', 0.055131784641997125),
 ("OutKast - Dracula's Wedding", 0.055131784641997125),
 ('Patsy Cline - Crazy', 0.055131784641997125),
 ('Bobby Darin - Beyond The Sea', 0.055131784641997125),
 ('The Kinks - All Day and All of the Night', 0.055131784641997125),
 ('Bright Eyes - First Day Of My Life', 0.055131784641997125),
 ('Pilot - Magic - 2003 Remastered Version', 0.055131784641997125),
 ('Pharrell Williams - Happy - From "Despicable Me 2"', 0.055131784641997125)]

Or you can also supply the recommender with just a list of songs:

In [5]:
# A hand-written playlist given as plain "artist - track" labels instead of a dataset dict
pelles_custom_playlist = ['Mariah Carey - All I Want for Christmas Is You',
 'Michael Bublé - Blue Christmas',
 'The Cheetah Girls - Cheetah-licious Christmas',
 'Pentatonix - Dance of the Sugar Plum Fairy',
 'Rascal Flatts - Deck The Halls',
 'Justin Bieber - Fa La La - (a cappella)',
 'Pentatonix - Hark! The Herald Angels Sing',
 'Dan + Shay - Have Yourself A Merry Little Christmas',
 'Pentatonix - Have Yourself a Merry Little Christmas',
 "Michael Bublé - It's Beginning To Look A Lot Like Christmas",
 'Michael Bublé - Jingle Bells (feat. The Puppini Sisters)',
 'Pentatonix - Mary, Did You Know?',
 'Hunter Hayes - Merry Christmas Baby - 2014 CMA Country Christmas Performance',
 'Michael Bublé - Mis Deseos/Feliz Navidad (Duet With Thalia)',
 'Justin Bieber - Mistletoe',
 'Bridgit Mendler - My Song For You (from "Good Luck Charlie")',
 'Jeff Foxworthy - Redneck 12 Days Of Christmas',
 'Michael Bublé - Silver Bells (feat. Naturally 7) - Bonus Track',
 "Pentatonix - That's Christmas to Me"]

print("The same playlist but supplied as a list of track names")
# NOTE(review): pelle_id is passed as user_id, so the recommender filters out the songs
# of stored playlist 69 rather than the songs of this custom list — which appears to be
# why songs from the list itself show up among the suggestions. Confirm the intended id.
display(recommender.user_based_recommendation(pelles_custom_playlist, 12, pelle_id))

The same playlist but supplied as a list of track names
Suggested name to this playlist is christmas CHRISTMASSSSS Christmas christmas


[('Mariah Carey - All I Want for Christmas Is You', 2.4191432546568854),
 ('Justin Bieber - Mistletoe', 2.310871687706498),
 ('Pentatonix - Mary, Did You Know?', 2.147721400524209),
 ("Michael Bublé - It's Beginning To Look A Lot Like Christmas",
  2.1172252252004595),
 ('Michael Bublé - Jingle Bells (feat. The Puppini Sisters)',
  2.1014220920875157),
 ('Pentatonix - Dance of the Sugar Plum Fairy', 1.7973915360693093),
 ('Pentatonix - Have Yourself a Merry Little Christmas', 1.7056252425210847),
 ("Pentatonix - That's Christmas to Me", 1.5902851834345482),
 ('Pentatonix - Hark! The Herald Angels Sing', 1.540808113835019),
 ('Michael Bublé - Silver Bells (feat. Naturally 7) - Bonus Track',
  1.448386890985402),
 ('Michael Bublé - Blue Christmas', 1.381457879357657),
 ('Michael Bublé - Mis Deseos/Feliz Navidad (Duet With Thalia)',
  1.381457879357657)]

### Item based recommender


In [6]:
pelle_id = 69
pelles_playlist = recommender.playlists[pelle_id]
# item_based_recommendation returns only the list of (song, weight) pairs, with no
# suggested name, so the result must not be unpacked into two variables
sug = recommender.item_based_recommendation(pelles_playlist, 12, pelle_id)
print("Suggestions:")
display(sug)

Suggestions:


[('Beck - Debra', 1.1547005383792517),
 ("Beck - Think I'm In Love", 1.1547005383792517),
 ('Blind Pilot - Oviedo', 1.1547005383792517),
 ('Bobby Darin - Beyond The Sea', 1.1547005383792517),
 ('Frankie Lymon & The Teenagers - Why Do Fools Fall in Love',
  1.1547005383792517),
 ("Gil Mantera's Party Dream - Elmo's Wish", 1.1547005383792517),
 ('M83 - Reunion', 1.1547005383792517),
 ('Mark Ronson - I Suck (feat. Rivers Cuomo)', 1.1547005383792517),
 ('Marvin Gaye - How Sweet It Is (To Be Loved By You)', 1.1547005383792517),
 ("Neil Diamond - Cracklin' Rosie - Single Version", 1.1547005383792517),
 ("OutKast - Dracula's Wedding", 1.1547005383792517),
 ('Radical Face - Always Gold - Short Attention Span Mix', 1.1547005383792517)]

In [7]:
song_name = "Lana Del Rey - Born To Die"
# list.index raises ValueError for an unknown song, instead of carrying a -1 sentinel
# forward that would silently select the last song in unique_songs
song_id = recommender.unique_songs.index(song_name)
print("Song is at index:", song_id, ":", recommender.unique_songs[song_id])

# Last expression of the cell: shows the songs most similar to the one looked up above
recommender.most_similar_songs_to(song_id)

Song is at index: 17206 : Lana Del Rey - Born To Die


[('Lana Del Rey - Video Games', 0.7216878364870323),
 ('Lana Del Rey - Carmen', 0.5773502691896258),
 ('Lana Del Rey - Lolita', 0.5773502691896258),
 ('Lana Del Rey - Million Dollar Man', 0.5773502691896258),
 ('Lana Del Rey - Without You', 0.5773502691896258),
 ('Lana Del Rey - Lucky Ones', 0.47140452079103173),
 ('Lana Del Rey - National Anthem', 0.47140452079103173),
 ('Lana Del Rey - Off To The Races', 0.47140452079103173),
 ('5 Seconds of Summer - Close As Strangers', 0.4082482904638631),
 ('5 Seconds of Summer - Story Of Another Us', 0.4082482904638631),
 ('ABBA - Waterloo', 0.4082482904638631),
 ('Alex & Sierra - Say My Name', 0.4082482904638631),
 ('Alpine - Hands', 0.4082482904638631),
 ('Alpine - Heartlove', 0.4082482904638631),
 ('Alpine - Icy Poles', 0.4082482904638631),
 ('Alpine - Too Safe', 0.4082482904638631),
 ('Alpine - Villages', 0.4082482904638631),
 ('Ariana Grande - Sometimes', 0.4082482904638631),
 ('Ariana Grande - Thinking Bout You', 0.4082482904638631),
 ('Ari