## Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import csv
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from collections import defaultdict
from sklearn.metrics import euclidean_distances
from scipy.spatial.distance import cdist
import time
from spotipy import SpotifyException
%matplotlib inline

## Credentials API Spotify

In [2]:
client_id = 'CLIEN_ID'
client_secret = 'CLIENT_SECRET'
redirect_uri = 'http://localhost:8888/callback'

spotify_client = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=client_id, client_secret=client_secret, redirect_uri=redirect_uri, scope='user-library-read playlist-modify-public'))

## Récupérer les chansons likées

In [3]:
limit = 50  # nombre max de tracks par request
offset = 0  # commencer par les 50 premiers sons

liked_songs = []

while offset < 500: # on récupère les 500 derniers sons likés
    results = spotify_client.current_user_saved_tracks(limit=limit, offset=offset)
    tracks = results['items']
    
    if not tracks:
        break  
    
    liked_songs.extend(tracks)
    offset += limit  

## Formater les chansons likées en csv

In [10]:
song_data = []

fieldnames = ['valence', 'year', 'acousticness', 'artists', 'danceability', 'duration_ms', 'energy',
 'explicit', 'id', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'name', 'popularity', 'release_date', 'speechiness', 'tempo', 'uri']

liked_songs_split = [liked_songs[i:i+50] for i in range(0, len(liked_songs), 50)]


for group in liked_songs_split:
    track_ids = [item['track']['id'] for item in group]

    audio_features = spotify_client.audio_features(track_ids)
    track_details = spotify_client.tracks(track_ids)['tracks']

    for i, item in enumerate(group):
        audio_feature = audio_features[i]
        details = track_details[i]

        artists = ', '.join([artist['name'] for artist in details['artists']])

        song_info = {
            'id': item['track']['id'],
            'uri': item['track']['uri'],
            'name': item['track']['name'],
            'artists': artists,
            'key': audio_feature['key'],
            'danceability': audio_feature['danceability'],
            'liveness': audio_feature['liveness'],
            'valence': audio_feature['valence'],
            'year': details['album']['release_date'][:4],
            'acousticness': audio_feature['acousticness'],
            'duration_ms': audio_feature['duration_ms'],
            'energy': audio_feature['energy'],
            'explicit': int(details['explicit']),
            'instrumentalness': audio_feature['instrumentalness'],
            'loudness': audio_feature['loudness'],
            'mode': audio_feature['mode'],
            'popularity': details['popularity'],
            'speechiness': audio_feature['speechiness'],
            'tempo': audio_feature['tempo'],
            'release_date': details['album']['release_date']
        }

        song_data.append(song_info)

csv_filename = 'liked_songs.csv'
with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(song_data)

spotify_data = pd.read_csv(csv_filename)

## K-means pour selectionner les sons qui iront dans la playlist

In [11]:
song_cluster_pipeline = Pipeline([('scaler', StandardScaler()), 
                                  ('kmeans', KMeans(n_clusters=20, 
                                   verbose=2))], verbose=True)
X = spotify_data.select_dtypes(np.number)
number_cols = list(X.columns)
song_cluster_pipeline.fit(X)
song_cluster_labels = song_cluster_pipeline.predict(X)
spotify_data['cluster_label'] = song_cluster_labels

[Pipeline] ............ (step 1 of 2) Processing scaler, total=   0.0s


  super()._check_params_vs_input(X, default_n_init=10)
found 0 physical cores < 1
  File "c:\Users\tafas\AppData\Local\Programs\Python\Python311\Lib\site-packages\joblib\externals\loky\backend\context.py", line 282, in _count_physical_cores
    raise ValueError(f"found {cpu_count_physical} physical cores < 1")


Initialization complete
Iteration 0, inertia 5038.258094944283.
Iteration 1, inertia 4014.784487399765.
Iteration 2, inertia 3853.6912732958067.
Iteration 3, inertia 3792.637399569749.
Iteration 4, inertia 3775.8549305764104.
Iteration 5, inertia 3763.658460588556.
Iteration 6, inertia 3756.2659285963446.
Iteration 7, inertia 3755.489675304496.
Iteration 8, inertia 3754.2052068413077.
Iteration 9, inertia 3753.459948784633.
Iteration 10, inertia 3753.1295557992057.
Iteration 11, inertia 3752.564345938242.
Iteration 12, inertia 3752.2647488467355.
Converged at iteration 12: strict convergence.
Initialization complete
Iteration 0, inertia 5344.319583104667.
Iteration 1, inertia 4075.018446561729.
Iteration 2, inertia 3952.376954396351.
Iteration 3, inertia 3892.475222569987.
Iteration 4, inertia 3852.7570572067843.
Iteration 5, inertia 3813.8736064807254.
Iteration 6, inertia 3771.4567092534203.
Iteration 7, inertia 3758.755206658794.
Iteration 8, inertia 3756.27293435268.
Iteration 9, i

In [12]:
number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit',
 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo' ]

def get_song_data(song, spotify_data): 

    song_data = spotify_data[(spotify_data['name'] == song['name']) 
                            & (spotify_data['year'] == song['year'])].iloc[0]
    return song_data

        

def get_mean_vector(song_list, spotify_data):
    
    song_vectors = []
    
    for song in song_list:
        song_data = get_song_data(song, spotify_data)
        if song_data is None:
            print('Warning: {} does not exist in Spotify or in database'.format(song['name']))
            continue
        song_vector = song_data[number_cols].values
        song_vectors.append(song_vector)  
    
    song_matrix = np.array(list(song_vectors))
    return np.mean(song_matrix, axis=0)

def flatten_dict_list(dict_list):
    
    flattened_dict = defaultdict()
    for key in dict_list[0].keys():
        flattened_dict[key] = []
    
    for dictionary in dict_list:
        for key, value in dictionary.items():
            flattened_dict[key].append(value)
            
    return flattened_dict
        

def recommend_songs( song_list, spotify_data, n_songs=10):
    
    metadata_cols = ['name', 'year', 'artists', 'uri']
    song_dict = flatten_dict_list(song_list)
    
    song_center = get_mean_vector(song_list, spotify_data)
    scaler = song_cluster_pipeline.steps[0][1]
    scaled_data = scaler.transform(spotify_data[number_cols])
    scaled_song_center = scaler.transform(song_center.reshape(1, -1))
    distances = cdist(scaled_song_center, scaled_data, 'cosine')
    index = list(np.argsort(distances)[:, :n_songs][0])
    
    rec_songs = spotify_data.iloc[index]
    #rec_songs = rec_songs[~rec_songs['name'].isin(song_dict['name'])] on commente ca pour inclure les sons qu'on donne en input dans la playlist
    return rec_songs[metadata_cols].to_dict(orient='records')

In [14]:
recommended = recommend_songs([{'name': "son 1", 'year': 2023}, {'name': "son 2", 'year': 2023}, {'name': "son 3", 'year': 2023}], spotify_data)



## Créer la playlist

In [15]:
playlist_name = 'ta nouvelle playliste'
spotify_client.user_playlist_create(user=spotify_client.me()['id'], name=playlist_name)
playlist_id = spotify_client.current_user_playlists(limit=1)['items'][0]['id']

## Ajouter les sons selectionnés

In [16]:
track_uris = [track['uri'] for track in recommended]
spotify_client.user_playlist_add_tracks(user=spotify_client.me()['id'], playlist_id=playlist_id, tracks=track_uris)

{'snapshot_id': 'Miw0YWRlZTM0OGQ5MzBlNzU0MzYwMjM2MjQ1M2NiOGYyMDYzODgwMmQz'}