## Imports

In [82]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import csv
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from collections import defaultdict
from sklearn.metrics import euclidean_distances
from scipy.spatial.distance import cdist
%matplotlib inline

## Credentials API Spotify

In [83]:
client_id = 'CLIENT_ID'
client_secret = 'CLIENT_SECRET'
redirect_uri = 'http://localhost:8888/callback'

spotify_client = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=client_id, client_secret=client_secret, redirect_uri=redirect_uri, scope='user-library-read playlist-modify-public'))

## Récupérer les chansons likées

In [84]:
liked_songs = spotify_client.current_user_saved_tracks(limit=50)

In [85]:
song_data = []

fieldnames = ['valence', 'year', 'acousticness', 'artists', 'danceability', 'duration_ms', 'energy',
 'explicit', 'id', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'name', 'popularity', 'release_date', 'speechiness', 'tempo', 'uri']


for item in liked_songs['items']:
    track = item['track']
    audio_features = spotify_client.audio_features(track['id'])[0]
    track_details = spotify_client.track(track['id'])

    artists = ', '.join([artist['name'] for artist in track['artists']])
    
    song_info = {
        'id': track['id'],
        'uri': track['uri'],
        'name': track['name'],
        'artists': artists,
        'key': audio_features['key'],
        'danceability': audio_features['danceability'],
        'liveness': audio_features['liveness'],
        'valence': audio_features['valence'],
        'year': track_details['album']['release_date'][:4],
        'acousticness': audio_features['acousticness'],
        'duration_ms': audio_features['duration_ms'],
        'energy': audio_features['energy'],
        'explicit': int(track_details['explicit']),
        'instrumentalness': audio_features['instrumentalness'],
        'loudness': audio_features['loudness'],
        'mode': audio_features['mode'],
        'popularity': track_details['popularity'],
        'speechiness': audio_features['speechiness'],
        'tempo': audio_features['tempo'],
        'release_date': track_details['album']['release_date']
    }
    
    song_data.append(song_info)

csv_filename = 'liked_songs.csv'
with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(song_data)

spotify_data = pd.read_csv(csv_filename)

## K-means pour selectionner les sons qui iront dans la playlist

In [86]:
song_cluster_pipeline = Pipeline([('scaler', StandardScaler()), 
                                  ('kmeans', KMeans(n_clusters=20, 
                                   verbose=2))], verbose=True)
X = spotify_data.select_dtypes(np.number)
number_cols = list(X.columns)
print(number_cols)
song_cluster_pipeline.fit(X)
song_cluster_labels = song_cluster_pipeline.predict(X)
spotify_data['cluster_label'] = song_cluster_labels

['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo']
[Pipeline] ............ (step 1 of 2) Processing scaler, total=   0.0s
Initialization complete
Iteration 0, inertia 298.7958995531692.
Iteration 1, inertia 186.92680980080502.
Iteration 2, inertia 184.44596298608568.
Converged at iteration 2: strict convergence.
Initialization complete
Iteration 0, inertia 296.3417293167109.
Iteration 1, inertia 199.45691993233453.
Converged at iteration 1: strict convergence.
Initialization complete
Iteration 0, inertia 296.591962724927.
Iteration 1, inertia 196.3165850426135.
Iteration 2, inertia 191.7498124133591.
Converged at iteration 2: strict convergence.
Initialization complete
Iteration 0, inertia 287.67163198603043.
Iteration 1, inertia 192.3615079185568.
Iteration 2, inertia 189.39683383153005.
Converged at iteration 2: strict convergence.
Initialization comp

  super()._check_params_vs_input(X, default_n_init=10)


In [87]:
number_cols = ['valence', 'year', 'acousticness', 'danceability', 'duration_ms', 'energy', 'explicit',
 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'popularity', 'speechiness', 'tempo' ]

def get_song_data(song, spotify_data): 

    song_data = spotify_data[(spotify_data['name'] == song['name']) 
                            & (spotify_data['year'] == song['year'])].iloc[0]
    return song_data

        

def get_mean_vector(song_list, spotify_data):
    
    song_vectors = []
    
    for song in song_list:
        song_data = get_song_data(song, spotify_data)
        if song_data is None:
            print('Warning: {} does not exist in Spotify or in database'.format(song['name']))
            continue
        song_vector = song_data[number_cols].values
        song_vectors.append(song_vector)  
    
    song_matrix = np.array(list(song_vectors))
    return np.mean(song_matrix, axis=0)

def flatten_dict_list(dict_list):
    
    flattened_dict = defaultdict()
    for key in dict_list[0].keys():
        flattened_dict[key] = []
    
    for dictionary in dict_list:
        for key, value in dictionary.items():
            flattened_dict[key].append(value)
            
    return flattened_dict
        

def recommend_songs( song_list, spotify_data, n_songs=10):
    
    metadata_cols = ['name', 'year', 'artists', 'uri']
    song_dict = flatten_dict_list(song_list)
    
    song_center = get_mean_vector(song_list, spotify_data)
    scaler = song_cluster_pipeline.steps[0][1]
    scaled_data = scaler.transform(spotify_data[number_cols])
    scaled_song_center = scaler.transform(song_center.reshape(1, -1))
    distances = cdist(scaled_song_center, scaled_data, 'cosine')
    index = list(np.argsort(distances)[:, :n_songs][0])
    
    rec_songs = spotify_data.iloc[index]
    #rec_songs = rec_songs[~rec_songs['name'].isin(song_dict['name'])] on enlève cette ligne pour ajouter les sons input dans la playlist
    return rec_songs[metadata_cols].to_dict(orient='records')

In [88]:
recommended = recommend_songs([{'name': "son 1", 'year': 2023}, {'name': "son 2", 'year': 2023}], spotify_data)

[{'name': 'dvsn interlude', 'year': 2023, 'artists': 'Rory, dvsn', 'uri': 'spotify:track:3x8V8IFJA0MPyuaYfse7Zd'}, {'name': 'Velvet Blue', 'year': 2023, 'artists': 'Ray Lozano', 'uri': 'spotify:track:1ppL3Tjizx9C6Vhd1TiIW0'}, {'name': 'HAUNTED Pt. 2', 'year': 2023, 'artists': 'Dswade808, Swade', 'uri': 'spotify:track:0Drl8lDcZjPSp2wYPZDmLd'}, {'name': 'Bahamas Promises', 'year': 2023, 'artists': 'Drake', 'uri': 'spotify:track:3JZjcKImHcmOI9ylL4zrSc'}, {'name': 'brush u', 'year': 2021, 'artists': 'Ragz Originale', 'uri': 'spotify:track:6tJBebmFkwDHCznVLzjIdc'}, {'name': 'Would You', 'year': 2022, 'artists': 'Kehina, emil', 'uri': 'spotify:track:2V9aEWXPvRmprVTZmTYf1J'}, {'name': '3 Peat', 'year': 2022, 'artists': 'Ambré', 'uri': 'spotify:track:5jyd1eGbvJtMThbVT13Dus'}, {'name': '7969 Santa', 'year': 2023, 'artists': 'Drake', 'uri': 'spotify:track:1GpWY5RiInhezB8wGWs6oN'}, {'name': 'Comme ça', 'year': 2023, 'artists': 'Rsko', 'uri': 'spotify:track:5FLLf89AfmWazFDM6ojiGk'}, {'name': 'Memb



## Créer la playlist

In [89]:
playlist_name = 'faire couler le miel'
spotify_client.user_playlist_create(user=spotify_client.me()['id'], name=playlist_name)
playlist_id = spotify_client.current_user_playlists(limit=1)['items'][0]['id']

## Ajouter les sons selectionnés

In [90]:
track_uris = [track['uri'] for track in recommended]
spotify_client.user_playlist_add_tracks(user=spotify_client.me()['id'], playlist_id=playlist_id, tracks=track_uris)

{'snapshot_id': 'MiwwMmZkNzM5MDkyNWNjMWY2MzFjNTljNDVkYjlkMWZmMjI4MzgwMGZl'}