## Imports

In [25]:
import os
import random
from collections import defaultdict

import numpy as np
import pandas as pd
import plotly.express as px
import spotipy
from scipy.spatial.distance import cdist
from scipy.spatial.distance import euclidean
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from spotipy.oauth2 import SpotifyOAuth

## Créer un modèle prédictif

In [26]:
# Spotify API credentials are read from the environment so that secrets are
# never stored in (or committed with) the notebook. Export SPOTIPY_CLIENT_ID
# and SPOTIPY_CLIENT_SECRET before starting the kernel; a missing variable
# raises KeyError here rather than failing later during authentication.
# NOTE(review): the key pair that used to be hardcoded in this cell should be
# treated as leaked and rotated in the Spotify developer dashboard.
client_id = os.environ['SPOTIPY_CLIENT_ID']
client_secret = os.environ['SPOTIPY_CLIENT_SECRET']
# The redirect URI is not a secret, so the previous value is kept as default.
redirect_uri = os.environ.get('SPOTIPY_REDIRECT_URI', 'http://localhost:8888/callback')

spotify_client = spotipy.Spotify(
    auth_manager=SpotifyOAuth(
        client_id=client_id,
        client_secret=client_secret,
        redirect_uri=redirect_uri,
        scope='user-library-read playlist-modify-public',
    )
)

## Ajout des moyennes des caractéristiques des playlists au dataset

In [27]:
# Read the prepared dataset; the path is relative to the notebook's working directory.
csv_filename = 'data/prepared_data.csv'
spotify_data = pd.read_csv(filepath_or_buffer=csv_filename)

## K-means clustering

In [28]:
# Columns fed to the clustering pipeline.
numeric_columns = [
    'key', 'danceability', 'liveness', 'valence', 'year',
    'acousticness', 'duration_ms', 'energy', 'explicit', 'instrumentalness',
    'loudness', 'mode', 'popularity', 'speechiness', 'tempo',
]

# Standardise every feature, then partition the songs into 20 k-means clusters.
song_cluster_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('kmeans', KMeans(n_clusters=20, random_state=42)),
])

X = spotify_data.loc[:, numeric_columns]
song_cluster_pipeline.fit(X)

# Store each song's cluster assignment next to its features.
spotify_data['cluster_label'] = song_cluster_pipeline.predict(X)

# Playlist x cluster table: number of songs per cluster for every playlist
# (0 where a playlist has no song in that cluster).
playlist_cluster_distribution = (
    spotify_data
    .groupby(['playlistname', 'cluster_label'])
    .size()
    .unstack(fill_value=0)
)

spotify_data.head()

Unnamed: 0,user_id,artistname,trackname,playlistname,id,artists,key,danceability,liveness,valence,...,energy,explicit,instrumentalness,loudness,mode,popularity,speechiness,tempo,release_date,cluster_label
0,9cc0cfd4d7d7885102480dd99e7a90d6,Elvis Costello,(The Angels Wanna Wear My) Red Shoes,HARD ROCK 2010,0GmLrYUBXDC5vti77zBZfJ,Elvis Costello,4,0.577,0.226,0.901,...,0.636,0,0.00016,-9.825,1,38,0.0491,134.768,1977-07-22,13
1,9cc0cfd4d7d7885102480dd99e7a90d6,Elvis Costello & The Attractions,"(What's So Funny 'Bout) Peace, Love And Unders...",HARD ROCK 2010,77XzsYwTkvLoveW01Lanrk,Elvis Costello & The Attractions,7,0.381,0.321,0.368,...,0.918,0,0.0511,-9.451,1,39,0.0803,142.34,2020-11-06,12
2,9cc0cfd4d7d7885102480dd99e7a90d6,Elvis Costello & The Attractions,Accidents Will Happen,HARD ROCK 2010,2Rv3hGdKSLeX30V55asE3s,Elvis Costello & The Attractions,0,0.608,0.285,0.736,...,0.597,0,0.00342,-11.115,1,36,0.0276,120.077,1979-01-05,13
3,9cc0cfd4d7d7885102480dd99e7a90d6,Elvis Costello,Alison,HARD ROCK 2010,1v98rfd0an913AzHvMNG8a,Elvis Costello,1,0.557,0.112,0.377,...,0.32,0,4e-05,-10.792,0,52,0.0438,176.647,1977-07-22,13
4,9cc0cfd4d7d7885102480dd99e7a90d6,Lissie,All Be Okay,HARD ROCK 2010,6wM5ZSUB7RMr7zYfGiO6d9,Lissie,3,0.582,0.0974,0.533,...,0.643,0,0.0,-6.047,1,6,0.0309,122.084,2021-06-04,2


In [29]:
# Recommend a playlist from the name of an existing playlist.
def recommend_playlist_with_playlist(playlist_name, spotify_data, playlist_cluster_distribution, verbose=True):
    """Return the other playlist whose cluster counts are closest to ``playlist_name``.

    The songs of ``playlist_name`` are counted per cluster and that count
    vector is compared, with the Euclidean distance, to every row of
    ``playlist_cluster_distribution``. Distances are computed on raw song
    counts, so the size of a playlist influences the result.

    Parameters
    ----------
    playlist_name : str
        Name of the reference playlist (a value of ``spotify_data['playlistname']``).
    spotify_data : pandas.DataFrame
        Song table with at least the columns ``playlistname`` and ``cluster_label``.
    playlist_cluster_distribution : pandas.DataFrame
        One row per playlist, one column per cluster label, cells are song counts.
    verbose : bool, default True
        When true, print the distance to every candidate playlist.

    Returns
    -------
    The index label of the closest playlist other than ``playlist_name``, or
    ``None`` when there is no other playlist. Ties keep the first candidate.

    Raises
    ------
    ValueError
        If ``spotify_data`` contains no song for ``playlist_name``.
    """
    playlist_songs = spotify_data[spotify_data['playlistname'] == playlist_name]
    if playlist_songs.empty:
        # An all-zero query vector would silently "recommend" the smallest playlist.
        raise ValueError(f"No songs found for playlist {playlist_name!r}")

    # Align the query vector on the table's own cluster columns rather than a
    # hardcoded range, so both vectors always have the same length and order.
    # Labels that are not columns of the table are dropped by the reindex.
    cluster_ids = playlist_cluster_distribution.columns
    playlist_cluster_dist = (
        playlist_songs['cluster_label']
        .value_counts()
        .reindex(cluster_ids, fill_value=0)
        .to_numpy()
    )

    closest_playlist = None
    closest_distance = float('inf')
    candidates = zip(playlist_cluster_distribution.index, playlist_cluster_distribution.to_numpy())
    for playlist, existing_playlist_dist in candidates:
        distance = euclidean(playlist_cluster_dist, existing_playlist_dist)

        if verbose:
            print(f"Distance to {playlist}: {distance}")

        # Never recommend the reference playlist itself (its distance is 0).
        if playlist != playlist_name and distance < closest_distance:
            closest_distance = distance
            closest_playlist = playlist

    return closest_playlist


# Recommend a playlist from a list of song names.
def recommend_playlist(song_list, spotify_data, playlist_cluster_distribution, verbose=True):
    """Return the playlist whose cluster counts are closest to those of ``song_list``.

    Every row of ``spotify_data`` whose ``trackname`` is in ``song_list`` is
    counted per cluster; that count vector is compared, with the Euclidean
    distance, to every row of ``playlist_cluster_distribution``. A track name
    that occurs in several rows is counted once per row. Distances are
    computed on raw song counts, so the size of a playlist influences the
    result.

    Parameters
    ----------
    song_list : list-like of str
        Track names to look up in ``spotify_data['trackname']``.
    spotify_data : pandas.DataFrame
        Song table with at least the columns ``trackname`` and ``cluster_label``.
    playlist_cluster_distribution : pandas.DataFrame
        One row per playlist, one column per cluster label, cells are song counts.
    verbose : bool, default True
        When true, print the distance to every candidate playlist.

    Returns
    -------
    The index label of the closest playlist, or ``None`` when the table has
    no rows. Ties keep the first candidate.

    Raises
    ------
    ValueError
        If none of the names in ``song_list`` is found in ``spotify_data``.
    """
    matched = spotify_data['trackname'].isin(song_list)
    song_clusters = spotify_data.loc[matched, 'cluster_label']
    if song_clusters.empty:
        # An all-zero query vector would silently "recommend" the smallest playlist.
        raise ValueError("None of the given songs were found in spotify_data['trackname']")

    # Align the query vector on the table's own cluster columns rather than a
    # hardcoded range, so both vectors always have the same length and order.
    # Labels that are not columns of the table are dropped by the reindex.
    cluster_ids = playlist_cluster_distribution.columns
    song_cluster_dist = (
        song_clusters
        .value_counts()
        .reindex(cluster_ids, fill_value=0)
        .to_numpy()
    )

    closest_playlist = None
    closest_distance = float('inf')
    candidates = zip(playlist_cluster_distribution.index, playlist_cluster_distribution.to_numpy())
    for playlist, playlist_dist in candidates:
        distance = euclidean(song_cluster_dist, playlist_dist)

        if verbose:
            print(f"Distance to {playlist}: {distance}")

        if distance < closest_distance:
            closest_distance = distance
            closest_playlist = playlist

    return closest_playlist



def visualize_clusters(spotify_data):
    """Show a 2-D PCA scatter plot of the songs, coloured by cluster label.

    Parameters
    ----------
    spotify_data : pandas.DataFrame
        Song table containing the module-level ``numeric_columns`` plus
        ``cluster_label``, ``trackname`` and ``artistname``.

    Returns
    -------
    None
        The Plotly figure is displayed with ``fig.show()``.
    """
    # PCA centres the data but does not rescale it, so wide-range columns would
    # dominate the projection. Standardise first so that the plot lives in the
    # same feature space as a clustering run on standardised features.
    scaled_features = StandardScaler().fit_transform(spotify_data[numeric_columns])
    pca_result = PCA(n_components=2).fit_transform(scaled_features)

    # Reuse the input index: the column assignments below align on index, and a
    # fresh RangeIndex would mismatch a filtered or re-indexed input frame.
    pca_df = pd.DataFrame(data=pca_result, columns=['PC1', 'PC2'], index=spotify_data.index)
    pca_df['cluster_label'] = spotify_data['cluster_label']
    pca_df['name'] = spotify_data['trackname']
    pca_df['artists'] = spotify_data['artistname']

    fig = px.scatter(
        pca_df,
        x='PC1',
        y='PC2',
        color='cluster_label',
        # Show the track and artist on hover.
        hover_data=['name', 'artists'],
    )
    fig.update_layout(title='Clusters of Songs', xaxis_title='PC1', yaxis_title='PC2')
    fig.show()

# Render the PCA scatter plot of the clustered songs.
visualize_clusters(spotify_data)

In [30]:
# Track names used as the query for the song-list recommender.
# NOTE(review): the "Serenade No. 13" and "Sonata No. 23" entries were pasted
# with CSV-style doubled quotes, which Python parsed as adjacent string
# literals and silently concatenated, dropping the quote characters. They are
# written below as the CSV-decoded values; confirm them against the
# 'trackname' column of the dataset.
track_names = [
    "Brandenburg Concerto No.3 in G Major - 1st Movement",
    "Cello Suite No. 1 in G Major, BWV 1007: Prelude",
    "Für Elise",
    "Hungarian Dance No. 5",
    "Jesu, Joy Of Man's Desiring', BWV 147",
    "Moonlight Sonata: Adagio Sostenuto",
    "Overture (Suite) No. 3 in D Major, BWV 1068: II: Air on a G String",
    "Piano Sonata No. 11 in A major, K. 331: III. Rondo alla turca",
    "Piano Sonata No.14 In C Sharp Minor Op.27_2 - 1st Movement 'Moonlight",
    "Requiem In D Minor, Sequentia: Dies Irae",
    "Ride of the Valkyries",
    'Serenade No. 13 in G Major, K. 525 Eine Kleine Nachtmusik": I: Allegro"',
    'Sonata No. 23 In F Minor, Op. 57 Appassionata" Assai Allegro"',
    "Symphony No. 40 in G Minor, KV. 550: I: Molto allegro",
    "Symphony No. 5 In C Minor Part 1",
    "The Four Seasons - Autumn (Violin Concerto in F Major) RV 293: I. Allegro",
    "The Four Seasons - Spring (Violin Concerto in E Major), RV 269: I. Allegro",
    "The Four Seasons - Spring (Violin Concerto in E Major), RV 269: III. Allegro pastorale"
]

recommended_playlist = recommend_playlist(track_names, spotify_data, playlist_cluster_distribution)
print(f"The recommended playlist is: {recommended_playlist}")

Distance to 2014 beginning : 44.204072210600685
Distance to Annat: 15.620499351813308
Distance to April 2014: 52.1344415909483
Distance to August 2014: 14.594519519326424
Distance to Bored out of my mind, someone please get me out of this town. : 14.52583904633395
Distance to C418: 12.409673645990857
Distance to Chelsea: 17.944358444926362
Distance to Chill out: 57.55866572463264
Distance to Classique: 6.082762530298219
Distance to Daft Punk : 41.0
Distance to December 2013: 45.89117562233506
Distance to Disney..: 15.84297951775486
Distance to E.: 14.212670403551895
Distance to Ecuador: 18.65475810617763
Distance to Electro: 50.93132631298737
Distance to Electrooo : 15.165750888103101
Distance to Everything at once: 402.2275475399466
Distance to Fluff: 16.0312195418814
Distance to Ghibli songs: 8.246211251235321
Distance to Gillat från Radio: 13.379088160259652
Distance to Gracious: 15.0
Distance to HARD ROCK 2010: 25.39685019840059
Distance to HATK: 20.92844953645635
Distance to IOW 2

In [31]:
# Look up the playlist that is closest to the existing "Classique" playlist.
recommended_playlist = recommend_playlist_with_playlist(
    playlist_name="Classique",
    spotify_data=spotify_data,
    playlist_cluster_distribution=playlist_cluster_distribution,
)
print(f"The recommended playlist is: {recommended_playlist}")

Distance to 2014 beginning : 46.20606020859169
Distance to Annat: 20.904544960366874
Distance to April 2014: 53.9351462406472
Distance to August 2014: 19.493588689617926
Distance to Bored out of my mind, someone please get me out of this town. : 19.493588689617926
Distance to C418: 7.54983443527075
Distance to Chelsea: 22.693611435820433
Distance to Chill out: 57.79273310719956
Distance to Classique: 0.0
Distance to Daft Punk : 41.13392760240627
Distance to December 2013: 47.38143096192854
Distance to Disney..: 21.071307505705477
Distance to E.: 18.947295321496416
Distance to Ecuador: 23.259406699226016
Distance to Electro: 52.44997616777342
Distance to Electrooo : 20.566963801203133
Distance to Everything at once: 402.0273622528695
Distance to Fluff: 21.213203435596427
Distance to Ghibli songs: 13.74772708486752
Distance to Gillat från Radio: 19.28730152198591
Distance to Gracious: 20.445048300260872
Distance to HARD ROCK 2010: 28.948229652260256
Distance to HATK: 25.11971337416094
Di

## Créer un modèle de recommandation aléatoire

In [32]:
def recommend_random_playlist(spotify_data, seed=None):
    """Return one playlist name drawn at random from the distinct playlist names.

    Parameters
    ----------
    spotify_data : pandas.DataFrame
        Song table with a ``playlistname`` column.
    seed : int or None, default None
        When given, the draw uses a dedicated ``random.Random(seed)`` and is
        reproducible. When ``None``, the module-level ``random`` generator is
        used, so ``random.seed(...)`` still controls the draw.

    Returns
    -------
    str
        One of the distinct, non-missing values of ``playlistname``.

    Raises
    ------
    IndexError
        If ``playlistname`` contains no non-missing value.
    """
    # Drop missing names so NaN can never be "recommended", and hand
    # random.choice a plain list: some Python 3.11 releases reject NumPy
    # arrays there because they evaluate the sequence's truth value.
    playlist_names = spotify_data['playlistname'].dropna().unique().tolist()
    if not playlist_names:
        raise IndexError("Cannot choose a playlist: 'playlistname' has no values")

    chooser = random if seed is None else random.Random(seed)
    return chooser.choice(playlist_names)

In [33]:
# Baseline recommendation: pick a playlist at random.
random_recommended_playlist = recommend_random_playlist(spotify_data=spotify_data)
print(f"The random recommended playlist is: {random_recommended_playlist}")

The random recommended playlist is: Fluff


## Comparer le score du modèle prédictif à celui du modèle aléatoire

In [34]:
# dans une fonction qui s'occupe des tests