In [392]:
import requests
import base64
from urllib.parse import urlencode
from http.server import HTTPServer, BaseHTTPRequestHandler

In [394]:
access_token = None

class SpotifyAuthClient:
    def __init__(self, client_id, client_secret, redirect_uri):
        self.client_id = client_id
        self.client_secret = client_secret
        self.redirect_uri = redirect_uri
        self.auth_base_url = "https://accounts.spotify.com"
        self.api_base_url = "https://api.spotify.com/v1"
    
    def get_auth_url(self, scope=None):
        params = {
            'client_id': self.client_id,
            'response_type': 'code',
            'redirect_uri': self.redirect_uri,
            'scope': scope or '',
        }
        return f"{self.auth_base_url}/authorize?{urlencode(params)}"
    
    def get_token(self, code):
        auth_header = base64.b64encode(f"{self.client_id}:{self.client_secret}".encode()).decode()
        response = requests.post(
            f"{self.auth_base_url}/api/token",
            headers={
                'Authorization': f'Basic {auth_header}',
                'Content-Type': 'application/x-www-form-urlencoded',
            },
            data={
                'grant_type': 'authorization_code',
                'code': code,
                'redirect_uri': self.redirect_uri,
            }
        )
        if response.status_code == 200:
            return response.json()
        else: Exception(f"Erreur lors de la récupération du token: {response.text}")

In [396]:
def run_server(client, scope):
    auth_url = client.get_auth_url(scope)
    print(f"Ouvrez cette URL dans votre navigateur : {auth_url}")

    class RequestHandler(BaseHTTPRequestHandler):
        def do_GET(self):
            global access_token
            if "/callback" in self.path:
                query = self.path.split('?')[-1]
                params = dict(param.split('=') for param in query.split('&'))
                code = params.get('code')
                self.send_response(200)
                self.end_headers()
                self.wfile.write(b"Authentification reussie! Vous pouvez fermer cette fenetre")
                
                # get token
                token_info = client.get_token(code)
                access_token = token_info['access_token']
                print(access_token)
                return

    server = HTTPServer(('localhost', 8000), RequestHandler)
    print("Serveur en attente du callback...")
    server.handle_request()


In [398]:
if __name__ == "__main__":
    client_id = '6094c6c177374701a0abdb52fbb627ad'
    client_secret = '0723befe0b9249b9a7036423e27a0633'
    redirect_uri = 'http://localhost:8000/callback'
    scope = 'user-read-private user-read-email user-top-read'

    auth_client = SpotifyAuthClient(client_id, client_secret, redirect_uri)
    
    run_server(auth_client, scope)

Ouvrez cette URL dans votre navigateur : https://accounts.spotify.com/authorize?client_id=6094c6c177374701a0abdb52fbb627ad&response_type=code&redirect_uri=http%3A%2F%2Flocalhost%3A8000%2Fcallback&scope=user-read-private+user-read-email+user-top-read
Serveur en attente du callback...


127.0.0.1 - - [20/Dec/2024 14:25:30] "GET /callback?code=AQAz4q3cu1V3R8J4ozmVsEAG1EZaRFxGD37w3qRhUNbhPu6VGJenNMrPhky1-zazD9x__4qz4x7fA4Aq1tPBHbY3IObH1DUSGSti4zqVwyowKfXOjWT7jJyMH8S-YlaxIgWLNJH9jUsZMLzJP-QAPeH30_2hkD0srlRtyRqCp7EAR8tv_kXW2klJaPQBM60Yhs-ecs6xaiu-z7uhya_HOBV4XHGvu7dA3kIHAF56NPqpyTSiqQ HTTP/1.1" 200 -


BQCuyPH1gRYzWV1SER-P6smM_ZNkUZeK_4hbcwcH6Y-dLHnkjhKZijsbuI5snQxcPyCGzM4PBrLS1x4o47yxbCr1THXZDSS-P_3n0MzntDxbIM8g2lrBiG7JsIK8mVegwMGs9Gwf4_th-96AuYQaluE5UO63b5Dcr_91NeO4jc6FHKwCOho-uzzfLzD4efroUKWGVsksXUsHR_kVxlVoO_f--2wqHvDTZFh9-8s


In [400]:
top_tracks = []

In [402]:
def get_top_tracks(limit=50, time_range="long_term"):
    global top_tracks
    if not access_token:
        print("Erreur : Aucun token d'accès disponible. Authentifiez-vous d'abord")
        return
    
    url = "https://api.spotify.com/v1/me/top/tracks"
    headers = {"Authorization": f"Bearer {access_token}"}
    params = {"time_range": time_range, "limit": limit}
    
    response = requests.get(url, headers=headers, params=params)
    if response.status_code == 200:
        top_tracks = response.json()["items"] 
        print("Votre top tracks :")
        for idx, track in enumerate(top_tracks, start=1):
            print(f"{idx}. {track['name']} - {', '.join(artist['name'] for artist in track['artists'])}")
    else:
        print("Erreur :", response.status_code, response.json())


In [404]:
get_top_tracks()

Votre top tracks :
1. violet - Lexie Carroll
2. the sky looked nice today - Lexie Carroll
3. Right Now - SR-71
4. vampire - Olivia Rodrigo
5. With Me - Sum 41
6. stranger - Olivia Rodrigo
7. Wayfaring Stranger - Ashley Johnson, Troy Baker
8. What's Up Danger (with Black Caviar) - Blackway, Black Caviar
9. My Girl - The Temptations
10. The Rock Show - blink-182
11. Back To Me - The Rose
12. Overcompensate - Twenty One Pilots
13. Reckless - Madison Beer
14. The Emptiness Machine - Linkin Park
15. The Drum - Alan Walker
16. Pieces - Sum 41
17. In Too Deep - Sum 41
18. Shut Up - Alan Walker, UPSAHL
19. Landmines - Sum 41
20. The Craving (Jenna's version) - Twenty One Pilots
21. Fat Lip - Sum 41
22. Powered Up - Cobra Man
23. Still Waiting - Sum 41
24. Pure 90 (Rapping Queen) - Shaka Ponk, Cypress Hill
25. Here We Go Again - SR-71
26. Halibo - Billx, Dr. Peacock
27. SATIVA - Yvnnis
28. Elle pleut - Nekfeu, Nemir
29. My wing - Rave music edit - Billx, Otyken
30. GARE DU NORD - Yvnnis
31. War

In [406]:
import pandas as pd

tracks_data = []
for track in top_tracks:
    track_info = {
        'name': track['name'],
        'artists': ', '.join(artist['name'] for artist in track['artists']),
        'album': track['album']['name'],
        'popularity': track['popularity'],
        'id_track' : track['id']
    }
    tracks_data.append(track_info)

df_tracks = pd.DataFrame(tracks_data)

display(df_tracks)

Unnamed: 0,name,artists,album,popularity,id_track
0,violet,Lexie Carroll,violet,28,6wOypcYXXeROfeVQLZ0sVU
1,the sky looked nice today,Lexie Carroll,when the sun came up,28,3XlMpNZyNcqH7GxJw0NWH9
2,Right Now,SR-71,Now You See Inside,56,3iwsyTZzOWHZbHffBEyi1N
3,vampire,Olivia Rodrigo,GUTS,82,1kuGVB7EU95pJObxwvfwKS
4,With Me,Sum 41,Underclass Hero,60,1OTvWduKQV0MQd2gnGM9He
5,stranger,Olivia Rodrigo,GUTS (spilled),70,2U3jOPfO4wZZFaaWS4Dcj6
6,Wayfaring Stranger,"Ashley Johnson, Troy Baker",The Last of Us Part II: Covers and Rarities,44,3wR4Cnadsl0xnpEAYkr3my
7,What's Up Danger (with Black Caviar),"Blackway, Black Caviar",Spider-Man: Into the Spider-Verse (Soundtrack ...,68,5zsHmE2gO3RefVsPyw2e3T
8,My Girl,The Temptations,The Temptations Sing Smokey,78,745H5CctFr12Mo7cqa1BMH
9,The Rock Show,blink-182,Take Off Your Pants And Jacket,68,2ydUT1pFhuLDnouelIv4WH


In [408]:
tracks = pd.read_csv("./tracks_features.csv")
tracks.rename(columns={"id":"id_track"}, inplace=True)
tracks.head(5)

Unnamed: 0,id_track,name,album,album_id,artists,artist_ids,track_number,disc_number,explicit,danceability,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,year,release_date
0,7lmeHLHBe4nmXzuXc0HDjk,Testify,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],1,1,False,0.47,...,0.0727,0.0261,1.1e-05,0.356,0.503,117.906,210133,4.0,1999,1999-11-02
1,1wsRitfRRtWyEapl0q22o8,Guerrilla Radio,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],2,1,True,0.599,...,0.188,0.0129,7.1e-05,0.155,0.489,103.68,206200,4.0,1999,1999-11-02
2,1hR0fIFK2qRG3f3RF70pb7,Calm Like a Bomb,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],3,1,False,0.315,...,0.483,0.0234,2e-06,0.122,0.37,149.749,298893,4.0,1999,1999-11-02
3,2lbASgTSoDO7MTuLAXlTW0,Mic Check,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],4,1,True,0.44,...,0.237,0.163,4e-06,0.121,0.574,96.752,213640,4.0,1999,1999-11-02
4,1MQTmpYOZ6fcMQc56Hdo7T,Sleep Now In the Fire,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],5,1,False,0.426,...,0.0701,0.00162,0.105,0.0789,0.539,127.059,205600,4.0,1999,1999-11-02


In [408]:
tracks = pd.read_csv("./tracks_features.csv")
tracks.rename(columns={"id":"id_track"}, inplace=True)
tracks.head(5)

Unnamed: 0,id_track,name,album,album_id,artists,artist_ids,track_number,disc_number,explicit,danceability,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,year,release_date
0,7lmeHLHBe4nmXzuXc0HDjk,Testify,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],1,1,False,0.47,...,0.0727,0.0261,1.1e-05,0.356,0.503,117.906,210133,4.0,1999,1999-11-02
1,1wsRitfRRtWyEapl0q22o8,Guerrilla Radio,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],2,1,True,0.599,...,0.188,0.0129,7.1e-05,0.155,0.489,103.68,206200,4.0,1999,1999-11-02
2,1hR0fIFK2qRG3f3RF70pb7,Calm Like a Bomb,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],3,1,False,0.315,...,0.483,0.0234,2e-06,0.122,0.37,149.749,298893,4.0,1999,1999-11-02
3,2lbASgTSoDO7MTuLAXlTW0,Mic Check,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],4,1,True,0.44,...,0.237,0.163,4e-06,0.121,0.574,96.752,213640,4.0,1999,1999-11-02
4,1MQTmpYOZ6fcMQc56Hdo7T,Sleep Now In the Fire,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],5,1,False,0.426,...,0.0701,0.00162,0.105,0.0789,0.539,127.059,205600,4.0,1999,1999-11-02


In [410]:
df_tracks_all = pd.merge(df_tracks, tracks, on='id_track', how='inner')
df_tracks_all.head()

Unnamed: 0,name_x,artists_x,album_x,popularity,id_track,name_y,album_y,album_id,artists_y,artist_ids,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,year,release_date
0,Right Now,SR-71,Now You See Inside,56,3iwsyTZzOWHZbHffBEyi1N,Right Now,Now You See Inside,1eDNzJLCxus27R8ZC2hS3z,['SR-71'],['7fTcs8dn9y6XuB2IqZMzBq'],...,0.0392,0.000289,7e-06,0.324,0.731,116.273,167200,4.0,2000,2000-06-20
1,Let Me Down Slowly,Alec Benjamin,Narrated For You,83,2qxmye6gAegTMjLKEBoR3d,Let Me Down Slowly,Narrated For You,6jKZplJpy21R5lHaYHHjmZ,['Alec Benjamin'],['5IH6FPUwQTxPSXurCrcIov'],...,0.0318,0.74,0.0,0.124,0.483,150.073,169354,4.0,2018,2018-11-16
2,Chemical Burns,"Sadistik, Lotte Kestner, Eyedea",Ultraviolet,28,673iPvgqnCCZdsYhA9qzE6,Chemical Burns,Ultraviolet,4KATPCuUBbfw2ExUd2NSIA,"['Sadistik', 'Eyedea', 'Lotte Kestner']","['3qgsChODA5iJM6AdA0rr21', '1AgWsZON0joqldki9f...",...,0.129,0.107,0.0,0.0513,0.254,98.109,237973,3.0,2014,2014-07-01


In [414]:
tracks_data = []
for track in top_tracks:
    track_info = {
        'name': track['name'],
        'artists': ', '.join(artist['name'] for artist in track['artists']),
        'album': track['album']['name'],
        'popularity': track['popularity'],
        'id_track' : track['id']
    }
    tracks_data.append(track_info)

df_tracks = pd.DataFrame(tracks_data)

display(df_tracks)

Unnamed: 0,name,artists,album,popularity,id_track
0,violet,Lexie Carroll,violet,28,6wOypcYXXeROfeVQLZ0sVU
1,the sky looked nice today,Lexie Carroll,when the sun came up,28,3XlMpNZyNcqH7GxJw0NWH9
2,Right Now,SR-71,Now You See Inside,56,3iwsyTZzOWHZbHffBEyi1N
3,vampire,Olivia Rodrigo,GUTS,82,1kuGVB7EU95pJObxwvfwKS
4,With Me,Sum 41,Underclass Hero,60,1OTvWduKQV0MQd2gnGM9He
5,stranger,Olivia Rodrigo,GUTS (spilled),70,2U3jOPfO4wZZFaaWS4Dcj6
6,Wayfaring Stranger,"Ashley Johnson, Troy Baker",The Last of Us Part II: Covers and Rarities,44,3wR4Cnadsl0xnpEAYkr3my
7,What's Up Danger (with Black Caviar),"Blackway, Black Caviar",Spider-Man: Into the Spider-Verse (Soundtrack ...,68,5zsHmE2gO3RefVsPyw2e3T
8,My Girl,The Temptations,The Temptations Sing Smokey,78,745H5CctFr12Mo7cqa1BMH
9,The Rock Show,blink-182,Take Off Your Pants And Jacket,68,2ydUT1pFhuLDnouelIv4WH


In [416]:
df_tracks_clean = df_tracks_all.filter(items=['popularity','danceability', 'energy', 'key', 'loudness',
       'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness',
       'valence', 'tempo'])
df_tracks_clean.head(5)

Unnamed: 0,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,56,0.524,0.967,8,-3.322,1,0.0392,0.000289,7e-06,0.324,0.731,116.273
1,83,0.652,0.557,1,-5.714,0,0.0318,0.74,0.0,0.124,0.483,150.073
2,28,0.789,0.622,8,-6.878,0,0.129,0.107,0.0,0.0513,0.254,98.109


In [418]:
def get_genres_for_artist(artist_id):
    if not access_token:
        print("Erreur : Aucun token d'accès disponible. Authentifiez-vous d'abord")
        return []

    url = f"https://api.spotify.com/v1/artists/{artist_id}"
    headers = {"Authorization": f"Bearer {access_token}"}

    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        artist_data = response.json()
        return artist_data.get("genres", [])
    else:
        print("Erreur :", response.status_code, response.json())
        return []

In [420]:
def add_genres_to_dataframe(df):
    genres_list = []
    for track_artists in df['artist_ids']:
        artist_ids = track_artists.strip("[]").replace("'", "").split(", ")
        track_genres = set()
        for artist_id in artist_ids:
            track_genres.update(get_genres_for_artist(artist_id))
        genres_list.append(", ".join(track_genres))
    
    df['genres'] = genres_list
    return df

In [424]:
if 'artist_ids' in df_tracks_all.columns:
    df_tracks_all['artist_ids'] = df_tracks_all['artist_ids'].astype(str)
    df_tracks_with_genres = add_genres_to_dataframe(df_tracks_all)
    df_tracks_clean = df_tracks_with_genres.filter(items=[
        'popularity', 'danceability', 'energy', 'key', 'loudness', 'mode',
        'speechiness', 'acousticness', 'instrumentalness', 'liveness',
        'valence', 'tempo', 'genres'
    ])
    display(df_tracks_clean.head())
else:
    print("La colonne 'artist_ids' est manquante")

Unnamed: 0,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,genres
0,56,0.524,0.967,8,-3.322,1,0.0392,0.000289,7e-06,0.324,0.731,116.273,
1,83,0.652,0.557,1,-5.714,0,0.0318,0.74,0.0,0.124,0.483,150.073,"pov: indie, alt z, pop"
2,28,0.789,0.622,8,-6.878,0,0.129,0.107,0.0,0.0513,0.254,98.109,"battle rap, indie psych-pop, seattle hip hop, ..."


In [456]:
from sklearn.preprocessing import StandardScaler, MultiLabelBinarizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

def recommend_tracks_from_tracks(df, track_id, max_recommendations=50):
    expected_features = ['popularity', 'danceability', 'energy', 'key', 'loudness',
                         'mode', 'speechiness', 'acousticness', 'instrumentalness',
                         'liveness', 'valence', 'tempo', 'genres']
    
    available_features = [col for col in expected_features if col in df.columns]
    
    if not available_features:
        print("Erreur : Aucune des colonnes attendues n'est disponible dans le DataFrame.")
        return pd.DataFrame()

    if track_id not in df['id_track'].values:
        print("Erreur : L'ID de la piste spécifiée n'existe pas dans le DataFrame.")
        return pd.DataFrame()

    if 'genres' in df.columns:
        mlb = MultiLabelBinarizer()
        genre_matrix = mlb.fit_transform(df['genres'].str.split(','))
        genre_df = pd.DataFrame(genre_matrix, columns=mlb.classes_, index=df.index)
        df = pd.concat([df, genre_df], axis=1)
        features_to_scale = [col for col in df.columns if col in expected_features and col != 'genres']
    else:
        genre_df = pd.DataFrame()  # Si les genres n'existent pas, on crée un DataFrame vide
        features_to_scale = [col for col in df.columns if col in expected_features]

    scaler = StandardScaler()
    df_scaled = df.copy()
    df_scaled[features_to_scale] = scaler.fit_transform(df_scaled[features_to_scale])

    track_features = df_scaled[df_scaled['id_track'] == track_id][features_to_scale + list(genre_df.columns)]

    df_scaled['similarity'] = cosine_similarity(
        df_scaled[features_to_scale + list(genre_df.columns)],
        track_features.values.reshape(1, -1)
    ).flatten()

    recommendations = df_scaled[df_scaled['id_track'] != track_id] \
        .sort_values(by='similarity', ascending=False) \
        .head(max_recommendations)

    return recommendations[['id_track', 'similarity', 'name']]

example_track_id = tracks.iloc[0]['id_track'] 
recommended_tracks = recommend_tracks_from_tracks(tracks, example_track_id)
display(recommended_tracks)


Unnamed: 0,id_track,similarity,name
39200,4FcxMxyUf962mCEQoX8C6y,0.990939,Daily Revolver
869326,2yLFKU0rLvvIOIF0aQmKfS,0.99051,Sleeping Somewhere Else
549958,3iXKSnl6hmYyn89qUFuiLz,0.990321,If Love's the Answer
88612,1Udy7blrWwKMOlpQKlQS82,0.98815,Digital On
983597,3DB9mlwmgdS3WfBFk6Q1UV,0.988013,Stardust
1130919,0MiEhQOPw9IlArjnSLe84w,0.986959,One in a Million
142049,6oWHbBzqhimiD9eD3fxLSl,0.986913,Zebra Lounge
722712,0k9MB2429OQshdQpLWX8Lf,0.98519,Continuity Fails
889922,1IlcBYJMVZCsd34luxystR,0.985106,En Tu Destierro
508735,47e6vGDs0638yHaid4JCMn,0.98491,All My Friends
