# Spotify Music Recommendation System Using Python

In [57]:
import requests
import base64

In [58]:
CLIENT_ID = '8388d4f880c642f3ad3a193a115497bc'
CLIENT_SECRET = '0118ab51d2e04f898b88fe0ed6e1bb89'

# Base64 encode the client ID and client secret
client_credentials = f"{CLIENT_ID}:{CLIENT_SECRET}"
client_credentials_base64 = base64.b64encode(client_credentials.encode())


In [38]:
# Request the access token
token_url = 'https://accounts.spotify.com/api/token'
headers = {
    'Authorization': f'Basic {client_credentials_base64.decode()}'
}
data = {
    'grant_type': 'client_credentials'
}
response = requests.post(token_url, data=data, headers=headers)

if response.status_code == 200:
    access_token = response.json()['access_token']
    print("Access token obtained successfully.")
else:
    print("Error obtaining access token.")
    exit()

Access token obtained successfully.


To fetch music data from any Spotify playlist, you can utilize the Spotipy library, a convenient Python wrapper for accessing Spotify's web API. Install it on your system by running the following command in your command prompt or terminal:

In [59]:
pip install spotipy

Note: you may need to restart the kernel to use updated packages.


This Python function, get_trending_playlist_data, uses the Spotipy library to retrieve detailed information from a specified Spotify playlist. 

It authenticates using an access_token and fetches track details like ID, name, artists, and album data using sp.playlist_tracks. 

The function compiles track attributes and audio features such as danceability, energy, and tempo into a music_data list.

This data is then organized into a pandas DataFrame, providing a structured, tabular view for analysis. The DataFrame format makes it easier to manipulate and explore the musical characteristics of each track.

In [60]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth

def get_trending_playlist_data(playlist_id, access_token):
    # Set up Spotipy with the access token
    sp = spotipy.Spotify(auth=access_token)

    # Get the tracks from the playlist
    playlist_tracks = sp.playlist_tracks(playlist_id, fields='items(track(id, name, artists, album(id, name)))')

    # Extract relevant information and store in a list of dictionaries
    music_data = []
    for track_info in playlist_tracks['items']:
        track = track_info['track']
        track_name = track['name']
        artists = ', '.join([artist['name'] for artist in track['artists']])
        album_name = track['album']['name']
        album_id = track['album']['id']
        track_id = track['id']

        # Get audio features for the track
        audio_features = sp.audio_features(track_id)[0] if track_id != 'Not available' else None

        # Get release date of the album
        try:
            album_info = sp.album(album_id) if album_id != 'Not available' else None
            release_date = album_info['release_date'] if album_info else None
        except:
            release_date = None

        # Get popularity of the track
        try:
            track_info = sp.track(track_id) if track_id != 'Not available' else None
            popularity = track_info['popularity'] if track_info else None
        except:
            popularity = None

        # Add additional track information to the track data
        track_data = {
            'Track Name': track_name,
            'Artists': artists,
            'Album Name': album_name,
            'Album ID': album_id,
            'Track ID': track_id,
            'Popularity': popularity,
            'Release Date': release_date,
            'Duration (ms)': audio_features['duration_ms'] if audio_features else None,
            'Explicit': track_info.get('explicit', None),
            'External URLs': track_info.get('external_urls', {}).get('spotify', None),
            'Danceability': audio_features['danceability'] if audio_features else None,
            'Energy': audio_features['energy'] if audio_features else None,
            'Key': audio_features['key'] if audio_features else None,
            'Loudness': audio_features['loudness'] if audio_features else None,
            'Mode': audio_features['mode'] if audio_features else None,
            'Speechiness': audio_features['speechiness'] if audio_features else None,
            'Acousticness': audio_features['acousticness'] if audio_features else None,
            'Instrumentalness': audio_features['instrumentalness'] if audio_features else None,
            'Liveness': audio_features['liveness'] if audio_features else None,
            'Valence': audio_features['valence'] if audio_features else None,
            'Tempo': audio_features['tempo'] if audio_features else None,
            # Add more attributes as needed
        }

        music_data.append(track_data)

    # Create a pandas DataFrame from the list of dictionaries
    df = pd.DataFrame(music_data)

    return df

A specific Spotify playlist, identified by the playlist_id '37i9dQZF1DX76Wlfdnj7AP', is targeted for data extraction. The function get_trending_playlist_data is called with this playlist_id and an access_token to authenticate the request. This function fetches detailed information about the tracks in the specified playlist. The resulting data is stored in a pandas DataFrame, music_df, which is then printed to display the collected information in a structured tabular format. This DataFrame includes track details, artists, albums, audio features, and more, facilitating analysis or further processing.

In [61]:
playlist_id = '37i9dQZF1DX76Wlfdnj7AP'

# Call the function to get the music data from the playlist and store it in a DataFrame
music_df = get_trending_playlist_data(playlist_id, access_token)

# Display the DataFrame
print(music_df)

                          Track Name                       Artists  \
0                              Prada   cassö, RAYE, D-Block Europe   
1                           fukumean                         Gunna   
2                 IDGAF (feat. Yeat)                   Drake, Yeat   
3                    I'm Good (Blue)      David Guetta, Bebe Rexha   
4   Vois sur ton chemin - Techno Mix                       BENNETT   
..                               ...                           ...   
95                       Motley Crew                   Post Malone   
96      Kernkraft 400 (A Better Day)                    Topic, A7S   
97                          Remember      Becky Hill, David Guetta   
98                     All By Myself  Alok, Sigala, Ellie Goulding   
99             Rainfall (Praise You)                     Tom Santa   

                          Album Name                Album ID  \
0                              Prada  5MU0RmBSpoSxOPYBfcobDc   
1                   a Gift & a 

Checking if the data has any Null Values or not:

In [62]:
print(music_df.isnull().sum())

Track Name          0
Artists             0
Album Name          0
Album ID            0
Track ID            0
Popularity          0
Release Date        0
Duration (ms)       0
Explicit            0
External URLs       0
Danceability        0
Energy              0
Key                 0
Loudness            0
Mode                0
Speechiness         0
Acousticness        0
Instrumentalness    0
Liveness            0
Valence             0
Tempo               0
dtype: int64


Let’s import the necessary Python libraries now:

In [63]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity

data = music_df

Developing a function that strategically increases the weight of recently released songs in our recommendation algorithm. This function will adjust the recommendation scores based on the release date, giving newer songs a higher prominence. This method is crucial for maintaining an up-to-date and engaging user experience within our music recommendation system.

In [64]:
# Function to calculate weighted popularity scores based on release date
def calculate_weighted_popularity(release_date):
    # Convert the release date to datetime object
    release_date = datetime.strptime(release_date, '%Y-%m-%d')

    # Calculate the time span between release date and today's date
    time_span = datetime.now() - release_date

    # Calculate the weighted popularity score based on time span (e.g., more recent releases have higher weight)
    weight = 1 / (time_span.days + 1)
    return weight

Let’s normalize the music features

In [65]:
# Normalize the music features using Min-Max scaling
scaler = MinMaxScaler()
music_features = music_df[['Danceability', 'Energy', 'Key', 
                           'Loudness', 'Mode', 'Speechiness', 'Acousticness',
                           'Instrumentalness', 'Liveness', 'Valence', 'Tempo']].values
music_features_scaled = scaler.fit_transform(music_features)

The function content_based_recommendations generates music recommendations based on the features of a given input song. It first checks if the input song exists in the music DataFrame, music_df. If the song is found, it calculates similarity scores with other songs using cosine similarity on scaled music features. The function then identifies the indices of the most similar songs, based on these scores. Finally, it returns the details (like track name, artist, album, release date, and popularity) of the top recommended songs, up to a specified number (num_recommendations). This method ensures recommendations are closely aligned with the musical characteristics of the input song.

In [66]:
# a function to get content-based recommendations based on music features
def content_based_recommendations(input_song_name, num_recommendations=5):
    if input_song_name not in music_df['Track Name'].values:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    # Get the index of the input song in the music DataFrame
    input_song_index = music_df[music_df['Track Name'] == input_song_name].index[0]

    # Calculate the similarity scores based on music features (cosine similarity)
    similarity_scores = cosine_similarity([music_features_scaled[input_song_index]], music_features_scaled)

    # Get the indices of the most similar songs
    similar_song_indices = similarity_scores.argsort()[0][::-1][1:num_recommendations + 1]

    # Get the names of the most similar songs based on content-based filtering
    content_based_recommendations = music_df.iloc[similar_song_indices][['Track Name', 'Artists', 'Album Name', 'Release Date', 'Popularity']]

    return content_based_recommendations

The hybrid_recommendations function blends content-based and popularity-based methods to suggest songs similar to a given input_song_name. It first fetches content-based recommendations and then calculates a weighted popularity score for the input song, considering its release date. The function merges these recommendations with the input song's weighted popularity into a new DataFrame. This DataFrame is sorted by the weighted popularity score, ensuring recommendations are both relevant and popular. Lastly, the input song is removed from the final recommendations to offer diverse options to the user.

In [67]:
import pandas as pd

def hybrid_recommendations(input_song_name, num_recommendations=5):
    if input_song_name not in music_df['Track Name'].values:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    # Get content-based recommendations
    content_based_rec = content_based_recommendations(input_song_name, num_recommendations)

    # Get the row for the input song
    input_song_row = music_df[music_df['Track Name'] == input_song_name]

    # Calculate the weighted popularity score
    weighted_popularity_score = input_song_row['Popularity'].iloc[0] * calculate_weighted_popularity(input_song_row['Release Date'].iloc[0])

    # Create a new DataFrame for the input song with the weighted popularity
    input_song_df = input_song_row.copy()
    input_song_df['Popularity'] = weighted_popularity_score

    # Combine content-based and popularity-based recommendations
    hybrid_recommendations_df = pd.concat([content_based_rec, input_song_df])

    # Sort the hybrid recommendations based on weighted popularity score
    hybrid_recommendations_df = hybrid_recommendations_df.sort_values(by='Popularity', ascending=False)

    # Remove the input song from the recommendations
    hybrid_recommendations_df = hybrid_recommendations_df[hybrid_recommendations_df['Track Name'] != input_song_name]

    return hybrid_recommendations_df




Test the final function to generate music recommendations:

In [68]:
input_song_name = "I'm Good (Blue)"
recommendations = hybrid_recommendations(input_song_name, num_recommendations=5)
print(f"Hybrid recommended songs for '{input_song_name}':")
print(recommendations)


Hybrid recommended songs for 'I'm Good (Blue)':
                    Track Name                                     Artists  \
7   FE!N (feat. Playboi Carti)                 Travis Scott, Playboi Carti   
52                Call It Love                     Felix Jaehn, Ray Dalton   
75                       REACT  Switch Disco, Ella Henderson, Robert Miles   
83               Where You Are                          John Summit, Hayla   
87                        BOTH                      Tiësto, 21 Savage, BIA   

       Album Name Release Date  Popularity Album ID Track ID  Duration (ms)  \
7          UTOPIA   2023-07-28        87.0      NaN      NaN            NaN   
52   Call It Love   2022-09-16        78.0      NaN      NaN            NaN   
75          REACT   2023-01-13        76.0      NaN      NaN            NaN   
83  Where You Are   2023-03-03        75.0      NaN      NaN            NaN   
87           BOTH   2023-08-29        74.0      NaN      NaN            NaN   

   Expli

In [50]:
data = music_df 
print(data)

                          Track Name                       Artists  \
0                              Prada   cassö, RAYE, D-Block Europe   
1                           fukumean                         Gunna   
2                 IDGAF (feat. Yeat)                   Drake, Yeat   
3                    I'm Good (Blue)      David Guetta, Bebe Rexha   
4   Vois sur ton chemin - Techno Mix                       BENNETT   
..                               ...                           ...   
95                       Motley Crew                   Post Malone   
96      Kernkraft 400 (A Better Day)                    Topic, A7S   
97                          Remember      Becky Hill, David Guetta   
98                     All By Myself  Alok, Sigala, Ellie Goulding   
99             Rainfall (Praise You)                     Tom Santa   

                          Album Name                Album ID  \
0                              Prada  5MU0RmBSpoSxOPYBfcobDc   
1                   a Gift & a 

In [52]:
input_song_name = "IDGAF (feat. Yeat)"
recommendations = hybrid_recommendations(input_song_name, num_recommendations=5)
print(f"Hybrid recommended songs for '{input_song_name}':")
print(recommendations)


Hybrid recommended songs for 'IDGAF (feat. Yeat)':
                           Track Name                          Artists  \
15                         SICKO MODE                     Travis Scott   
33  Moth To A Flame (with The Weeknd)  Swedish House Mafia, The Weeknd   
42                  PUFFIN ON ZOOTIEZ                           Future   
80                                Lie                               NF   
92                  MONEY ON THE DASH              Elley Duhé, Whethan   

           Album Name Release Date  Popularity Album ID Track ID  \
15         ASTROWORLD   2018-08-03        84.0      NaN      NaN   
33     Paradise Again   2022-04-15        80.0      NaN      NaN   
42  I NEVER LIKED YOU   2022-04-29        79.0      NaN      NaN   
80         Perception   2017-10-06        75.0      NaN      NaN   
92  MONEY ON THE DASH   2023-01-20        73.0      NaN      NaN   

    Duration (ms) Explicit External URLs  ...  Energy  Key  Loudness  Mode  \
15            NaN