# Spotify Playlist Data Extraction and Recommendation System

## Importing Necessary Libraries

In [1]:
%pip install spotipy

import base64
import os
from datetime import datetime

import numpy as np
import pandas as pd
import requests
import spotipy
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from spotipy.oauth2 import SpotifyOAuth

Note: you may need to restart the kernel to use updated packages.


## Spotify API Authentication

In [2]:
# Credentials are read from the environment (the same variable names spotipy
# itself looks for) so real secrets never need to be saved in the notebook.
# The placeholders are only used when the variables are not set; replace them
# with your own Client ID and Client Secret if you are not using the environment.
CLIENT_ID = os.environ.get('SPOTIPY_CLIENT_ID', 'your_client_id')
CLIENT_SECRET = os.environ.get('SPOTIPY_CLIENT_SECRET', 'your_client_secret')

In [3]:
# Join the ID and secret as "id:secret", then Base64-encode the pair so it can
# be sent in the Basic Authorization header of the token request.
client_credentials = ":".join((CLIENT_ID, CLIENT_SECRET))
client_credentials_base64 = base64.b64encode(
    bytes(client_credentials, "utf-8")
).decode("utf-8")

In [4]:
# Request an access token using the client-credentials grant
token_url = 'https://accounts.spotify.com/api/token'
headers = {
    'Authorization': f'Basic {client_credentials_base64}'
}
data = {
    'grant_type': 'client_credentials'
}
# The timeout stops the cell from hanging indefinitely if the service is unreachable.
response = requests.post(token_url, data=data, headers=headers, timeout=10)

if response.status_code == 200:
    access_token = response.json()['access_token']
    print("Access token obtained successfully.")
else:
    # Fail loudly with the status and response body: a raised error halts
    # "Run All" with a traceback, so later cells never run without access_token.
    raise RuntimeError(
        f"Error obtaining access token (HTTP {response.status_code}): {response.text}"
    )

Access token obtained successfully.


## Function to Retrieve Playlist Data from Spotify

In [5]:
def _fetch_playlist_tracks(sp, playlist_id):
    """Returns the track objects from every page of the playlist, skipping entries with no track."""
    # Request everything the DataFrame needs in one response; 'next' must be
    # listed explicitly or the paging link is filtered out of the result.
    fields = (
        'items(track(id,name,explicit,popularity,external_urls,artists,'
        'album(id,name,release_date))),next'
    )
    tracks = []
    page = sp.playlist_tracks(playlist_id, fields=fields)
    while page:
        for item in page.get('items') or []:
            track = (item or {}).get('track')
            # A playlist entry can carry a null track; there is nothing to record for it.
            if track:
                tracks.append(track)
        page = sp.next(page) if page.get('next') else None
    return tracks


def _fetch_audio_features(sp, track_ids, batch_size=100):
    """Returns {track_id: audio-features dict}, requesting the features in batches."""
    unique_ids = list(dict.fromkeys(track_ids))
    features_by_id = {}
    for start in range(0, len(unique_ids), batch_size):
        batch = unique_ids[start:start + batch_size]
        # Results are matched to the requested IDs by position; tracks without
        # features come back as None and are simply left out of the mapping.
        for track_id, features in zip(batch, sp.audio_features(batch) or []):
            if features:
                features_by_id[track_id] = features
    return features_by_id


def get_trending_playlist_data(playlist_id, access_token):
    """
    Fetches playlist data from Spotify and returns a DataFrame with relevant track information.

    Popularity, explicit flag, Spotify URL and album release date are read from the
    playlist response itself, and audio features are fetched in batches, so no
    per-track API requests are made. All pages of the playlist are read.

    Parameters:
    - playlist_id: Spotify playlist ID.
    - access_token: Spotify access token.

    Returns:
    - DataFrame with one row per playlist track containing track information and
      audio features. Values that are unavailable for a track (for example audio
      features of a track without an ID) are None/NaN. An empty playlist yields an
      empty DataFrame with the same columns.
    """
    # DataFrame column -> key in Spotify's audio-features object
    audio_feature_columns = {
        'Danceability': 'danceability',
        'Energy': 'energy',
        'Key': 'key',
        'Loudness': 'loudness',
        'Mode': 'mode',
        'Speechiness': 'speechiness',
        'Acousticness': 'acousticness',
        'Instrumentalness': 'instrumentalness',
        'Liveness': 'liveness',
        'Valence': 'valence',
        'Tempo': 'tempo',
    }
    columns = [
        'Track Name', 'Artists', 'Album Name', 'Album ID', 'Track ID',
        'Popularity', 'Release Date', 'Duration (ms)', 'Explicit', 'External URLs',
    ] + list(audio_feature_columns)

    # Set up Spotipy with the access token
    sp = spotipy.Spotify(auth=access_token)

    tracks = _fetch_playlist_tracks(sp, playlist_id)
    features_by_id = _fetch_audio_features(
        sp, [track['id'] for track in tracks if track.get('id')]
    )

    music_data = []
    for track in tracks:
        track_id = track.get('id')
        album = track.get('album') or {}
        audio_features = features_by_id.get(track_id) or {}

        track_data = {
            'Track Name': track.get('name'),
            'Artists': ', '.join(artist['name'] for artist in track.get('artists') or []),
            'Album Name': album.get('name'),
            'Album ID': album.get('id'),
            'Track ID': track_id,
            'Popularity': track.get('popularity'),
            'Release Date': album.get('release_date'),
            'Duration (ms)': audio_features.get('duration_ms'),
            'Explicit': track.get('explicit'),
            'External URLs': (track.get('external_urls') or {}).get('spotify'),
        }
        for column, feature_key in audio_feature_columns.items():
            track_data[column] = audio_features.get(feature_key)

        music_data.append(track_data)

    # Passing the column list keeps the schema stable even when the playlist is empty.
    df = pd.DataFrame(music_data, columns=columns)
    return df

## Fetch and Display Trending Playlist Data

In [6]:
# Spotify ID of the playlist to analyse (example playlist)
playlist_id = '37i9dQZF1DX76Wlfdnj7AP'

# Download the playlist into a DataFrame, then show it together with the
# number of missing values in each column.
music_df = get_trending_playlist_data(playlist_id=playlist_id, access_token=access_token)
print(music_df)
print("Null values in the DataFrame:\n", music_df.isna().sum())

                               Track Name                             Artists  \
0                             Not Like Us                      Kendrick Lamar   
1   I Had Some Help (Feat. Morgan Wallen)          Post Malone, Morgan Wallen   
2           Guess featuring Billie Eilish           Charli xcx, Billie Eilish   
3                               Big Dawgs                  Hanumankind, Kalmi   
4                      I Don't Wanna Wait           David Guetta, OneRepublic   
..                                    ...                                 ...   
95                          Where You Are                  John Summit, HAYLA   
96      Princess Diana (with Nicki Minaj)              Ice Spice, Nicki Minaj   
97   HOT ONE (with TiaCorine & A$AP Ferg)  Denzel Curry, TiaCorine, A$AP Ferg   
98                      Edge of Seventeen                                Wuki   
99    Wanna Be (with Megan Thee Stallion)       GloRilla, Megan Thee Stallion   

                           

## Preprocessing and Feature Scaling

In [7]:
# Normalize the music features using Min-Max scaling
scaler = MinMaxScaler()
music_features = music_df[['Danceability', 'Energy', 'Key', 
                           'Loudness', 'Mode', 'Speechiness', 'Acousticness',
                           'Instrumentalness', 'Liveness', 'Valence', 'Tempo']].values
music_features_scaled = scaler.fit_transform(music_features)

## Content-Based Recommendation System

In [8]:
def content_based_recommendations(input_song_name, num_recommendations=5):
    """
    Generates song recommendations based on content-based filtering using cosine similarity.

    The input song is compared with every row of `music_features_scaled`, and the
    most similar other tracks are returned, most similar first. If several tracks
    share the input name, the first one in `music_df` is used.

    Parameters:
    - input_song_name: Name of the input song (must match a 'Track Name' exactly).
    - num_recommendations: Number of recommendations to return.

    Returns:
    - DataFrame containing recommended songs, or None (after printing a message)
      when the song is not in the dataset.
    """
    # Work with row positions throughout: music_features_scaled is a plain array,
    # so positions stay correct whatever index labels music_df carries.
    matching_positions = np.flatnonzero(music_df['Track Name'].values == input_song_name)
    if matching_positions.size == 0:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    input_song_position = matching_positions[0]

    # Calculate the similarity scores based on music features (cosine similarity)
    similarity_scores = cosine_similarity(
        [music_features_scaled[input_song_position]], music_features_scaled
    )[0]

    # Rank all tracks from most to least similar, then remove the input song by
    # position. Dropping "the first result" instead would be wrong whenever
    # another track ties with the input at the top of the ranking.
    ranked_positions = np.argsort(similarity_scores)[::-1]
    ranked_positions = ranked_positions[ranked_positions != input_song_position]
    similar_song_positions = ranked_positions[:max(num_recommendations, 0)]

    # Look up the descriptive columns for the selected tracks
    recommendations = music_df.iloc[similar_song_positions][
        ['Track Name', 'Artists', 'Album Name', 'Release Date', 'Popularity']
    ]

    return recommendations

## Hybrid Recommendation System

In [9]:
# Release dates may be given to day, month or year precision.
_RELEASE_DATE_FORMATS = ('%Y-%m-%d', '%Y-%m', '%Y')


def calculate_weighted_popularity(release_date):
    """
    Calculates a recency weight from the release date of a song.

    The weight is 1 / (days since release + 1): a song released today gets 1.0
    and older songs get progressively smaller weights.

    Parameters:
    - release_date: The release date of the song as 'YYYY-MM-DD', 'YYYY-MM' or
      'YYYY'. Partial dates are taken as the first day of the month/year.
      None or NaN means the date is unknown.

    Returns:
    - A weight in (0, 1] giving higher weight to more recent releases, or 0.0
      when the release date is unknown.

    Raises:
    - ValueError: if a date string matches none of the supported formats.
    """
    # `x != x` is true only for NaN-like values, which is how pandas marks missing dates.
    if release_date is None or release_date != release_date:
        return 0.0

    for date_format in _RELEASE_DATE_FORMATS:
        try:
            released_on = datetime.strptime(release_date, date_format)
            break
        except ValueError:
            continue
    else:
        raise ValueError(
            f"Unrecognised release date {release_date!r}; "
            "expected 'YYYY-MM-DD', 'YYYY-MM' or 'YYYY'."
        )

    # Clamp at zero so a date in the future is treated as released today, rather
    # than dividing by zero or producing a negative weight.
    age_in_days = max((datetime.now() - released_on).days, 0)
    weight = 1 / (age_in_days + 1)
    return weight

In [10]:

def hybrid_recommendations(input_song_name, num_recommendations=5, alpha=0.5):
    """
    Returns the content-based recommendations for a song, ranked by popularity.

    The candidates are the tracks most similar to the input song according to
    content_based_recommendations(); they are then ordered by their Spotify
    popularity, highest first. The input song itself is never part of the result.

    Parameters:
    - input_song_name: Name of the input song (must match a 'Track Name' exactly).
    - num_recommendations: Number of content-based candidates to rank.
    - alpha: Weighting factor intended for blending content-based and
      popularity-based scores. It is accepted for backward compatibility but is
      not currently used in the ranking.

    Returns:
    - DataFrame of recommended songs sorted by 'Popularity' (as float) in
      descending order, indexed by each song's position in the content-based
      ranking; or None (after printing a message) when the song is not in the
      dataset.
    """
    if input_song_name not in music_df['Track Name'].values:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    # Get content-based recommendations
    content_based_rec = content_based_recommendations(input_song_name, num_recommendations)

    # Re-number the candidates by similarity rank, then order them by popularity.
    ranked = (
        content_based_rec
        .reset_index(drop=True)
        .astype({'Popularity': 'float64'})
        .sort_values(by='Popularity', ascending=False)
    )

    # Guard against any candidate that shares the input song's name.
    ranked = ranked[ranked['Track Name'] != input_song_name]

    return ranked

## Example Usage of the Hybrid Recommendation System

In [11]:
# Seed track for the demo; it has to match a 'Track Name' in music_df exactly
input_song_name = "Not Like Us"
recommendations = hybrid_recommendations(
    input_song_name=input_song_name,
    num_recommendations=5,
)
print(f"Hybrid recommended songs for '{input_song_name}':", recommendations, sep="\n")

Hybrid recommended songs for 'Not Like Us':
           Track Name         Artists  \
0         Lovin On Me     Jack Harlow   
2            euphoria  Kendrick Lamar   
1                Fuel     Eminem, JID   
3        Belly Dancer   Imanbek, BYOR   
4  Sunshine (My Girl)            Wuki   

                                Album Name Release Date  Popularity  
0                              Lovin On Me   2023-11-10        84.0  
2                                 euphoria   2024-04-30        84.0  
1  The Death of Slim Shady (Coup De Grâce)   2024-07-12        81.0  
3                             Belly Dancer   2022-02-18        79.0  
4                       Sunshine (My Girl)   2023-09-01        76.0  
