In [None]:
#Spotify Recommendation System
#Section 1
'''
The code starts by importing two modules: `requests` (for making web requests) and `base64` (for encoding data). These modules help the code interact with web servers securely.

The code sets up the necessary information to authenticate with the Spotify API. It includes a specific "Client ID" and "Client Secret" provided by Spotify. These are like secret keys that identify the application making requests.

The Client ID and Client Secret are combined into a single string and then encoded using Base64. This encoding adds a layer of security when sending the credentials.

The code sends a request to the Spotify API to get an "access token." This token acts like a special key that allows the application to access certain parts of the Spotify API.

The code checks the response from the API. If the response has a status code of 200 (which means success), it extracts the "access_token" from the response. If not, it indicates that something went wrong.

If the access token is successfully obtained, the code can use this token in later requests to the Spotify API. It helps authenticate the application and allows it to access specific data or perform actions as authorized.

If there's a problem getting the access token (response status code is not 200), the code prints an error message and exits the program.

In simple terms, this code is like a "key exchange" with Spotify. It provides the necessary credentials (Client ID and Secret), gets a special access token in return, and this token can be used to unlock specific doors in the Spotify API for the application. If all goes well, it's a way to securely access Spotify's data and features programmatically. If there's an issue, it lets you know something went wrong.'''

In [1]:
#Section 1

import requests # This module allows making HTTP requests to web servers.
import base64 # This module provides functions for encoding and decoding data in Base64 format.

# Your Spotify Client ID and Client Secret
CLIENT_ID = 'Input your Client Id'
CLIENT_SECRET = 'Input your client secret'

# Base64 encode the client ID and client secret
client_credentials = f"{CLIENT_ID}:{CLIENT_SECRET}" # Combines Client ID and Client Secret together seperated with a colon
client_credentials_base64 = base64.b64encode(client_credentials.encode()) #Encoding the combined client credentials string from the previous line into Base64 format.

# Request the access token
token_url = 'https://accounts.spotify.com/api/token' # Defining the URL for obtaining an access token from the Spotify API.
headers = {
    'Authorization': f'Basic {client_credentials_base64.decode()}' # Defining the HTTP headers for the request, including the Authorization header with the Base64-encoded client credentials.
}
data = {
    'grant_type': 'client_credentials' # Specifying the grant type for the token request as "client_credentials".
}
response = requests.post(token_url, data=data, headers=headers) # Sending a POST request to the Spotify API's token endpoint with the specified headers and data.

if response.status_code == 200: # Checking if the response from the token request has a status code of 200
    access_token = response.json()['access_token'] # Extracting the "access_token" value from the JSON response, which represents the access token for making authenticated API calls.
    print("Access token obtained successfully.")
else:
    print("Error obtaining access token.")
    exit()

Access token obtained successfully.


In [2]:
pip install spotipy

Collecting spotipy
  Downloading spotipy-2.23.0-py3-none-any.whl (29 kB)
Collecting redis>=3.5.3
  Downloading redis-4.6.0-py3-none-any.whl (241 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m241.1/241.1 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m00:01[0m
Collecting async-timeout>=4.0.2
  Downloading async_timeout-4.0.3-py3-none-any.whl (5.7 kB)
Installing collected packages: async-timeout, redis, spotipy
Successfully installed async-timeout-4.0.3 redis-4.6.0 spotipy-2.23.0
Note: you may need to restart the kernel to use updated packages.


In [None]:
#Section 2

'''This section defines a function called get_trending_playlist_data that helps us gather information about songs 
from a specific Spotify playlist. We provide the ID of the playlist and an access token for permission to the
Spotify music data. The code uses the Spotipy library, which is like a tool to talk to Spotify. It gets the tracks 
from the playlist and extracts important details like the song's name, the artists who made it, the album it's 
from, and more. It even handles situations where some information might be missing. All this collected data is 
organized into a nice table called a DataFrame, making it easier for us to analyze and understand the music from 
the playlist. The function then returns this DataFrame for us to use and explore.'''

In [2]:
#Section 2
import pandas as pd
import spotipy #Imports the Spotipy library, which is a Python wrapper for the Spotify Web API.
from spotipy.oauth2 import SpotifyOAuth #Imports the SpotifyOAuth class from the Spotipy library, which is used for handling OAuth2 authentication with the Spotify API.

def get_trending_playlist_data(playlist_id, access_token): # Defines a function named get_trending_playlist_data that takes two parameters: playlist_id (the ID of the playlist to retrieve data from) and access_token (the access token used to authenticate with the Spotify API).
    # Set up Spotipy with the access token 
    sp = spotipy.Spotify(auth=access_token)

    # Get the tracks from the playlist
    playlist_tracks = sp.playlist_tracks(playlist_id, fields='items(track(id, name, artists, album(id, name)))') # Retrieves the tracks from the specified playlist using the playlist_tracks method, which returns a list of track items. The fields parameter specifies the specific fields to include in the response.

    # Extract relevant information and store in a list of dictionaries
    music_data = [] # Empty list to store dictionaries containing track information
    for track_info in playlist_tracks['items']:
        track = track_info['track']
        track_name = track['name']
        artists = ', '.join([artist['name'] for artist in track['artists']])
        album_name = track['album']['name']
        album_id = track['album']['id']
        track_id = track['id']

        # Get audio features for the track
        audio_features = sp.audio_features(track_id)[0] if track_id != 'Not available' else None

        # Handling cases where certain information might not be available:
        # Get release date of the album
        try:
            album_info = sp.album(album_id) if album_id != 'Not available' else None
            release_date = album_info['release_date'] if album_info else None
        except:
            release_date = None

        # Get popularity of the track
        try:
            track_info = sp.track(track_id) if track_id != 'Not available' else None
            popularity = track_info['popularity'] if track_info else None
        except:
            popularity = None

        # Add additional track information to the track data
        track_data = {
            'Track Name': track_name,
            'Artists': artists,
            'Album Name': album_name,
            'Album ID': album_id,
            'Track ID': track_id,
            'Popularity': popularity,
            'Release Date': release_date,
            'Duration (ms)': audio_features['duration_ms'] if audio_features else None,
            'Explicit': track_info.get('explicit', None),
            'External URLs': track_info.get('external_urls', {}).get('spotify', None),
            'Danceability': audio_features['danceability'] if audio_features else None,
            'Energy': audio_features['energy'] if audio_features else None,
            'Key': audio_features['key'] if audio_features else None,
            'Loudness': audio_features['loudness'] if audio_features else None,
            'Mode': audio_features['mode'] if audio_features else None,
            'Speechiness': audio_features['speechiness'] if audio_features else None,
            'Acousticness': audio_features['acousticness'] if audio_features else None,
            'Instrumentalness': audio_features['instrumentalness'] if audio_features else None,
            'Liveness': audio_features['liveness'] if audio_features else None,
            'Valence': audio_features['valence'] if audio_features else None,
            'Tempo': audio_features['tempo'] if audio_features else None,
            # Add more attributes as needed
        }

        music_data.append(track_data) # After processing each track, the track_data dictionary is added to the music_data list.

    # Create a pandas DataFrame from the list of dictionaries
    df = pd.DataFrame(music_data)

    return df

In [None]:
#Section 3
'''In summary, this code retrieves track data from a specific Spotify playlist using the get_trending_playlist_data
function, stores the data in a Pandas DataFrame (music_df), and then prints the DataFrame to the console to show 
the track information in an organized format. This process allows you to analyze the tracks in the playlist and 
gain insights into the music it contains.'''






In [3]:
#Section 3
playlist_id = '37i9dQZF1DX76Wlfdnj7AP' # Unique code for spotify playlist we want to analyze got from the link: (https://open.spotify.com/playlist/37i9dQZF1DX76Wlfdnj7AP)

# Call the function to get the music data from the playlist and store it in a DataFrame
music_df = get_trending_playlist_data(playlist_id, access_token)

# Display the DataFrame
print(music_df)

                               Track Name                      Artists  \
0                         I'm Good (Blue)     David Guetta, Bebe Rexha   
1              FE!N (feat. Playboi Carti)  Travis Scott, Playboi Carti   
2                      Boy's a Liar Pt. 2    PinkPantheress, Ice Spice   
3   Quevedo: Bzrp Music Sessions, Vol. 52            Bizarrap, Quevedo   
4                         Me Porto Bonito  Bad Bunny, Chencho Corleone   
..                                    ...                          ...   
95                       PLAYA DEL INGLÉS         Quevedo, Myke Towers   
96                       One in a Million     Bebe Rexha, David Guetta   
97              Levitating (feat. DaBaby)             Dua Lipa, DaBaby   
98      family ties (with Kendrick Lamar)    Baby Keem, Kendrick Lamar   
99                   Lionheart (Fearless)      Joel Corry, Tom Grennan   

                               Album Name                Album ID  \
0                         I'm Good (Blue) 

In [4]:
#Checking for null values.
print(music_df.isnull().sum())

Track Name          0
Artists             0
Album Name          0
Album ID            0
Track ID            0
Popularity          0
Release Date        0
Duration (ms)       0
Explicit            0
External URLs       0
Danceability        0
Energy              0
Key                 0
Loudness            0
Mode                0
Speechiness         0
Acousticness        0
Instrumentalness    0
Liveness            0
Valence             0
Tempo               0
dtype: int64


In [5]:
#Section 4
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler #MinMaxScaler is used for scaling features (columns) of the dataset to a specified range, often between 0 and 1.
from datetime import datetime 
from sklearn.metrics.pairwise import cosine_similarity #The cosine_similarity function is used to compute the cosine similarity between vectors.

data = music_df

In [6]:
#Section 5
# Function to calculate weighted popularity scores based on release date
def calculate_weighted_popularity(release_date):
    # Convert the release date to datetime object
    release_date = datetime.strptime(release_date, '%Y-%m-%d')

    # Calculate the time span between release date and today's date
    time_span = datetime.now() - release_date

    # Calculate the weighted popularity score based on time span (e.g., more recent releases have higher weight)
    weight = 1 / (time_span.days + 1)
    return weight

In [11]:
#Section 6 
# Normalize the music features using Min-Max scaling
scaler = MinMaxScaler()
music_features = music_df[['Danceability', 'Energy', 'Key', 
                           'Loudness', 'Mode', 'Speechiness', 'Acousticness',
                           'Instrumentalness', 'Liveness', 'Valence', 'Tempo']].values
music_features_scaled = scaler.fit_transform(music_features) # Scaling the values to be between 0 and 1.

In [12]:
#Section 7
# a function to get content-based recommendations based on music features
def content_based_recommendations(input_song_name, num_recommendations=5):
    if input_song_name not in music_df['Track Name'].values: # Check the input_song_name is in music_df
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    # Get the index of the input song in the music DataFrame
    input_song_index = music_df[music_df['Track Name'] == input_song_name].index[0] 

    # Calculate the similarity scores based on music features (cosine similarity)
    similarity_scores = cosine_similarity([music_features_scaled[input_song_index]], music_features_scaled)

    # Get the indices of the most similar songs and sort by descending order
    similar_song_indices = similarity_scores.argsort()[0][::-1][1:num_recommendations + 1]

    # Get the names of the most similar songs based on content-based filtering
    content_based_recommendations = music_df.iloc[similar_song_indices][['Track Name', 'Artists', 'Album Name', 'Release Date', 'Popularity']]

    return content_based_recommendations

In [17]:
#Section 8
# a function to get hybrid recommendations based on weighted popularity
def hybrid_recommendations(input_song_name, num_recommendations=5, alpha=0.5):
    if input_song_name not in music_df['Track Name'].values: # Check the input_song_name is in music_df
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    # Get content-based recommendations
    content_based_rec = content_based_recommendations(input_song_name, num_recommendations)

    # Get the popularity score of the input song
    popularity_score = music_df.loc[music_df['Track Name'] == input_song_name, 'Popularity'].values[0]

    # Calculate the weighted popularity score
    weighted_popularity_score = popularity_score * calculate_weighted_popularity(music_df.loc[music_df['Track Name'] == input_song_name, 'Release Date'].values[0])

    # Combine content-based and popularity-based recommendations based on weighted popularity
    hybrid_recommendations = content_based_rec
    hybrid_recommendations = hybrid_recommendations.append({
        'Track Name': input_song_name,
        'Artists': music_df.loc[music_df['Track Name'] == input_song_name, 'Artists'].values[0],
        'Album Name': music_df.loc[music_df['Track Name'] == input_song_name, 'Album Name'].values[0],
        'Release Date': music_df.loc[music_df['Track Name'] == input_song_name, 'Release Date'].values[0],
        'Popularity': weighted_popularity_score
    }, ignore_index=True)

    # Sort the hybrid recommendations based on weighted popularity score
    hybrid_recommendations = hybrid_recommendations.sort_values(by='Popularity', ascending=False)

    # Remove the input song from the recommendations
    hybrid_recommendations = hybrid_recommendations[hybrid_recommendations['Track Name'] != input_song_name]


    return hybrid_recommendations


In [14]:
# Section 9
input_song_name = "I'm Good (Blue)"
recommendations = hybrid_recommendations(input_song_name, num_recommendations=5)
print(f"Hybrid recommended songs for '{input_song_name}':")
print(recommendations)

Hybrid recommended songs for 'I'm Good (Blue)':
                   Track Name                                     Artists  \
2  FE!N (feat. Playboi Carti)                 Travis Scott, Playboi Carti   
3                Call It Love                     Felix Jaehn, Ray Dalton   
0                       REACT  Switch Disco, Ella Henderson, Robert Miles   
1               Where You Are                          John Summit, Hayla   
4       Rainfall (Praise You)                                   Tom Santa   

              Album Name Release Date  Popularity  
2                 UTOPIA   2023-07-28        93.0  
3           Call It Love   2022-09-16        83.0  
0                  REACT   2023-01-13        82.0  
1          Where You Are   2023-03-03        79.0  
4  Rainfall (Praise You)   2022-02-18        77.0  


  hybrid_recommendations = hybrid_recommendations.append({
