In [4]:
from google.colab import userdata
# Read the Spotify API credentials from Colab's user secrets rather than hard-coding them.
# NOTE(review): the first secret name is spelled 'Sppotify_Client_Id' (double "p") — confirm it
# matches the name stored in the Colab Secrets panel before changing it.
Spotify_Client_Id = userdata.get('Sppotify_Client_Id')
Spotify_Client_Secret = userdata.get('Spotify_Client_Secret')

Authenticating with Spotify using Base64-encoded client credentials to obtain an access token:

In [6]:
import requests
import base64

# Client-credentials flow: exchange the app's client ID/secret for a bearer token.
CLIENT_ID = Spotify_Client_Id
CLIENT_SECRET = Spotify_Client_Secret

# HTTP Basic credentials are base64("<client_id>:<client_secret>").
client_credentials = f"{CLIENT_ID}:{CLIENT_SECRET}"
client_credentials_base64 = base64.b64encode(client_credentials.encode())

token_url = 'https://accounts.spotify.com/api/token'
headers = {
    'Authorization': f'Basic {client_credentials_base64.decode()}'
}
data = {
    'grant_type': 'client_credentials'
}
# The timeout keeps the cell from hanging indefinitely if the endpoint is unreachable.
response = requests.post(token_url, data=data, headers=headers, timeout=30)

if response.status_code == 200:
    access_token = response.json()['access_token']
    print("Access token obtained successfully")
else:
    # Raise instead of calling exit(): in a notebook exit() shuts the kernel down,
    # whereas an exception stops "Run All" at this cell and shows why it failed.
    raise RuntimeError(
        f"Error obtaining access token (HTTP {response.status_code}): {response.text}"
    )

Access token obtained successfully


Now that we have the access token, we install Spotipy, a Python client library for the Spotify Web API:

In [7]:
# Install the Spotipy client library into the running Colab environment.
!pip install spotipy

Collecting spotipy
  Downloading spotipy-2.23.0-py3-none-any.whl (29 kB)
Collecting redis>=3.5.3 (from spotipy)
  Downloading redis-5.0.4-py3-none-any.whl (251 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m252.0/252.0 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: redis, spotipy
Successfully installed redis-5.0.4 spotipy-2.23.0


Function to get playlist data using a public playlist ID:

In [10]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth

def get_trending_playlist_data(playlist_id, access_token):
    """Collect track metadata and audio features for a Spotify playlist.

    Only the items returned by a single ``sp.playlist_tracks`` call are
    processed; additional pages are not requested.

    Args:
        playlist_id: Spotify ID of the playlist to read.
        access_token: Token passed to ``spotipy.Spotify(auth=...)``.

    Returns:
        pandas.DataFrame with one row per usable playlist item and the columns
        'Track Name', 'Artists', 'Album Name', 'Album ID', 'Track ID',
        'Popularity', 'Release Date', 'Duration (ms)', 'Explicit',
        'External URLs' and the audio-feature columns 'Danceability' through
        'Tempo'. Values that could not be retrieved are None.
    """
    sp = spotipy.Spotify(auth=access_token)

    playlist_tracks = sp.playlist_tracks(playlist_id, fields='items(track(id, name, artists, album(id, name)))')

    def _has_id(spotify_id):
        # Treat both a missing ID and the legacy 'Not available' sentinel as unusable.
        return bool(spotify_id) and spotify_id != 'Not available'

    def _feature(features, key):
        # Audio features are optional; a missing feature set yields None per column.
        return features[key] if features else None

    music_data = []
    for item in playlist_tracks['items']:
        track = item.get('track')
        if not track:
            # Playlist entries without a track object carry no data to record.
            continue

        track_name = track['name']
        artists = ', '.join(artist['name'] for artist in track['artists'])
        album = track.get('album') or {}
        album_name = album.get('name')
        album_id = album.get('id')
        track_id = track['id']

        audio_features = None
        if _has_id(track_id):
            features_list = sp.audio_features(track_id)
            audio_features = features_list[0] if features_list else None

        # Release date lives on the album object; the lookup is best-effort.
        release_date = None
        if _has_id(album_id):
            try:
                release_date = sp.album(album_id).get('release_date')
            except Exception:
                release_date = None

        # Full track object supplies popularity / explicit flag / external URL.
        # Kept in its own variable so a failed lookup never falls back to the
        # playlist item (which has none of these keys) or to None.
        track_details = {}
        if _has_id(track_id):
            try:
                track_details = sp.track(track_id) or {}
            except Exception:
                track_details = {}

        track_data = {
            'Track Name': track_name,
            'Artists': artists,
            'Album Name': album_name,
            'Album ID': album_id,
            'Track ID': track_id,
            'Popularity': track_details.get('popularity'),
            'Release Date': release_date,
            'Duration (ms)': _feature(audio_features, 'duration_ms'),
            'Explicit': track_details.get('explicit'),
            'External URLs': (track_details.get('external_urls') or {}).get('spotify'),
            'Danceability': _feature(audio_features, 'danceability'),
            'Energy': _feature(audio_features, 'energy'),
            'Key': _feature(audio_features, 'key'),
            'Loudness': _feature(audio_features, 'loudness'),
            'Mode': _feature(audio_features, 'mode'),
            'Speechiness': _feature(audio_features, 'speechiness'),
            'Acousticness': _feature(audio_features, 'acousticness'),
            'Instrumentalness': _feature(audio_features, 'instrumentalness'),
            'Liveness': _feature(audio_features, 'liveness'),
            'Valence': _feature(audio_features, 'valence'),
            'Tempo': _feature(audio_features, 'tempo'),
        }

        music_data.append(track_data)

    df = pd.DataFrame(music_data)

    return df

Here we provide a playlist ID and get playlist data:

In [11]:
# Spotify ID of the playlist to analyse.
playlist_id = '2mdFnQyK9pHIv7klmMxoWH'

# Build the track table; this issues several Web API calls per track.
music_df = get_trending_playlist_data(playlist_id, access_token)

print(music_df)

                          Track Name                       Artists  \
0                       Lucid Dreams                    Juice WRLD   
1                         God's Plan                         Drake   
2                         Better Now                   Post Malone   
3         rockstar (feat. 21 Savage)        Post Malone, 21 Savage   
4            Congratulations - Remix    Post Malone, Quavo, Future   
..                               ...                           ...   
95                            Ridin'  Chamillionaire, Krayzie Bone   
96                       Knock Knock                       SoFaygo   
97                         On My Way                       Illijah   
98      Chosen (feat. Ty Dolla $ign)    Blxst, Tyga, Ty Dolla $ign   
99  Bad Things (with Camila Cabello)           mgk, Camila Cabello   

                  Album Name                Album ID                Track ID  \
0    Goodbye & Good Riddance  6tkjU4Umpo79wwkgPMV3nZ  285pBltuF7vW8TeWk8hdRR   

Checking for NULL values

In [12]:
# Count missing values per column of the track table.
print(music_df.isnull().sum())

Track Name          0
Artists             0
Album Name          0
Album ID            0
Track ID            0
Popularity          0
Release Date        1
Duration (ms)       0
Explicit            0
External URLs       0
Danceability        0
Energy              0
Key                 0
Loudness            0
Mode                0
Speechiness         0
Acousticness        0
Instrumentalness    0
Liveness            0
Valence             0
Tempo               0
dtype: int64


Importing Libraries to be used:

In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity

Function that weights each song by how recently it was released, so that newer songs receive a higher weight:

In [14]:
def calculate_weighted_popularity(release_date):
    """Return a recency weight of ``1 / (days since release + 1)``.

    Args:
        release_date: Release date as a string in 'YYYY-MM-DD', 'YYYY-MM' or
            'YYYY' form (partial dates resolve to the first day of the
            period), or a ``datetime`` instance. A missing value (None, NaN,
            empty string) is accepted.

    Returns:
        float: 1.0 for a release today or in the future, decreasing towards 0
        for older releases; 0.0 when the release date is missing.

    Raises:
        ValueError: If a non-empty string matches none of the supported formats.
    """
    if isinstance(release_date, datetime):
        released = release_date
    elif isinstance(release_date, str) and release_date.strip():
        text = release_date.strip()
        released = None
        # Try the most precise format first, then fall back to coarser ones.
        for date_format in ('%Y-%m-%d', '%Y-%m', '%Y'):
            try:
                released = datetime.strptime(text, date_format)
                break
            except ValueError:
                continue
        if released is None:
            raise ValueError(f"Unrecognized release date format: {release_date!r}")
    else:
        # Unknown release date: give the song no recency boost instead of crashing.
        return 0.0

    time_span = datetime.now() - released
    # Clamp to zero so a future-dated release cannot produce a zero or negative divisor.
    weight = 1 / (max(time_span.days, 0) + 1)
    return weight

Normalizing audio features:

In [15]:
# Rescale every audio-feature column with MinMaxScaler so that no single
# feature dominates the similarity computation.
scaler = MinMaxScaler()
music_features = music_df.loc[:, [
    'Danceability',
    'Energy',
    'Key',
    'Loudness',
    'Mode',
    'Speechiness',
    'Acousticness',
    'Instrumentalness',
    'Liveness',
    'Valence',
    'Tempo',
]].to_numpy()
music_features_scaled = scaler.fit_transform(music_features)

Function to generate content-based recommendations for a user-supplied song from the provided playlist:

In [16]:
def content_based_recommendations(input_song_name, num_recommendations=5):
    """Recommend the songs whose scaled audio features are most similar to the input song.

    Args:
        input_song_name: Exact 'Track Name' of a song present in ``music_df``.
            If several rows share the name, the first one is used.
        num_recommendations: Maximum number of songs to return.

    Returns:
        pandas.DataFrame with columns 'Track Name', 'Artists', 'Album Name',
        'Release Date' and 'Popularity', ordered from most to least similar,
        or None (after printing a message) when the song is not in the dataset.
    """
    if input_song_name not in music_df['Track Name'].values:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    # Positional row of the first match; used for both the feature matrix and .iloc,
    # so it stays correct even if music_df does not have a default 0..n-1 index.
    input_song_position = int((music_df['Track Name'] == input_song_name).to_numpy().argmax())

    similarity_scores = cosine_similarity(
        [music_features_scaled[input_song_position]], music_features_scaled
    )[0]

    # Rank all songs by similarity, then drop the input song explicitly rather than
    # assuming it is ranked first (ties or duplicate feature vectors can displace it).
    ranked_positions = similarity_scores.argsort()[::-1]
    ranked_positions = ranked_positions[ranked_positions != input_song_position]
    similar_song_positions = ranked_positions[:num_recommendations]

    recommendations = music_df.iloc[similar_song_positions][
        ['Track Name', 'Artists', 'Album Name', 'Release Date', 'Popularity']
    ]

    return recommendations

The `hybrid_recommendations` function combines the content-based recommendations with the input song’s information (track name, artists, album name, release date) and its recency-weighted popularity score. This step creates a DataFrame named `combined_df` that includes both the content-based recommendations and the input song’s data.
The DataFrame is then sorted in descending order of its `Popularity` column, and the input song’s own row is removed so that only the recommended songs are returned, most popular first:

In [25]:
def hybrid_recommendations(input_song_name, num_recommendations=5, alpha=0.5):
    """Return content-based recommendations for a song, ordered by popularity.

    Args:
        input_song_name: Exact 'Track Name' of a song present in ``music_df``.
        num_recommendations: Number of content-based recommendations to request.
        alpha: Accepted for interface compatibility; not used by the current
            implementation.

    Returns:
        pandas.DataFrame with columns 'Track Name', 'Artists', 'Album Name',
        'Release Date' and 'Popularity', sorted by 'Popularity' descending and
        excluding the input song, or None (after printing a message) when the
        song is not in the dataset.
    """
    if input_song_name not in music_df['Track Name'].values:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    # Look up the input song's row once instead of re-filtering music_df per field.
    input_row = music_df.loc[music_df['Track Name'] == input_song_name].iloc[0]

    weighted_popularity_score = input_row['Popularity'] * calculate_weighted_popularity(input_row['Release Date'])

    # Row describing the input song, carrying its recency-weighted popularity.
    new_row = pd.Series({
        'Track Name': input_song_name,
        'Artists': input_row['Artists'],
        'Album Name': input_row['Album Name'],
        'Release Date': input_row['Release Date'],
        'Popularity': weighted_popularity_score
    })

    # Content-based candidates (computed once).
    content_based_rec = content_based_recommendations(input_song_name, num_recommendations)

    # Combine the candidates with the input song's row.
    combined_df = pd.concat([content_based_rec, new_row.to_frame().T], ignore_index=True)

    # Sort by popularity, then drop the input song itself.
    # NOTE(review): because the input row is removed here, its weighted score does
    # not affect the returned frame — confirm whether that is intended.
    combined_df = combined_df.sort_values(by='Popularity', ascending=False)
    combined_df = combined_df[combined_df['Track Name'] != input_song_name]

    return combined_df


Now, to test, input a song from the playlist and get recommendations:

In [26]:
# Song to seed the recommendations; it must match a 'Track Name' in music_df exactly.
input_song_name = "God's Plan"
# hybrid_recommendations returns None (after printing a message) if the song is not found.
recommendations = hybrid_recommendations(input_song_name, num_recommendations=5)
print(f"Hybrid recommended songs for '{input_song_name}':")
print(recommendations)

Hybrid recommended songs for 'God's Plan':
       Track Name                   Artists              Album Name  \
0     Money Trees  Kendrick Lamar, Jay Rock  good kid, m.A.A.d city   
4    MIDDLE CHILD                   J. Cole            MIDDLE CHILD   
2             Lie                        NF              Perception   
3  Tha Crossroads      Bone Thugs-N-Harmony         E. 1999 Eternal   
1             7AM              Lil Uzi Vert             Luv Is Rage   

  Release Date Popularity  
0         2012         81  
4   2019-01-23         80  
2   2017-10-06         75  
3   1995-07-25         69  
1   2015-12-18         65  
