In [1]:
import base64
import os

import requests

In [2]:
# Read the Spotify app credentials from the environment so real secrets never
# have to be saved inside the notebook. The variable names follow the ones
# spotipy itself looks up; the original blank placeholder is kept as the
# fallback when a variable is not set.
CLIENT_ID = os.environ.get('SPOTIPY_CLIENT_ID', ' ')
CLIENT_SECRET = os.environ.get('SPOTIPY_CLIENT_SECRET', ' ')

In [3]:
# Join the credentials as "id:secret" and Base64-encode the UTF-8 bytes;
# the result is still a bytes object and is decoded where the header is built.
client_credentials = "{}:{}".format(CLIENT_ID, CLIENT_SECRET)
client_credentials_base64 = base64.b64encode(bytes(client_credentials, "utf-8"))

In [4]:
# Request an access token using the client-credentials grant
token_url = 'https://accounts.spotify.com/api/token'
headers = {
    'Authorization': f'Basic {client_credentials_base64.decode()}'
}
data = {
    'grant_type': 'client_credentials'
}
# Without a timeout, requests waits indefinitely if the server never answers
response = requests.post(token_url, data=data, headers=headers, timeout=10)

if response.status_code == 200:
    access_token = response.json()['access_token']
    print("Access token obtained successfully.")
else:
    print("Error obtaining access token.")
    # Raise instead of calling exit(): the failure is reported together with
    # the HTTP status and response body rather than just ending the session.
    raise RuntimeError(
        f"Token request failed with HTTP {response.status_code}: {response.text}"
    )

Access token obtained successfully.


In [5]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth

In [6]:
def get_trending_playlist_data(playlist_id, access_token):
    """Collect track metadata and audio features for a playlist into a DataFrame.

    Parameters
    ----------
    playlist_id : str
        Playlist identifier passed to ``sp.playlist_tracks``.
    access_token : str
        Token handed to ``spotipy.Spotify(auth=...)``.

    Returns
    -------
    pandas.DataFrame
        One row per processed playlist entry with the columns 'Track Name',
        'Artists', 'Album Name', 'Album ID', 'Track ID', 'Popularity',
        'Release Date', 'Duration (ms)', 'Explicit', 'External URLs' and the
        audio-feature columns 'Danceability' ... 'Tempo'. A value that could
        not be obtained is ``None``.

    Notes
    -----
    * Only the items returned by the single ``playlist_tracks`` call are
      processed; further pages are not requested.
    * Entries without a ``track`` payload are skipped.
    * Album and track lookups are best-effort: an exception from either call
      leaves the dependent fields as ``None``. Errors from the audio-features
      call are not caught and propagate to the caller.
    """
    # Set up Spotipy with the access token
    sp = spotipy.Spotify(auth=access_token)

    # Get the tracks from the playlist
    playlist_tracks = sp.playlist_tracks(playlist_id, fields='items(track(id, name, artists, album(id, name)))')

    # DataFrame column -> key in the audio-features payload (insertion order
    # defines the column order of the resulting frame)
    audio_feature_keys = {
        'Danceability': 'danceability',
        'Energy': 'energy',
        'Key': 'key',
        'Loudness': 'loudness',
        'Mode': 'mode',
        'Speechiness': 'speechiness',
        'Acousticness': 'acousticness',
        'Instrumentalness': 'instrumentalness',
        'Liveness': 'liveness',
        'Valence': 'valence',
        'Tempo': 'tempo',
    }

    music_data = []
    for playlist_item in playlist_tracks['items']:
        track = playlist_item.get('track')
        if not track:
            # NOTE(review): the Web API is documented to return a null track
            # for entries that are no longer available; nothing to extract.
            continue

        track_name = track['name']
        artists = ', '.join([artist['name'] for artist in track['artists']])
        album_name = track['album']['name']
        album_id = track['album']['id']
        track_id = track['id']

        # Treat a missing id the same way as the 'Not available' sentinel
        has_track_id = track_id not in (None, 'Not available')
        has_album_id = album_id not in (None, 'Not available')

        # Get audio features for the track
        audio_features = None
        if has_track_id:
            features_list = sp.audio_features(track_id)
            audio_features = features_list[0] if features_list else None

        # Get release date of the album (best-effort)
        release_date = None
        if has_album_id:
            try:
                album_info = sp.album(album_id)
                release_date = album_info['release_date'] if album_info else None
            except Exception:
                release_date = None

        # Get the full track object (best-effort). It is kept in its own
        # variable so a failed lookup can never fall back to the playlist item.
        track_details = None
        if has_track_id:
            try:
                track_details = sp.track(track_id)
            except Exception:
                track_details = None
        track_details = track_details or {}
        popularity = track_details.get('popularity')

        track_data = {
            'Track Name': track_name,
            'Artists': artists,
            'Album Name': album_name,
            'Album ID': album_id,
            'Track ID': track_id,
            'Popularity': popularity,
            'Release Date': release_date,
            'Duration (ms)': audio_features['duration_ms'] if audio_features else None,
            'Explicit': track_details.get('explicit'),
            'External URLs': (track_details.get('external_urls') or {}).get('spotify'),
        }
        for column, feature_key in audio_feature_keys.items():
            track_data[column] = audio_features[feature_key] if audio_features else None

        music_data.append(track_data)

    # Create a pandas DataFrame from the list of dictionaries
    df = pd.DataFrame(music_data)

    return df

In [7]:
# Playlist whose tracks are used as the recommendation dataset
playlist_id = '37i9dQZF1DX14CbVHtvHRB'

# Fetch the playlist's tracks and their features into a DataFrame
music_df = get_trending_playlist_data(playlist_id=playlist_id, access_token=access_token)

In [8]:
# Display the DataFrame (bare last expression of the cell, so the notebook renders it)
music_df

Unnamed: 0,Track Name,Artists,Album Name,Album ID,Track ID,Popularity,Release Date,Duration (ms),Explicit,External URLs,...,Energy,Key,Loudness,Mode,Speechiness,Acousticness,Instrumentalness,Liveness,Valence,Tempo
0,"Jaana Samjho Na (From ""Bhool Bhulaiyaa 3"")","Aditya Rikhari, Tulsi Kumar, Lijo George-Dj Ch...","Jaana Samjho Na (From ""Bhool Bhulaiyaa 3"")",02dVq1hK3asFuUjL0ulsGu,1PALLCSlHwAI4upY3sMg8u,73.0,2024-10-22,212108,False,https://open.spotify.com/track/1PALLCSlHwAI4up...,...,0.527,0,-7.601,1,0.0414,0.269,0.000002,0.1080,0.558,99.981
1,"Raanjhan (From ""Do Patti"")","Sachet-Parampara, Parampara Tandon, Kausar Munir","Raanjhan (From ""Do Patti"")",1poClftP5T3zRcqpsxPPfW,4vHRQnzGcKEtqsLH70tAms,77.0,2024-10-04,240066,False,https://open.spotify.com/track/4vHRQnzGcKEtqsL...,...,0.490,6,-7.328,1,0.0295,0.809,0.000030,0.1070,0.200,92.107
2,Tere Bina Na Guzara E,Josh Brar,Tere Bina Na Guzara E,2F1gMtjzItjOoTMgp9qakV,0NXnuTqFQHDsbEIoKIAVCN,74.0,2024-09-26,220800,False,https://open.spotify.com/track/0NXnuTqFQHDsbEI...,...,0.514,1,-9.401,1,0.0566,0.671,0.000000,0.3850,0.538,74.968
3,"Sajni (From ""Laapataa Ladies"")","Ram Sampath, Arijit Singh, Prashant Pandey","Sajni (From ""Laapataa Ladies"")",3I3kZyHUtEA9Y59rJkxtk6,5zCnGtCl5Ac5zlFHXaZmhy,79.0,2024-02-12,170045,False,https://open.spotify.com/track/5zCnGtCl5Ac5zlF...,...,0.413,0,-7.783,1,0.0281,0.827,0.000009,0.1080,0.355,79.646
4,"Tum Se (From ""Teri Baaton Mein Aisa Uljha Jiya"")","Sachin-Jigar, Raghav Chaitanya, Varun Jain, In...","Tum Se (From ""Teri Baaton Mein Aisa Uljha Jiya"")",3vVIhgkDoC0vRBba5drHPe,2ceeTJAzKy295Fm0VsaXtE,75.0,2024-02-02,264090,False,https://open.spotify.com/track/2ceeTJAzKy295Fm...,...,0.595,10,-6.356,1,0.0331,0.680,0.000001,0.1040,0.435,99.991
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,Paniyon Sa,"Atif Aslam, Tulsi Kumar",Satyameva Jayate,5Ni5qgjE8gMZl4TXrbTwoV,7tl1En7BCSnD6Qb5OGlEIE,52.0,2018-08-01,236240,False,https://open.spotify.com/track/7tl1En7BCSnD6Qb...,...,0.768,0,-4.953,0,0.0566,0.447,0.000226,0.0833,0.669,75.030
71,Ik Vaari Aa,"Pritam, Arijit Singh",Raabta,05U9YwErzcnF0PmyPalDo5,7fyiYhn7STpHvT5g0lbncC,62.0,2017-06-03,274687,False,https://open.spotify.com/track/7fyiYhn7STpHvT5...,...,0.752,4,-4.857,0,0.0534,0.305,0.000000,0.7080,0.428,127.965
72,Chale Aana,"Armaan Malik, Kunaal Vermaa",De De Pyaar De,31lh22jsvWDf5ffgRAXaMs,4clW7QGXTCYU5F5dTyfFJ2,56.0,2019-05-15,271023,False,https://open.spotify.com/track/4clW7QGXTCYU5F5...,...,0.587,6,-6.676,0,0.0378,0.309,0.000000,0.1490,0.391,143.687
73,Mere Sohneya,"Sachet Tandon, Parampara Tandon",Kabir Singh,3uuu6u13U0KeVQsZ3CZKK4,7sJQv0LRPgM2xCkkdWOlyj,62.0,2019-06-14,193356,False,https://open.spotify.com/track/7sJQv0LRPgM2xCk...,...,0.643,9,-5.713,1,0.0411,0.558,0.000000,0.2170,0.646,136.228


In [9]:
# Count the missing values in each column
print(music_df.isna().sum())

Track Name          0
Artists             0
Album Name          0
Album ID            0
Track ID            0
Popularity          1
Release Date        0
Duration (ms)       0
Explicit            1
External URLs       1
Danceability        0
Energy              0
Key                 0
Loudness            0
Mode                0
Speechiness         0
Acousticness        0
Instrumentalness    0
Liveness            0
Valence             0
Tempo               0
dtype: int64


In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity

In [11]:
# Alias only: `data` refers to the same DataFrame object as music_df (no copy is made).
# This also rebinds the name `data`, which the token-request cell used for its POST payload.
data = music_df

In [12]:
# Function to calculate a recency weight from a release date
def calculate_weighted_popularity(release_date):
    """Return a recency weight in (0, 1] for a release date string.

    The weight is ``1 / (days_since_release + 1)``, so a release dated today
    gets 1.0 and older releases get progressively smaller weights.

    Parameters
    ----------
    release_date : str
        Date formatted as 'YYYY-MM-DD', 'YYYY-MM' or 'YYYY'. For the shorter
        forms the missing month/day default to 1 (``datetime.strptime``
        behaviour).

    Returns
    -------
    float
        The recency weight.

    Raises
    ------
    ValueError
        If the string matches none of the accepted formats.
    TypeError
        If ``release_date`` is not a string (raised by ``datetime.strptime``).
    """
    # NOTE(review): Spotify documents release dates at day, month or year
    # precision, so the shorter forms are accepted as well as full dates.
    for date_format in ('%Y-%m-%d', '%Y-%m', '%Y'):
        try:
            parsed_date = datetime.strptime(release_date, date_format)
            break
        except ValueError:
            continue
    else:
        raise ValueError(f"Unrecognised release date format: {release_date!r}")

    # A release dated in the future yields a negative day count; clamp it to 0
    # so the weight stays positive and a count of -1 cannot divide by zero.
    elapsed_days = max((datetime.now() - parsed_date).days, 0)

    return 1 / (elapsed_days + 1)

In [13]:
# Pull the numeric audio-feature columns out as an array, then Min-Max scale them
music_features = music_df.loc[:, [
    'Danceability', 'Energy', 'Key',
    'Loudness', 'Mode', 'Speechiness', 'Acousticness',
    'Instrumentalness', 'Liveness', 'Valence', 'Tempo',
]].to_numpy()
scaler = MinMaxScaler()
music_features_scaled = scaler.fit_transform(music_features)

In [14]:
# Content-based recommendations from the scaled audio features
def content_based_recommendations(input_song_name, num_recommendations=5):
    """Return the tracks whose scaled audio features are most similar to a song.

    Reads the module-level ``music_df`` and ``music_features_scaled`` and ranks
    every row by cosine similarity to the first row whose 'Track Name' equals
    ``input_song_name``.

    Parameters
    ----------
    input_song_name : str
        Exact 'Track Name' value of the seed song.
    num_recommendations : int, default 5
        Maximum number of rows to return; values below 0 are treated as 0.

    Returns
    -------
    pandas.DataFrame or None
        Rows of ``music_df`` (columns 'Track Name', 'Artists', 'Album Name',
        'Release Date', 'Popularity') ordered from most to least similar, with
        the seed row excluded. ``None`` (after printing a message) when the
        song is not in the dataset.
    """
    name_matches = (music_df['Track Name'] == input_song_name).to_numpy()
    if not name_matches.any():
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    # Positional row of the first match. music_features_scaled is a plain
    # array, so it must be addressed by position rather than by index label.
    input_song_position = int(name_matches.argmax())

    # Calculate the similarity scores based on music features (cosine similarity)
    similarity_scores = cosine_similarity([music_features_scaled[input_song_position]], music_features_scaled)

    # Rank all rows from most to least similar, then drop the seed row
    # explicitly: with tied scores it is not guaranteed to be ranked first.
    ranked_positions = similarity_scores[0].argsort()[::-1]
    similar_song_positions = ranked_positions[ranked_positions != input_song_position]
    similar_song_positions = similar_song_positions[:max(num_recommendations, 0)]

    return music_df.iloc[similar_song_positions][['Track Name', 'Artists', 'Album Name', 'Release Date', 'Popularity']]

In [15]:
def hybrid_recommendations(input_song_name, num_recommendations=5, alpha=0.5):
    """Return content-based recommendations ordered by popularity.

    The candidates come from ``content_based_recommendations`` and are sorted
    by their 'Popularity' column in descending order.

    The earlier implementation also built a row for the seed song with a
    recency-weighted popularity, appended it, and then filtered that same row
    out again before returning. That row never reached the caller, but
    computing it could raise (for example when the seed song has no release
    date), so it is no longer built.

    Parameters
    ----------
    input_song_name : str
        Exact 'Track Name' value of the seed song.
    num_recommendations : int, default 5
        Forwarded to ``content_based_recommendations``.
    alpha : float, default 0.5
        Accepted for backward compatibility; not used by the ranking.

    Returns
    -------
    pandas.DataFrame or None
        Recommended tracks with a fresh 0..n-1 index assigned before sorting,
        ordered by 'Popularity' descending, excluding any row whose
        'Track Name' equals ``input_song_name``. ``None`` (after printing a
        message) when the song is not in the dataset.
    """
    if input_song_name not in music_df['Track Name'].values:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    content_based_rec = content_based_recommendations(input_song_name, num_recommendations)

    # Renumber first so the returned index labels reflect similarity rank
    ranked = content_based_rec.reset_index(drop=True).sort_values(by='Popularity', ascending=False)

    # Guard against another playlist entry carrying the same name as the seed
    return ranked[ranked['Track Name'] != input_song_name]

In [16]:
# Seed song for the demo; it is compared against 'Track Name' with an exact match
input_song_name = "Chale Aana"
recommendations = hybrid_recommendations(input_song_name=input_song_name, num_recommendations=5)

# Wrap the result in a DataFrame for display
recommendations_df = pd.DataFrame(data=recommendations)

In [17]:
# Show the recommendations (bare last expression of the cell, so the notebook renders it)
recommendations_df

Unnamed: 0,Track Name,Artists,Album Name,Release Date,Popularity
1,Zihaal e Miskin,"Javed-Mohsin, Vishal Mishra, Shreya Ghoshal",Zihaal e Miskin,2023-05-25,71.0
2,Manjha,Vishal Mishra,Manjha,2020-03-17,71.0
4,Tere Pyaar Mein,"Pritam, Arijit Singh, Amitabh Bhattacharya, Ni...","Tere Pyaar Mein (From ""Tu Jhoothi Main Makkaar"")",2023-02-01,71.0
3,Pal,"Javed-Mohsin, Arijit Singh, Shreya Ghoshal, Ku...",Jalebi (Original Motion Picture Soundtrack),2018-09-21,66.0
0,"Naseeb Se (From ""Satyaprem Ki Katha"")","Payal Dev, Vishal Mishra, A.M. Turaz","Naseeb Se (From ""Satyaprem Ki Katha"")",2023-05-27,63.0
