In [1]:
import base64
import os
import warnings

import requests

In [2]:
# Spotify API credentials are read from the environment so that the secret
# never lives in the notebook. Set SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET
# before starting the kernel. Any secret that was previously committed in this
# cell should be treated as leaked and rotated.
CLIENT_ID = os.environ.get('SPOTIFY_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('SPOTIFY_CLIENT_SECRET', '')

if not (CLIENT_ID and CLIENT_SECRET):
    # Warn early so a missing variable is not mistaken for an API outage later.
    warnings.warn(
        "SPOTIFY_CLIENT_ID / SPOTIFY_CLIENT_SECRET are not set; "
        "the access-token request will fail."
    )

In [3]:
# Join id and secret as "id:secret", then base64-encode the bytes so the value
# can be placed in an HTTP Basic Authorization header.
client_credentials = "{}:{}".format(CLIENT_ID, CLIENT_SECRET)
client_credentials_base64 = base64.b64encode(client_credentials.encode('utf-8'))

In [4]:
# Exchange the base64-encoded client credentials for an access token using the
# 'client_credentials' grant.
token_url = 'https://accounts.spotify.com/api/token'
headers = {
    'Authorization': f'Basic {client_credentials_base64.decode()}'
}
data = {
    'grant_type': 'client_credentials'
}
# The timeout keeps this cell from hanging indefinitely if the endpoint is
# unreachable.
response = requests.post(token_url, data=data, headers=headers, timeout=30)

if response.status_code == 200:
    access_token = response.json()['access_token']
    print("Access token obtained successfully.")
else:
    print("Error obtaining access token.")
    # Raise instead of calling exit(): exit() stops the interpreter without
    # saying why, whereas the exception reports the HTTP status and body and
    # halts a "Run All" at this cell.
    raise RuntimeError(
        f"Token request failed with HTTP {response.status_code}: {response.text}"
    )

Access token obtained successfully.


In [5]:
# %pip installs into the environment of the running kernel; the version is
# pinned to the release this notebook was last run against.
%pip install spotipy==2.24.0

Collecting spotipy
  Downloading spotipy-2.24.0-py3-none-any.whl.metadata (4.9 kB)
Collecting redis>=3.5.3 (from spotipy)
  Downloading redis-5.1.1-py3-none-any.whl.metadata (9.1 kB)
Downloading spotipy-2.24.0-py3-none-any.whl (30 kB)
Downloading redis-5.1.1-py3-none-any.whl (261 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.3/261.3 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: redis, spotipy
Successfully installed redis-5.1.1 spotipy-2.24.0


In [6]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth

# Output columns, in the order they appear in the returned DataFrame.
_PLAYLIST_COLUMNS = (
    'Track Name', 'Artists', 'Album Name', 'Album ID', 'Track ID',
    'Popularity', 'Release Date', 'Duration (ms)', 'Explicit', 'External URLs',
    'Danceability', 'Energy', 'Key', 'Loudness', 'Mode', 'Speechiness',
    'Acousticness', 'Instrumentalness', 'Liveness', 'Valence', 'Tempo',
)


def _call_or_none(api_call, *args):
    """Run a best-effort Spotify client call, returning None if it raises."""
    try:
        return api_call(*args)
    except Exception:
        # Best-effort lookup: the caller records the value as missing.
        # `Exception` (not a bare except) lets KeyboardInterrupt propagate.
        return None


def _is_usable_id(value):
    """Return True for a non-empty id that is not the 'Not available' sentinel."""
    return bool(value) and value != 'Not available'


def get_trending_playlist_data(playlist_id, access_token):
    """Collect track metadata and audio features for a playlist.

    For every playlist item this fetches the track's audio features, its
    album (for the release date) and the full track object (for popularity,
    explicit flag and external URL). The album and track lookups are
    best-effort: if they fail, the corresponding fields are None. Errors from
    the playlist request and the audio-features request propagate.

    Args:
        playlist_id: Spotify playlist id.
        access_token: Access token passed to ``spotipy.Spotify(auth=...)``.

    Returns:
        pandas.DataFrame with one row per playlist track and the columns in
        ``_PLAYLIST_COLUMNS`` (present even when the playlist yields no rows).
        Items whose ``track`` entry is empty are skipped.
    """
    sp = spotipy.Spotify(auth=access_token)

    # Get the tracks from the playlist
    playlist_tracks = sp.playlist_tracks(playlist_id, fields='items(track(id, name, artists, album(id, name)))')

    music_data = []
    for item in playlist_tracks['items']:
        track = item.get('track') if item else None
        if not track:
            # Nothing to describe for this item; skip it instead of crashing.
            continue

        album = track.get('album') or {}
        album_id = album.get('id')
        track_id = track.get('id')
        has_track_id = _is_usable_id(track_id)

        # Audio features: the client returns a list; guard against an empty
        # result or a None entry before indexing.
        audio_features = None
        if has_track_id:
            features_result = sp.audio_features(track_id)
            audio_features = features_result[0] if features_result else None
        audio_features = audio_features or {}

        # Release date comes from the album object (best-effort).
        album_info = _call_or_none(sp.album, album_id) if _is_usable_id(album_id) else None
        release_date = album_info.get('release_date') if album_info else None

        # Full track object (best-effort). Kept in its own variable so the
        # playlist item is never overwritten, and defaulted to {} so the
        # lookups below cannot fail on None.
        track_details = (_call_or_none(sp.track, track_id) if has_track_id else None) or {}

        music_data.append({
            'Track Name': track.get('name'),
            'Artists': ', '.join(artist['name'] for artist in (track.get('artists') or [])),
            'Album Name': album.get('name'),
            'Album ID': album_id,
            'Track ID': track_id,
            'Popularity': track_details.get('popularity'),
            'Release Date': release_date,
            'Duration (ms)': audio_features.get('duration_ms'),
            'Explicit': track_details.get('explicit'),
            'External URLs': (track_details.get('external_urls') or {}).get('spotify'),
            'Danceability': audio_features.get('danceability'),
            'Energy': audio_features.get('energy'),
            'Key': audio_features.get('key'),
            'Loudness': audio_features.get('loudness'),
            'Mode': audio_features.get('mode'),
            'Speechiness': audio_features.get('speechiness'),
            'Acousticness': audio_features.get('acousticness'),
            'Instrumentalness': audio_features.get('instrumentalness'),
            'Liveness': audio_features.get('liveness'),
            'Valence': audio_features.get('valence'),
            'Tempo': audio_features.get('tempo'),
        })

    # Passing the column list keeps the schema stable for an empty playlist.
    df = pd.DataFrame(music_data, columns=list(_PLAYLIST_COLUMNS))

    return df



In [7]:
# Playlist whose tracks are loaded below.
playlist_id = '4IlbA17RKTZflJrbpMa1Mu'

# Fetch metadata and audio features for the playlist's tracks into a DataFrame.
music_df = get_trending_playlist_data(playlist_id=playlist_id, access_token=access_token)

# Show the resulting table.
print(music_df)

                                           Track Name  \
0                                        Lo Maan Liya   
1                                   Agar Tum Saath Ho   
2   Hamari Adhuri Kahani (Title Track) [From "Hama...   
3   Ve Kamleya (From "Rocky Aur Rani Kii Prem Kaha...   
4                                  Roke Na Ruke Naina   
..                                                ...   
95        Tu Hi Yaar Mera (From "Pati Patni Aur Woh")   
96                                          Tu Mileya   
97              Mummy Nu Pasand (From "Jai Mummy Di")   
98                                         Bhula Dena   
99                                 Chori Kiya Re Jiya   

                                              Artists  \
0                                        Arijit Singh   
1                           Alka Yagnik, Arijit Singh   
2                         Jeet Gannguli, Arijit Singh   
3   Pritam, Arijit Singh, Shreya Ghoshal, Shadab F...   
4                             

In [8]:
# Count missing values per column (isna is the same check as isnull).
print(music_df.isna().sum())

Track Name          0
Artists             0
Album Name          0
Album ID            0
Track ID            0
Popularity          0
Release Date        0
Duration (ms)       0
Explicit            0
External URLs       0
Danceability        0
Energy              0
Key                 0
Loudness            0
Mode                0
Speechiness         0
Acousticness        0
Instrumentalness    0
Liveness            0
Valence             0
Tempo               0
dtype: int64


In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity

# `data` is bound to the same DataFrame object as `music_df` (no copy is made).
data = music_df

In [10]:
def calculate_weighted_popularity(release_date, reference_date=None):
    """Return a recency weight for a release date: ``1 / (age_in_days + 1)``.

    More recent releases get a larger weight.

    Args:
        release_date: Release date string in ``YYYY-MM-DD``, ``YYYY-MM`` or
            ``YYYY`` form. A missing month or day is parsed as the first of
            the period.
        reference_date: ``datetime`` the age is measured against. Defaults to
            ``datetime.now()``.

    Returns:
        A float in ``(0, 1]``. A release dated on or after ``reference_date``
        is treated as zero days old and gets a weight of 1.0.

    Raises:
        ValueError: if ``release_date`` matches none of the supported formats.
    """
    # Try the most specific format first; fall back to coarser precisions.
    release_dt = None
    for date_format in ('%Y-%m-%d', '%Y-%m', '%Y'):
        try:
            release_dt = datetime.strptime(release_date, date_format)
            break
        except ValueError:
            continue
    if release_dt is None:
        raise ValueError(
            f"Unsupported release date {release_date!r}; "
            "expected YYYY-MM-DD, YYYY-MM or YYYY."
        )

    if reference_date is None:
        reference_date = datetime.now()

    # Clamp at zero so a future-dated release cannot make the denominator
    # zero or negative.
    age_days = max((reference_date - release_dt).days, 0)

    return 1 / (age_days + 1)

In [11]:
# Normalize the music features using Min-Max scaling
# Pull the numeric audio-feature columns out as an array ...
music_features = music_df.loc[:, [
    'Danceability', 'Energy', 'Key',
    'Loudness', 'Mode', 'Speechiness', 'Acousticness',
    'Instrumentalness', 'Liveness', 'Valence', 'Tempo',
]].values

# ... and normalize them with Min-Max scaling.
scaler = MinMaxScaler()
music_features_scaled = scaler.fit_transform(music_features)

In [12]:
# a function to get content-based recommendations based on music features
def content_based_recommendations(input_song_name, num_recommendations=10):
    """Recommend the tracks whose scaled audio features are most similar.

    Similarity is the cosine similarity between the input song's row of
    ``music_features_scaled`` and every row of that array. The input song is
    excluded by position, so it is never returned even when other tracks tie
    with it (for example a duplicate entry with identical features).

    Args:
        input_song_name: Exact ``Track Name`` to look up in ``music_df``. If
            the name occurs more than once, the first occurrence is used.
        num_recommendations: Maximum number of rows to return.

    Returns:
        DataFrame with columns Track Name, Artists, Album Name, Release Date
        and Popularity, ordered from most to least similar; or None (after
        printing a message) when the song is not in ``music_df``.
    """
    # Positional (not label-based) location of the song, so it can index both
    # the feature array and DataFrame.iloc regardless of music_df's index.
    name_matches = np.flatnonzero((music_df['Track Name'] == input_song_name).to_numpy())
    if name_matches.size == 0:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    input_position = int(name_matches[0])

    # Calculate the similarity scores based on music features (cosine similarity)
    similarity_scores = np.asarray(
        cosine_similarity([music_features_scaled[input_position]], music_features_scaled)
    )[0]

    # Rank by descending similarity (stable, so ties keep dataset order), then
    # drop the input song explicitly rather than assuming it sorts first.
    ranked_positions = np.argsort(-similarity_scores, kind='stable')
    ranked_positions = ranked_positions[ranked_positions != input_position]
    top_positions = ranked_positions[:max(num_recommendations, 0)]

    return music_df.iloc[top_positions][['Track Name', 'Artists', 'Album Name', 'Release Date', 'Popularity']]

In [13]:
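# NOTE: earlier draft of hybrid_recommendations, left commented out; the active definition is in the next cell.
#import pandas as pd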

#def hybrid_recommendations(input_song_name, num_recommendations=10, alpha=0.5):
 #   if input_song_name not in music_df['Track Name'].values:
  #      print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
   #     return

    #content_based_rec = content_based_recommendations(input_song_name, num_recommendations)

    #popularity_score = music_df.loc[music_df['Track Name'] == input_song_name, 'Popularity'].values[0]

    #weighted_popularity_score = popularity_score * calculate_weighted_popularity(
     #   music_df.loc[music_df['Track Name'] == input_song_name, 'Release Date'].values[0]
    #)

    #new_entry = pd.DataFrame({
     #   'Track Name': [input_song_name],
      #  'Artists': [music_df.loc[music_df['Track Name'] == input_song_name, 'Artists'].values[0]],
       # 'Album Name': [music_df.loc[music_df['Track Name'] == input_song_name, 'Album Name'].values[0]],
       # 'Release Date': [music_df.loc[music_df['Track Name'] == input_song_name, 'Release Date'].values[0]],
       # 'Popularity': [weighted_popularity_score]
    #})

    #hybrid_recommendations = pd.concat([content_based_rec, new_entry], ignore_index=True)

    #hybrid_recommendations = hybrid_recommendations.sort_values(by='Popularity', ascending=False)

    #hybrid_recommendations = hybrid_recommendations[hybrid_recommendations['Track Name'] != input_song_name]

    #return hybrid_recommendations

In [14]:
import pandas as pd

def hybrid_recommendations(input_song_name, num_recommendations=10, alpha=0.5):
    """Return content-based recommendations re-ranked by popularity.

    Lookup order for ``input_song_name``:
      1. exact match on ``Track Name``;
      2. otherwise the first track whose name contains the input as a literal,
         case-insensitive substring;
      3. otherwise a single placeholder row built from dataset-wide defaults.

    Args:
        input_song_name: Full or partial track name.
        num_recommendations: Maximum number of rows to return.
        alpha: Accepted for backward compatibility; currently unused.

    Returns:
        DataFrame with columns Track Name, Artists, Album Name, Release Date
        and Popularity, sorted by Popularity (descending), never containing
        the input song itself. ``Popularity`` keeps the dtype it has in
        ``music_df``.
    """
    track_names = music_df['Track Name']

    # Check if song is in the dataset
    if input_song_name not in track_names.values:
        # regex=False: titles routinely contain '(' and '[' which must be
        # matched literally rather than parsed as a regular expression.
        similar_songs = music_df[
            track_names.str.contains(input_song_name, case=False, na=False, regex=False)
        ]

        # If similar songs are found, use the first similar song
        if not similar_songs.empty:
            print(f"'{input_song_name}' not found, showing recommendations for a similar song.")
            input_song_name = similar_songs.iloc[0]['Track Name']
        else:
            # If no similar songs are found, fallback to default values
            print(f"'{input_song_name}' not found. Using default values for recommendations.")

            # Average popularity and most common release date of the dataset;
            # the release date is None when the dataset has no dates at all.
            default_popularity = music_df['Popularity'].mean()
            release_date_modes = music_df['Release Date'].mode()
            default_release_date = release_date_modes.iloc[0] if not release_date_modes.empty else None

            new_entry = pd.DataFrame({
                'Track Name': [input_song_name],
                'Artists': ['Unknown Artist'],
                'Album Name': ['Unknown Album'],
                'Release Date': [default_release_date],
                'Popularity': [default_popularity]
            })

            # Add the default entry as a recommendation
            return new_entry.head(num_recommendations)

    # Content-based recommendations for the found/updated song
    content_based_rec = content_based_recommendations(input_song_name, num_recommendations)

    # The previous implementation appended a recency-weighted row for the
    # input song here and then filtered that same row out again, so it never
    # influenced the result; it only added a failure path (date parsing) and
    # upcast Popularity to float. The row is therefore no longer built.
    ranked = content_based_rec.reset_index(drop=True).sort_values(by='Popularity', ascending=False)

    # Remove the input song from the final list (e.g. a same-named duplicate).
    ranked = ranked[ranked['Track Name'] != input_song_name]

    return ranked.head(num_recommendations)


In [15]:
# Request ten hybrid recommendations seeded by a song title and print them.
input_song_name = "Hamari Adhuri Kahani"
recommendations = hybrid_recommendations(input_song_name, num_recommendations=10)
print("Hybrid recommended songs for '{}':".format(input_song_name))
print(recommendations)

'Hamari Adhuri Kahani' not found, showing recommendations for a similar song.
Hybrid recommended songs for 'Hamari Adhuri Kahani':
                                          Track Name  \
4                                          Aaj Bhi 2   
0               Mann Bharryaa 2.0 (From "Shershaah")   
9                    Tenu Sang Rakhna (From "Jigra")   
3                               Abhi Mujh Mein Kahin   
6                         Raanjhan (From "Do Patti")   
5                                  Baarish Lete Aana   
1                                   Lambiya Judaiyan   
8                                    Teri Ho Na Saki   
7  Ro Lain De (From "Rocky Aur Rani Kii Prem Kaha...   
2                                             Faasle   

                                             Artists  \
4                                      Vishal Mishra   
0                                     B Praak, Jaani   
9  Achint, Arijit Singh, Anumita Nadesan, Varun G...   
3                           