In [1]:
import os
from datetime import datetime

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [3]:
# Location of the playlist export. The default is the original author's
# machine-specific path; set the PLAYLIST_DATA_CSV environment variable to
# point the notebook at the file on any other machine.
PLAYLIST_DATA_CSV = os.environ.get(
    "PLAYLIST_DATA_CSV",
    "D:\\2_Extra\\Data Science\\Machine Learning Projects\\Music_Recommendation_Spotify\\playlist_data.csv",
)
df = pd.read_csv(PLAYLIST_DATA_CSV)

In [4]:
# Preview the first rows to sanity-check that the CSV loaded as expected.
df.head()

Unnamed: 0,Track Name,Artists,Album Name,Album ID,Track ID,Popularity,Release Date,Duration (ms),Explicit,External URLs,...,Energy,Key,Loudness,Mode,Speechiness,Acousticness,Instrumentalness,Liveness,Valence,Tempo
0,Zara Sa,"Pritam, KK",Jannat (Original Motion Picture Soundtrack),153Qw0xXJqhEFxvm9M9rE8,18YHbIhrleUkKKj2DvEp79,66,2008-03-07,303680,False,https://open.spotify.com/track/18YHbIhrleUkKKj...,...,0.91,11,-3.057,0,0.051,0.12,0.000304,0.271,0.694,163.842
1,Kya Mujhe Pyar Hai,KK,Woh Lamhe,4HQwIiFKCkiOCI6b9oAiAR,3HFjh7QljnUCBLsoTwMuTj,59,2006-08-17,266653,False,https://open.spotify.com/track/3HFjh7QljnUCBLs...,...,0.818,6,-5.924,0,0.0549,0.431,2.6e-05,0.0806,0.789,118.477
2,Ajab Si,KK,Om Shanti Om,2cUzlmLfL5LUTSEk7qG09k,5FXMRdJjKq1BIX4e8Eg9mK,62,2007-09-18,241668,False,https://open.spotify.com/track/5FXMRdJjKq1BIX4...,...,0.438,8,-8.411,1,0.0289,0.397,0.0,0.293,0.58,117.954
3,Sajde,"Pritam, KK, Sunidhi Chauhan",Khatta Meetha,0rA5WWDEVXDEizsgwS2hR4,4N66qcycDiFLJGjnXiKbKX,56,2010-06-21,305583,False,https://open.spotify.com/track/4N66qcycDiFLJGj...,...,0.638,11,-6.715,1,0.0498,0.354,3e-06,0.0972,0.747,90.005
4,Khuda Jaane,"Vishal-Shekhar, KK, Shilpa Rao, Anvita Dutt Gu...",Bachna Ae Haseeno,4mFLdK8SkUo3cx8ERLQC92,0gPgdRfB4jdGrlyXS0Vx78,67,2008-07-05,333182,False,https://open.spotify.com/track/0gPgdRfB4jdGrly...,...,0.634,8,-5.979,1,0.0398,0.411,0.000526,0.106,0.702,78.508


In [5]:
# Summarise column dtypes and non-null counts (useful for spotting missing values).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 448 entries, 0 to 447
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Track Name        430 non-null    object 
 1   Artists           430 non-null    object 
 2   Album Name        430 non-null    object 
 3   Album ID          448 non-null    object 
 4   Track ID          448 non-null    object 
 5   Popularity        448 non-null    int64  
 6   Release Date      448 non-null    object 
 7   Duration (ms)     448 non-null    int64  
 8   Explicit          448 non-null    bool   
 9   External URLs     448 non-null    object 
 10  Danceability      448 non-null    float64
 11  Energy            448 non-null    float64
 12  Key               448 non-null    int64  
 13  Loudness          448 non-null    float64
 14  Mode              448 non-null    int64  
 15  Speechiness       448 non-null    float64
 16  Acousticness      448 non-null    float64
 1

In [6]:
# (rows, columns) of the loaded frame.
df.shape

(448, 21)

# Calculate weighted popularity scores based on release date

In [7]:
def calculate_weighted_popularity(release_date, reference_date=None):
    """Return a recency weight for a track: ``1 / (age_in_days + 1)``.

    More recent releases get a larger weight; a track released on the
    reference day gets the maximum weight of 1.0.

    Parameters
    ----------
    release_date : str
        Release date as ``'YYYY-MM-DD'``. Reduced-precision dates
        (``'YYYY-MM'`` or ``'YYYY'``) are also accepted and are treated as
        the first day of that month / year.
    reference_date : datetime, optional
        The "today" used to compute the track's age. Defaults to
        ``datetime.now()``; pass a fixed value for reproducible results.

    Returns
    -------
    float
        A weight in the interval (0, 1].

    Raises
    ------
    ValueError
        If ``release_date`` matches none of the supported formats.
    """
    date_text = str(release_date).strip()

    # Try the most precise format first, then fall back to coarser ones.
    for date_format in ('%Y-%m-%d', '%Y-%m', '%Y'):
        try:
            released_on = datetime.strptime(date_text, date_format)
            break
        except ValueError:
            continue
    else:
        raise ValueError(
            f"Unsupported release date {release_date!r}; "
            "expected 'YYYY-MM-DD', 'YYYY-MM' or 'YYYY'."
        )

    if reference_date is None:
        reference_date = datetime.now()

    # Clamp at zero: a release date in the future would otherwise yield a
    # negative weight, or a division by zero when it is exactly one day ahead.
    age_in_days = max((reference_date - released_on).days, 0)

    return 1 / (age_in_days + 1)

In [8]:
# Pull the numeric audio-feature columns out of the frame as a plain array,
# one row per track, in the same row order as `df`.
music_features = df.loc[:, [
    'Danceability', 'Energy', 'Key',
    'Loudness', 'Mode', 'Speechiness', 'Acousticness',
    'Instrumentalness', 'Liveness', 'Valence', 'Tempo',
]].to_numpy()

# Rescale every feature column with a min-max scaler so that wide-ranged
# columns (e.g. Tempo, Loudness) do not dominate the similarity computation.
scaler = MinMaxScaler()
music_features_scaled = scaler.fit_transform(music_features)

In [9]:
# a function to get content-based recommendations based on music features
def content_based_recommendations(input_song_name, num_recommendations=5):
    """Return the tracks whose scaled audio features are closest to a song.

    Similarity is the cosine similarity between the input song's row of
    ``music_features_scaled`` and every other row.

    Parameters
    ----------
    input_song_name : str
        Exact value to look up in ``df['Track Name']``. If several rows share
        the name, the first one is used as the query.
    num_recommendations : int, default 5
        Maximum number of tracks to return; values below 1 yield an empty
        frame.

    Returns
    -------
    pandas.DataFrame or None
        Rows of ``df`` (columns: Track Name, Artists, Album Name,
        Release Date, Popularity) ordered from most to least similar.
        ``None`` is returned, after printing a message, when the song is not
        in the dataset.
    """
    # Work with *positional* row numbers: they are what both
    # `music_features_scaled` and `df.iloc` expect, and they stay correct even
    # if `df` does not carry a default 0..n-1 index.
    matching_rows = np.flatnonzero((df['Track Name'] == input_song_name).to_numpy())
    if matching_rows.size == 0:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    input_song_index = matching_rows[0]

    # Cosine similarity of the input song against every track (1-D array).
    similarity_scores = cosine_similarity(
        [music_features_scaled[input_song_index]], music_features_scaled
    )[0]

    # Rank from most to least similar; the stable sort keeps ties in a
    # deterministic (ascending row) order.
    ranked_indices = np.argsort(-similarity_scores, kind='stable')

    # Drop the query row explicitly instead of assuming it is ranked first:
    # a track with identical features can tie with it at similarity 1.0.
    ranked_indices = ranked_indices[ranked_indices != input_song_index]
    similar_song_indices = ranked_indices[:max(num_recommendations, 0)]

    return df.iloc[similar_song_indices][
        ['Track Name', 'Artists', 'Album Name', 'Release Date', 'Popularity']
    ]

In [10]:
# a function to get hybrid recommendations based on weighted popularity
def hybrid_recommendations(input_song_name, num_recommendations=5, alpha=0.5):
    """Return content-based recommendations ordered by popularity.

    The tracks most similar to ``input_song_name`` are fetched with
    ``content_based_recommendations`` and then sorted by their ``Popularity``
    column in descending order. Any row whose track name equals the input
    song is removed from the result.

    Parameters
    ----------
    input_song_name : str
        Exact value to look up in ``df['Track Name']``.
    num_recommendations : int, default 5
        Number of similar tracks requested from the content-based step.
    alpha : float, default 0.5
        Accepted for backward compatibility; it is not used by the current
        ranking.

    Returns
    -------
    pandas.DataFrame or None
        Columns Track Name, Artists, Album Name, Release Date, Popularity,
        sorted by Popularity (highest first). ``None`` is returned, after
        printing a message, when the song is not in the dataset.
    """
    # Look the input song up once instead of re-filtering `df` per field.
    input_rows = df.loc[df['Track Name'] == input_song_name]
    if input_rows.empty:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    # Get content-based recommendations
    content_based_rec = content_based_recommendations(input_song_name, num_recommendations)

    # Recency-weighted popularity of the input song (first matching row).
    input_song = input_rows.iloc[0]
    weighted_popularity_score = (
        input_song['Popularity'] * calculate_weighted_popularity(input_song['Release Date'])
    )

    # Add the input song as an extra row. `DataFrame.append` was removed in
    # pandas 2.0, so build a one-row frame and concatenate it instead.
    input_song_row = pd.DataFrame([{
        'Track Name': input_song_name,
        'Artists': input_song['Artists'],
        'Album Name': input_song['Album Name'],
        'Release Date': input_song['Release Date'],
        'Popularity': weighted_popularity_score,
    }])
    combined = pd.concat([content_based_rec, input_song_row], ignore_index=True)

    # Sort the combined rows by popularity, highest first.
    combined = combined.sort_values(by='Popularity', ascending=False)

    # NOTE(review): the input song's row (the only one carrying the weighted
    # score) is filtered out here, so the returned order depends solely on the
    # recommendations' raw Popularity values. Confirm whether the weighted
    # score was meant to influence the ranking.
    return combined[combined['Track Name'] != input_song_name]

In [17]:
# Example query: ask for five hybrid recommendations for a song that is
# present in the dataset, then print them under a heading.
input_song_name = "Kya Mujhe Pyar Hai"
recommendations = hybrid_recommendations(input_song_name, num_recommendations=5)
print(f"Hybrid recommended songs for '{input_song_name}':")
print(recommendations)

Hybrid recommended songs for 'Kya Mujhe Pyar Hai':
          Track Name                                      Artists  \
4     Tu Hi Haqeeqat     Pritam, Javed Ali, Irshan Ashraf, Shadab   
0           Sanam Re                        Mithoon, Arijit Singh   
2  Mere Rashke Qamar  Nusrat Fateh Ali Khan, Rahat Fateh Ali Khan   
3           Touch Me            Pritam, KK, Alisha Chinai, Sameer   
1       Ishq Ki Gali          Rahat Fateh Ali Khan, Jayesh Gandhi   

                                      Album Name Release Date  Popularity  
4  Tum Mile (Original Motion Picture Soundtrack)   2009-09-08        62.0  
0                                       Sanam Re   2016-01-04        59.0  
2                                      Baadshaho   2017-08-19        58.0  
3                                        Dhoom:2   2006-10-19        47.0  
1                                Milenge Milenge   2010-06-11        33.0  


  hybrid_recommendations = hybrid_recommendations.append({
