In [1]:
# Imports
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load dataset
# The CSV is read from the current working directory. A lowercased copy of
# each title is stored in `name_lower` so lookups can match case-insensitively.
data_scaled_df = pd.read_csv("scaled_dataset.csv").assign(
    name_lower=lambda frame: frame['name'].str.lower()
)


In [2]:
def get_song_data(song_name: str):
    """
    Look up a song by title in ``data_scaled_df``, ignoring case.

    The title is stripped and lowercased, then compared against the
    ``name_lower`` column. When several rows match, the one with the largest
    ``year`` is returned. A status line is printed in both the found and the
    not-found case.

    Returns the matching row as a pandas Series, or ``None`` when no row matches.
    """
    query = song_name.strip().lower()
    candidates = data_scaled_df.loc[data_scaled_df['name_lower'] == query]

    # Guard clause: nothing matched, report and bail out early.
    if candidates.empty:
        print(f"❌ Song '{query}' not found in the dataset.")
        return None

    # Newest release first, then take the top row.
    newest = candidates.sort_values(by='year', ascending=False).iloc[0]
    print(f"✅ Found: {newest['name']} ({newest['year']}) by {newest['artists']}")
    return newest


In [3]:
def recommend_songs(input_songs, num_recommendations=10):
    """
    Recommend songs whose audio features are closest to a set of seed songs.

    Each seed title is resolved with ``get_song_data``; titles that are not
    found are skipped. Every row of ``data_scaled_df`` is scored by its cosine
    similarity to each resolved seed, and the per-seed scores are averaged.
    Rows sharing a (lowercased name, year) pair with any seed are excluded
    from the results.

    Parameters
    ----------
    input_songs : str or iterable of str
        Seed song titles. A single title may be passed as a plain string.
    num_recommendations : int, default 10
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``artists``, ``year`` and ``similarity_score``,
        sorted by descending score. An empty DataFrame is returned when no
        seed title could be resolved.

    Notes
    -----
    Prints the recommendations (or a warning when nothing matched) as a side
    effect, in addition to whatever ``get_song_data`` prints per title.
    """
    # Single source of truth for the feature columns used on both sides of
    # the similarity computation.
    feature_columns = [
        'danceability', 'energy', 'key', 'loudness', 'mode',
        'speechiness', 'acousticness', 'instrumentalness',
        'liveness', 'valence', 'tempo'
    ]

    # A bare string would otherwise be iterated character by character.
    if isinstance(input_songs, str):
        input_songs = [input_songs]

    seed_rows = [get_song_data(song) for song in input_songs]
    seed_rows = [row for row in seed_rows if row is not None]

    if not seed_rows:
        print("⚠️ No valid input songs found.")
        return pd.DataFrame()

    seed_features = pd.DataFrame(seed_rows)[feature_columns]
    all_features = data_scaled_df[feature_columns]

    # One row of similarities per seed; average them into one score per song.
    similarity_scores = cosine_similarity(seed_features, all_features).mean(axis=0)

    # Flag rows matching any seed on (lowercased name, year). Column-wise
    # comparisons avoid a Python-level call per dataset row and simply yield
    # False for rows with a missing name instead of raising on ``.lower()``.
    names = data_scaled_df['name_lower']
    years = data_scaled_df['year']
    is_seed = np.zeros(len(data_scaled_df), dtype=bool)
    for row in seed_rows:
        is_seed |= ((names == row['name_lower']) & (years == row['year'])).to_numpy(dtype=bool)
    keep = ~is_seed

    # Only carry the columns that are returned; no need to copy the full frame.
    recommended_songs = (
        data_scaled_df.loc[keep, ['name', 'artists', 'year']]
        .assign(similarity_score=similarity_scores[keep])
        .sort_values(by='similarity_score', ascending=False)
        .head(num_recommendations)
    )

    print("\n🎵 Recommended Songs:")
    for song in recommended_songs.itertuples(index=False):
        print(f"{song.name} ({song.year}) by {song.artists} — Score: {song.similarity_score:.4f}")

    return recommended_songs


In [4]:
# Seed titles for the demo run; get_song_data matches them case-insensitively.
input_songs = ['90210', 'Blinding Lights', 'Shape of You']
# Uses the default of 10 recommendations; the call also prints what it finds.
recommended = recommend_songs(input_songs)
# Bare last expression so the notebook renders the returned DataFrame.
recommended


✅ Found: 90210 (2015) by ['blackbear']
✅ Found: Blinding Lights (2020) by ['The Weeknd']
✅ Found: Shape of You (2017) by ['Ed Sheeran']

🎵 Recommended Songs:
You Calling My Name (2019) by ['GOT7'] — Score: 0.7053
Take Over Control (feat. Eva Simons) (2011) by ['Afrojack', 'Eva Simons'] — Score: 0.7005
Want to Want Me (2015) by ['Jason Derulo'] — Score: 0.6960
Die Another Day (2003) by ['Madonna'] — Score: 0.6942
Chante's Got A Man (1999) by ['Chanté Moore'] — Score: 0.6936
Extraño Mi Pueblo (1999) by ['Frank Reyes'] — Score: 0.6936
Drag Me Down (2015) by ['One Direction'] — Score: 0.6935
Crazy Kids (2012) by ['Kesha'] — Score: 0.6931
Cashin' Out (2012) by ['Ca$h Out'] — Score: 0.6911
Crush (1998) by ['Jennifer Paige'] — Score: 0.6901


Unnamed: 0,name,artists,year,similarity_score
140600,You Calling My Name,['GOT7'],2019,0.705299
73304,Take Over Control (feat. Eva Simons),"['Afrojack', 'Eva Simons']",2011,0.700519
18726,Want to Want Me,['Jason Derulo'],2015,0.696028
88939,Die Another Day,['Madonna'],2003,0.694204
104578,Chante's Got A Man,['Chanté Moore'],1999,0.693627
120634,Extraño Mi Pueblo,['Frank Reyes'],1999,0.693569
18662,Drag Me Down,['One Direction'],2015,0.693487
90658,Crazy Kids,['Kesha'],2012,0.693069
37051,Cashin' Out,['Ca$h Out'],2012,0.691107
34219,Crush,['Jennifer Paige'],1998,0.690143
