In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Sample catalogue, written one record per song so each row reads left to right:
# (song title, artist, genre tag, mood tag, energy level)
columns = ["song", "artist", "genre", "mood", "energy"]
records = [
    ("Blinding Lights",   "The Weeknd", "Pop",    "Energetic", "High"),
    ("Shape of You",      "Ed Sheeran", "Pop",    "Romantic",  "Medium"),
    ("Bohemian Rhapsody", "Queen",      "Rock",   "Dramatic",  "Medium"),
    ("Someone Like You",  "Adele",      "Ballad", "Sad",       "Low"),
    ("Uptown Funk",       "Bruno Mars", "Funk",   "Happy",     "High"),
]

# Column-oriented view of the same records: field name -> list of values,
# in the same column order as `columns`.
data = {name: list(values) for name, values in zip(columns, zip(*records))}

df = pd.DataFrame(data)

# Bare last expression so the notebook renders the frame as a table
df


Unnamed: 0,song,artist,genre,mood,energy
0,Blinding Lights,The Weeknd,Pop,Energetic,High
1,Shape of You,Ed Sheeran,Pop,Romantic,Medium
2,Bohemian Rhapsody,Queen,Rock,Dramatic,Medium
3,Someone Like You,Adele,Ballad,Sad,Low
4,Uptown Funk,Bruno Mars,Funk,Happy,High


In [15]:

# Build one space-separated "document" per song from its genre, mood and
# energy tags; this is the text the vectorizer sees for each row.
df["features"] = df["genre"].str.cat([df["mood"], df["energy"]], sep=" ")

# Turn each song's tag document into a TF-IDF weighted term vector.
# NOTE(review): stop_words="english" would silently drop any tag that happens
# to be an English stop word — confirm that is intended if the tag set grows.
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(df["features"])

# Pairwise cosine similarity between all songs; with a single argument the
# matrix is compared against itself, giving an (n_songs x n_songs) result
# whose row i holds the similarity of song i to every song.
cosine_sim = cosine_similarity(tfidf_matrix)

# Function to recommend similar songs
def recommend_song(song_name, df, cosine_sim, top_n=2):
    """Return the titles of the songs most similar to ``song_name``.

    Parameters
    ----------
    song_name : str
        Title to look up in ``df["song"]`` (exact match). If the title occurs
        more than once, the first occurrence is used as the query.
    df : pandas.DataFrame
        Frame with a ``"song"`` column. Row *position* ``i`` must correspond to
        row/column ``i`` of ``cosine_sim``; the frame's index labels are not
        used, so filtered or re-indexed frames work.
    cosine_sim : 2-D array-like
        Square similarity matrix; ``cosine_sim[i][j]`` is the similarity
        between the songs at positions ``i`` and ``j``.
    top_n : int, default 2
        Maximum number of recommendations. Values below 0 are treated as 0.

    Returns
    -------
    list of str
        Up to ``top_n`` titles ordered from most to least similar, never
        including the query song itself. Ties keep the frame's row order.
    str
        The message ``"Song not found in the database."`` when ``song_name``
        is absent (kept as a string for backward compatibility with callers).
    """
    # Locate the song by position, not by index label: cosine_sim is indexed
    # positionally, so a non-default DataFrame index must not leak into it.
    positions = (df["song"] == song_name).to_numpy().nonzero()[0]
    if len(positions) == 0:
        return "Song not found in the database."
    query_pos = int(positions[0])

    # Drop the query song explicitly. Relying on "self sorts first" is wrong
    # when another song has identical features (similarity ties at 1.0) and
    # appears earlier in the frame: the stable sort would then place that song
    # ahead of the query, and slicing off element 0 would discard the wrong one.
    candidates = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[query_pos])
        if pos != query_pos
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # Clamp so a negative top_n yields no results rather than an odd slice.
    best = candidates[:max(top_n, 0)]
    best_positions = [pos for pos, _ in best]

    return df["song"].iloc[best_positions].tolist()


In [17]:
# Example usage: ask for the single closest match to "Uptown Funk"
print(recommend_song("Uptown Funk", df, cosine_sim, top_n=1))

['Blinding Lights']
