In [67]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the Kaggle Spotify lyrics dataset and keep only the columns used below;
# rows with any missing value are dropped and the index renumbered from 0.
df = (
    pd.read_csv("/kaggle/input/spotify-million-song-dataset/spotify_millsongdata.csv")
    .loc[:, ['artist', 'song', 'text']]
    .dropna()
    .reset_index(drop=True)
)

# Reduce size for memory. The sample is seeded so that re-running the cell
# (Restart & Run All) selects the same 1000 rows every time.
df = df.sample(1000, random_state=42).reset_index(drop=True)
df.head()


Unnamed: 0,artist,song,text
0,Iggy Pop,Isolation,"Needed you, you were only using \r\nNeeding y..."
1,Devo,Red-Eye Express,Let me tell you a story \r\nLet me shovel som...
2,P!nk,Hell Wit Ya,What is this? I heard you've got a new miss \...
3,Christy Moore,Dunnes Stores,Close your eyes and come with me back to 1984 ...
4,Patti Smith,Hey Joe,"Honey, the way you play guitar makes me feel s..."


In [68]:
# Reload the full dataset so the title lookup below searches every song rather
# than whatever (possibly down-sampled) frame `df` currently holds.
df = pd.read_csv("/kaggle/input/spotify-million-song-dataset/spotify_millsongdata.csv")
df = df[['artist', 'song', 'text']].dropna().reset_index(drop=True)

# Sample 999 random songs (seeded, so the same rows are drawn on every run)
df_sampled = df.sample(999, random_state=42)

# Add Eleanor Rigby manually: every row whose title matches exactly (may be
# several rows, or none if the title is absent from the dataset).
rigby_row = df[df['song'] == "Eleanor Rigby"]

# Combine. At this point both frames still carry the full dataset's index
# labels, so a repeated label means an Eleanor Rigby row was also drawn by the
# random sample. Keep only the first copy so that no song appears twice in the
# similarity matrix, then renumber rows 0..n-1.
combined = pd.concat([df_sampled, rigby_row])
df = combined[~combined.index.duplicated(keep='first')].reset_index(drop=True)


In [69]:
# Vectorise the lyrics: one TF-IDF row per song in `df`, with English stop
# words removed by the vectoriser.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['text'])

# Song-by-song cosine similarity. With a single argument the matrix is
# compared against itself, so entry [i, j] scores song i against song j.
similarity = cosine_similarity(tfidf_matrix)


In [70]:
def recommend_song_lyrics(song_name, df, similarity_matrix, top_n=5):
    """Print the ``top_n`` songs whose lyrics are most similar to ``song_name``.

    Parameters
    ----------
    song_name : str
        Title to look up; compared for exact equality against ``df['song']``.
        If several rows share the title, the first one is used.
    df : pandas.DataFrame
        Must contain 'song' and 'artist' columns. Row *positions* must line up
        with the rows of ``similarity_matrix``; index labels are not used.
    similarity_matrix : 2-D array-like
        ``similarity_matrix[i][j]`` is the similarity between the songs at
        positions ``i`` and ``j`` of ``df``.
    top_n : int, default 5
        Maximum number of recommendations to print.

    Returns
    -------
    str or None
        An error message if ``song_name`` is not in ``df``; otherwise ``None``
        (the recommendations are printed, not returned).
    """
    # Boolean mask over row positions, independent of df's index labels.
    matches = (df['song'] == song_name).to_numpy()
    if not matches.any():
        return f"❌ Song '{song_name}' not found in dataset."

    # argmax on a boolean array yields the position of the first True.
    index = int(matches.argmax())

    # Exclude the query song by position instead of assuming it sorts first:
    # another row with identical lyrics (score 1.0) or float rounding could
    # otherwise push the query itself into the recommendations.
    scores = [
        (position, score)
        for position, score in enumerate(similarity_matrix[index])
        if position != index
    ]
    # sorted() is stable, so ties keep their original row order.
    # max(..., 0) keeps a negative top_n from slicing off the end of the list.
    sorted_scores = sorted(scores, key=lambda x: x[1], reverse=True)[:max(top_n, 0)]

    print(f"🎧 Lyrics-based Recommendations for '{song_name}':\n")
    for position, _ in sorted_scores:
        rec_song = df.iloc[position]
        print(f"🎵 {rec_song['song']} — {rec_song['artist']}")


In [71]:
# Example query against the sampled frame and its similarity matrix.
recommend_song_lyrics("Blood In My Eyes", df=df, similarity_matrix=similarity)


🎧 Lyrics-based Recommendations for 'Blood In My Eyes':

🎵 Weird World — Backstreet Boys
🎵 Can You Hear Me — Enrique Iglesias
🎵 Stackin' Paper — ZZ Top
🎵 The Trickster — Radiohead
🎵 Let Me Tell You, Babe — Nat King Cole


In [72]:
# Show 20 songs and their artists currently in the dataset
#df[['song', 'artist']].head(20)
