In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Load the lyrics dataset (the CSV is expected in the working directory).
df = pd.read_csv('spotify_millsongdata.csv')

# Drop songs with missing lyrics, then renumber the rows 0..n-1.
# Later cells look a song up in this frame and use the result as a row position
# in the TF-IDF / similarity matrices, so the index labels must equal the row
# positions even when dropna() actually removes rows.
df = df.dropna(subset=['text']).reset_index(drop=True)

# The full dataset is used here (no df.sample down-sampling step).
print(f"Dataset loaded with {len(df)} songs.")

Dataset loaded with 57650 songs.


In [None]:
# Convert all lyrics to numbers: fit a TF-IDF vectorizer (using its built-in
# English stop-word list) on the lyrics column and transform every song into
# one row of `matrix`. Row order follows the row order of `df`.
tfidf = TfidfVectorizer(stop_words='english')
matrix = tfidf.fit_transform(df['text'])

# Calculate similarity (this part is heavy on memory): every row of `matrix` is
# compared with every other row, so `similarity` has one row and one column per song.
# NOTE(review): with the default dense output this is an n x n array; at the
# song count printed by the load cell that is presumably tens of GB - confirm it
# fits in RAM, or compute a single row on demand with
# cosine_similarity(matrix[idx], matrix) instead.
similarity = cosine_similarity(matrix)
print("Analysis complete.")

Analysis complete.


In [None]:
def recommend(song_name, top_n=5):
    """Print the songs whose lyrics are most similar to ``song_name``.

    Reads the module-level ``df`` (columns ``song`` and ``artist``) and the
    module-level ``similarity`` matrix, whose row/column ``i`` corresponds to
    the ``i``-th row of ``df`` by position.

    Parameters
    ----------
    song_name : str
        Exact title to look up in ``df['song']``. If several rows share the
        title, the first one is used.
    top_n : int, default 5
        Maximum number of recommendations to print.

    Returns
    -------
    None
        Results are printed; a "not found" message is printed when the title
        does not occur in ``df['song']``.
    """
    # Row *positions* of the matching titles. Positions (not index labels) are
    # required because `similarity` and `df.iloc` are both addressed by position;
    # the two differ whenever df's index has gaps (e.g. after dropna()).
    matches = (df['song'] == song_name).to_numpy().nonzero()[0]
    if len(matches) == 0:
        print(f"Song '{song_name}' not found.")
        return

    idx = int(matches[0])
    scores = similarity[idx]

    # Rank every other song by similarity, best first. The query song is
    # excluded explicitly rather than by assuming it sorts to the top, which
    # fails for duplicate lyrics or an all-zero TF-IDF row.
    ranked = sorted(
        (pos for pos in range(len(scores)) if pos != idx),
        key=lambda pos: scores[pos],
        reverse=True,
    )

    print(f"Recommendations for '{song_name}':")
    for pos in ranked[:max(top_n, 0)]:
        row = df.iloc[pos]
        print(f"- {row['song']} by {row['artist']}")

In [None]:
# Test 1: print a label, then the recommendations for one title.
# NOTE(review): "Ahe's" looks like a typo, but recommend() needs an exact match
# against df['song'] and the saved output shows this spelling was found - keep
# it as is unless the dataset changes.
print("TEST 1: POP/HAPPY SONG ")
recommend("Ahe's My Kind Of Girl")

TEST 1: POP/HAPPY SONG 
Recommendations for 'Ahe's My Kind Of Girl':
- What Kind Of Girl by Air Supply
- Not That Kind Of Love by Alice Cooper
- The Messenger by Linkin Park
- Girl Like Mine by Roy Orbison
- Marilyn Monroe by Pharrell Williams


In [None]:
# Test 2: print a label, then the recommendations for "Dancing Queen".
# NOTE(review): "DICSO" in the label below looks like a typo for "DISCO";
# fixing it changes the printed output, so re-run the cell afterwards.
print("TEST 2: WORLD FAMOUS DICSO SONG")
recommend("Dancing Queen")

TEST 2: WORLD FAMOUS DICSO SONG
Recommendations for 'Dancing Queen':
- Queen In Love by Yngwie Malmsteen
- Air Dance by Black Sabbath
- Dancin' In The Wind by Hanson
- Dream Dancing by Kenny Rogers
- Dancing The Blues Away by Chris Rea


In [42]:
# Test 3: print a label, then the recommendations for "Crazy World".
# If more than one row carries this title, recommend() uses the first match.
print("TEST 3: CLASSIC ROCK SONG")
recommend("Crazy World")

TEST 3: CLASSIC ROCK SONG
Recommendations for 'Crazy World':
- Church In The Wildwood by Alabama
- Be With You Awhile by Alice Cooper
- Take The Money And Run by Alan Parsons Project
- If You Go Away by Cyndi Lauper
- Give It Up by Alice Cooper


In [41]:
# Test 4: print a label, then the recommendations for "Gimme Gimme Gimme".
# NOTE(review): "UPTIME" in the label below is presumably meant to be
# "UPTEMPO" (or "UPBEAT") - confirm with the author before changing it.
print("TEST 4: UPTIME POP SONG")
recommend("Gimme Gimme Gimme")

TEST 4: UPTIME POP SONG
Recommendations for 'Gimme Gimme Gimme':
- Too Much Pride by Chris Rea
- Amazing Grace by Dolly Parton
- Losing You (Track by Dusty Springfield
- Storm Front by Billy Joel
- All Talk, No Action by Bon Jovi


In [40]:
# Test 5: print a label, then the recommendations for "Free As A Bumble Bee".
# The title must match df['song'] exactly, otherwise recommend() only prints
# a "not found" message.
print("TEST 5: SOFT ACOUSTIC SONG")
recommend("Free As A Bumble Bee")

TEST 5: SOFT ACOUSTIC SONG
Recommendations for 'Free As A Bumble Bee':
- Valerie by Bruno Mars
- I Heard It Through The Grapevine by Creedence Clearwater Revival
- Square One by Coldplay
- Black Celebration by Depeche Mode
- Love Isn't Easy by ABBA
