In [39]:
import pandas as pd

In [40]:
df = pd.read_csv("spotify.csv")

In [41]:
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [42]:
df.shape

(57650, 4)

In [43]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [44]:
df = df.sample(5000).drop('link', axis=1).reset_index(drop=True)

In [45]:
df.head(5)

Unnamed: 0,artist,song,text
0,Ne-Yo,L.O.V.E,Ahh... Don't let me go. \r\nThis is a Jlack t...
1,George Jones,Back In My Baby's Arms Again,Written by D. Frazier and A. Owens \r\nThe ho...
2,Heart,Sweet Darlin',Sweet darlin' \r\nYou saved me \r\nI'm hummi...
3,Doobie Brothers,Down In The Track,I been workin' the whole day long \r\nOn thes...
4,Hillsong United,Grace Abounds,[Verse:] \r\nYou lay aside Your throne \r\nA...


In [46]:
df['text'][0]

"Ahh... Don't let me go.  \r\nThis is a Jlack track man.  \r\n  \r\n[Verse:]  \r\n  \r\nIf you listen to nothing else that I say,  \r\nHear me when I say this to you,  \r\nThat little voice in the back of your mind,  \r\nJust do what it tells you to do,  \r\nI saw her there,  \r\nCrying in two empty shot glasses,  \r\nCheers to herself then drink them down,  \r\nI should've just walked away,  \r\nBut there was something about her,  \r\n(Beautifully tragic)  \r\nTook a seat and said,  \r\n(So, you from outta town?)  \r\n  \r\n[Chorus:]  \r\n  \r\nI should've known she was crazy,  \r\nI've seen it in her eyes,  \r\nShe's crazy, (wohohoo)  \r\nI should've known she was crazy,  \r\nI did not realize  \r\nShe's crazy, (wohohoo)  \r\nI should've known she was crazy,  \r\n(She's crazy)  \r\n  \r\n[Verse 2:]  \r\n  \r\nShe looked at me,  \r\nAnd I swear the blood froze in my veins,  \r\nHer eyes pierced my heart and made it stop,  \r\nThe room started spinning,  \r\nAll of a sudden my vision w

In [47]:
df.shape

(5000, 3)

### Text Preprocessing / Text Cleaning

In [48]:
df['text'] = df['text'].str.lower().replace(r'^\w\s','').replace(r'\n',' ',regex = True)

In [49]:
df.tail(5)

Unnamed: 0,artist,song,text
4995,Boney M.,When A Child Is Born,a ray of hope flickers in the sky \r a tiny s...
4996,Paul Simon,Go Tell It On The Mountain,[chorus] \r hallelujah! \r go tell it on the...
4997,Horrible Histories,Dick Turpin Or Stand And Deliver,everyone thinks they know the story \r of dic...
4998,Ozzy Osbourne,No Easy Way Out,looking to the mirror \r from the other side ...
4999,Iron Maiden,Sea Of Madness,"out in the street somebody's crying, \r out i..."


In [50]:
import nltk
from nltk.stem.porter import PorterStemmer

In [51]:
stemmer = PorterStemmer()

In [52]:
def token(txt):
    token = nltk.word_tokenize(txt)
    a = [stemmer.stem(w) for w in token]
    return " ".join(a)

In [53]:
token("you are beautiful, beauty")

'you are beauti , beauti'

In [54]:
df['text'].apply(lambda x: token(x))

0       ahh ... do n't let me go . thi is a jlack trac...
1       written by d. frazier and a. owen the honey be...
2       sweet darlin ' you save me i 'm hummin ' from ...
3       i been workin ' the whole day long on these ra...
4       [ vers : ] you lay asid your throne and to thi...
                              ...                        
4995    a ray of hope flicker in the sky a tini star l...
4996    [ choru ] hallelujah ! go tell it on the mount...
4997    everyon think they know the stori of dick turp...
4998    look to the mirror from the other side see her...
4999    out in the street somebodi 's cri , out in the...
Name: text, Length: 5000, dtype: object

In [55]:
from sklearn.feature_extraction.text import TfidfVectorizer 
from sklearn.metrics.pairwise import cosine_similarity 

In [56]:
tfid =TfidfVectorizer(analyzer='word', stop_words='english')

In [57]:
matrix = tfid.fit_transform(df['text'])

In [58]:
matrix

<5000x23618 sparse matrix of type '<class 'numpy.float64'>'
	with 265786 stored elements in Compressed Sparse Row format>

In [59]:
similar = cosine_similarity(matrix)

In [60]:
similar[0]

array([1.        , 0.00984394, 0.01001044, ..., 0.01632664, 0.0291246 ,
       0.02839526])

In [62]:
df[df['song']=='No Easy Way Out'].index[0]

4998

### Recommender Function

In [67]:
def recommender(song_name):
    idx = df[df['song']==song_name].index[0]
    distance = sorted(list(enumerate(similar[idx])),reverse=True, key = lambda x:x[1])
    song = []
    for s_id in distance[1:5]:
        song.append(df.iloc[s_id[0]].song)
    return song

In [72]:
recommender("When A Child Is Born")

['When A Child Is Born',
 'At Last I Am Born',
 'Do Not Pass Me By',
 'Born In The U. S. A.']

In [78]:
import pickle

# Save the trained model
with open("similar.pkl", "wb") as file:
    pickle.dump(similar, file)

print("✅ Model saved successfully as 'similar.pkl'")

✅ Model saved successfully as 'similar.pkl'


In [79]:
import pickle

# Save the trained model
with open("df.pkl", "wb") as file:
    pickle.dump(df, file)

print("✅ Model saved successfully as 'df.pkl'")

✅ Model saved successfully as 'df.pkl'


In [73]:
import pickle

In [74]:
pickle.dump(similar, open("similarity", "wb"))

In [75]:
pickle.dump(df, open("df", "wb"))