In [40]:
import pandas as pd

In [41]:
df = pd.read_csv("spotify_millsongdata.csv")

In [42]:
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [43]:
df.tail(5)

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [44]:
df.shape

(57650, 4)

In [45]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [46]:
df = df.sample(5000).drop('link',axis=1).reset_index(drop=True)

In [47]:
df.head(5)

Unnamed: 0,artist,song,text
0,Nat King Cole,I Know That You Know,I know that you know \r\nThat I'll go where y...
1,Kirsty Maccoll,"Please Help Me, I'm Falling","Please help me, I'm falling in love with you ..."
2,Nirvana,Son Of A Gun,Take a step outside yourself \r\nAnd turn aro...
3,Chris Rea,There She Goes,"There she goes, the one that I love \r\nThe o..."
4,Ne-Yo,Back To What You Know,"[Verse 1] \r\nI don't know where to begin, \..."


In [48]:
df['text'][0]

"I know that you know  \r\nThat I'll go where you go  \r\nI choose you, won't lose you  \r\nI wish you knew how much I long  \r\nTo hold you in my arms  \r\n  \r\nThis time is my time  \r\nWill soon be goodbye time  \r\nThen in the star light, hold me tight  \r\nWith one more little kiss  \r\nSay, nighty night  \r\n  \r\nI know that you know  \r\nThat I'll go where you go  \r\nI choose you, won't lose you  \r\nI wish you knew how much I long  \r\nTo hold you in my arms  \r\n  \r\nThis time is my time  \r\nWill soon be goodbye time  \r\nThen in the star light, hold me tight  \r\nWith one more little kiss  \r\nSay, nighty night\r\n\r\n"

In [49]:
df.shape

(5000, 3)

In [50]:
# Text cleaning and Text preprocessing
df['text'] = df['text'].str.lower().replace(r'\w\s',' ').replace(r'\n', ' ',regex=True)

In [51]:
df.tail(5)    

Unnamed: 0,artist,song,text
4995,Nick Cave,God Is In The House,we've laid the cables and the wires \r we've ...
4996,Weird Al Yankovic,Stop Draggin' My Car Around,had to park my car for just five minutes. \r ...
4997,Leonard Cohen,The Throne Of Desire,"(written by cohen, performed in ""night magic"")..."
4998,Bee Gees,Big Chance,people in their pride believe it's easier for ...
4999,Alison Krauss,Could You Lie,go on and lie i've heard it all before \r you...


In [52]:
import nltk
from nltk.stem.porter import PorterStemmer

In [53]:
stemmer = PorterStemmer()

In [54]:
def token(txt):
    token = nltk.word_tokenize(txt)
    a = [stemmer.stem(w) for w in token]
    return ' '.join(a)

In [55]:
token('You are beautiful')

'you are beauti'

In [56]:
df['text'].apply(lambda x: token(x))

0       i know that you know that i 'll go where you g...
1       pleas help me , i 'm fall in love with you clo...
2       take a step outsid yourself and turn around ta...
3       there she goe , the one that i love the one i ...
4       [ vers 1 ] i do n't know where to begin , figh...
                              ...                        
4995    we 've laid the cabl and the wire we 've split...
4996    had to park my car for just five minut . i had...
4997    ( written by cohen , perform in `` night magic...
4998    peopl in their pride believ it 's easier for t...
4999    go on and lie i 've heard it all befor you had...
Name: text, Length: 5000, dtype: object

In [57]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [58]:
tfid = TfidfVectorizer(analyzer='word',stop_words='english')

In [59]:
matrix = tfid.fit_transform(df['text'])

In [60]:
similar = cosine_similarity(matrix)

In [61]:
similar[0]

array([1.        , 0.05095693, 0.00545976, ..., 0.0254295 , 0.05655509,
       0.07663379])

In [63]:
df[df['song'] == "God Is In The House"].index[0]

np.int64(4995)

In [38]:
def recommender(song_name):
    idx = df[df['song'] == song_name].index[0]
    distance =  sorted(list(enumerate(similar[idx])),reverse=True,key = lambda x: x[1])
    song = []
    for s_id in distance[1:5]:
        song.append(df.iloc[s_id[0]].song)
    return song


In [64]:
recommender('God Is In The House')

['God Love You',
 'Vessel',
 'The House That Mercy Built',
 "I Don't Wanna Play House"]

In [65]:
import pickle

In [66]:
pickle.dump(similar,open('similarity','wb'))

In [67]:
pickle.dump(df,open('df','wb'))