In [22]:
import pandas as pd

df= pd.read_csv("spotify_millsongdata.csv")
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [23]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [24]:
df= df.sample(5000).drop("link", axis= 1).reset_index(drop= True)
df.head()

Unnamed: 0,artist,song,text
0,Randy Travis,Do I Ever Cross Your Mind,"Oh, sometimes I go walking through fields wher..."
1,Mary Black,The Moon And St. Christopher,"When I was young I spoke like a child, And I s..."
2,Fleetwood Mac,Big Love,Looking out for love \r\nIn the night so stil...
3,Stone Roses,Fools Gold,The gold road's sure a long road \r\nWinds on...
4,Bonnie Raitt,Storm Warning,"Storm warning, \r\nFeels like a heavy rain. ..."


In [25]:
df['text'][0]

"Oh, sometimes I go walking through fields where we walked  \r\nLong ago in the sweet used to be  \r\nAnd the flowers still grow, but they don't smell as sweet  \r\nAs they did when you picked them for me  \r\n  \r\nAnd when I think of you and the love we once knew  \r\nHow I wish we could go back in time  \r\nDo you ever think back on old memories like that  \r\nOr do I ever cross your mind  \r\n  \r\nOh, how often I wish that again I could kiss  \r\nYour sweet lips like I did long ago  \r\nAnd how often I long for those two loving arms  \r\nThat once held me so gentle and close  \r\n  \r\nAnd when I think of you and the love we once knew  \r\nHow I wish we could go back in time  \r\nDo you ever recall these old memories at all  \r\nOr do I ever cross your mind  \r\n  \r\nWhen old memories appear my eyes wont stay clear  \r\nWhen I think of those happier times  \r\nDo you ever recall these old memories at all  \r\nOr do I ever cross your mind  \r\n  \r\nDo you ever think back on old m

In [26]:
# For Simplicity we are taking a sample
# df= df.sample(5000)

In [27]:
df.head()

Unnamed: 0,artist,song,text
0,Randy Travis,Do I Ever Cross Your Mind,"Oh, sometimes I go walking through fields wher..."
1,Mary Black,The Moon And St. Christopher,"When I was young I spoke like a child, And I s..."
2,Fleetwood Mac,Big Love,Looking out for love \r\nIn the night so stil...
3,Stone Roses,Fools Gold,The gold road's sure a long road \r\nWinds on...
4,Bonnie Raitt,Storm Warning,"Storm warning, \r\nFeels like a heavy rain. ..."


In [28]:
df.shape

(5000, 3)

## Text Preprocessing/Cleaning

In [29]:
df['text']= df['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n',' ', regex= True)       #a-ZA-Z0-9 = \w\s

In [30]:
# As there are many approaches for this recommendation system i.e. Content-Based Filtering, Collaborative Filtering, Hybrid....etc
# I'm gonna use the Content Based Filtering

import nltk
from nltk.stem.porter import PorterStemmer


In [31]:
stemmer = PorterStemmer()

In [32]:
def token(txt):
    token= nltk.word_tokenize(txt)
    a = [stemmer.stem(w) for w in token]
    return " ".join(a)

In [33]:
token("you are beautiful, beauty")

'you are beauti , beauti'

In [34]:
df['text'].apply(lambda x: token(x))

0       oh , sometim i go walk through field where we ...
1       when i wa young i spoke like a child , and i s...
2       look out for love in the night so still oh i '...
3       the gold road 's sure a long road wind on thro...
4       storm warn , feel like a heavi rain . wind on ...
                              ...                        
4995    my heart skips-skips-skips-skips-skips-skip a ...
4996    i 've been think reminisc 'bout the day when w...
4997    he rememb the first time he met her he rememb ...
4998    it seem like happi is ju ' a thing call joe he...
4999    toss into my mind , stir the calm you splash m...
Name: text, Length: 5000, dtype: object

In [35]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [36]:
tfid = TfidfVectorizer(analyzer='word', stop_words='english')
matrix = tfid.fit_transform(df['text'])

In [37]:
matrix

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 269946 stored elements and shape (5000, 23762)>

In [38]:
similar = cosine_similarity(matrix)
similar[0]

array([1.        , 0.02243137, 0.03792163, ..., 0.08598076, 0.02243864,
       0.01309369])

## Recommender Function

In [41]:
df[df['song'] == "Do I Ever Cross Your Mind"].index[0]

np.int64(0)

In [44]:
def recommender(song_name):
    idx = df[df['song'] == song_name].index[0]
    distance = sorted(list(enumerate(similar[idx])), reverse= True, key= lambda x:x[1])
    song = []
    for s_id in distance[1:21]:      # 1:21 is for how many lines needed to display
        song.append(df.iloc[s_id[0]].song)
    return song

In [49]:
recommender("Do I Ever Cross Your Mind")

['Memories',
 'Back To The Old House',
 'Do I Ever Cross Your Mind',
 'Never',
 'Sabotage',
 'A Long, Long Time Ago',
 'This Old Heart Of Mine (Is Weak For You)',
 "If We Can't Be The Same Old Sweethearts",
 'Losing You (Track',
 'King Of The Thing',
 'A Long Time Gone',
 'Memories Fade',
 'Hard To Say Goodbye',
 'Say My Name',
 'I Wish',
 'And I Think Of You',
 "Love Isn't Easy",
 'Same Old Song And Dance',
 'Old Time Rock N Roll',
 'Sweet Evening Breeze']

In [50]:
import pickle
pickle.dump(similar, open("similarity", "wb"))
pickle.dump(df, open("df", "wb"))