In [62]:
import pandas as pd

In [64]:
df = pd.read_csv("spotify_millsongdata.csv")

In [65]:
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [68]:
df = df.sample(5000).drop('link', axis=1).reset_index(drop=True)

In [70]:
df['text'] = df['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n', ' ',regex = True).replace(r'\r', ' ',regex = True)

In [72]:
df.shape

(5000, 3)

In [74]:
df

Unnamed: 0,artist,song,text
0,Evanescence,Together Again,never thought that i'd be leaving you today ...
1,Kylie Minogue,Did It Again,clever girl think you are but you think too...
2,Vertical Horizon,Can You Help Me,can you finally reach me out beyond the blu...
3,Nirvana,Lake Of Fire,[chorus:] where do bad folks go when they d...
4,Grease,Beauty School Drop Out,"your story's sad to tell, a teenage ne'er-do-w..."
...,...,...,...
4995,Omd,Dollar Girl,when you know the dollar girl is coming hom...
4996,Kenny Loggins,With This Ring,i have believed i would always be free and wil...
4997,The White Stripes,Stop Breaking Down,some pretty mama she starts breakin' down ...
4998,Westlife,Until The End Of Time,until the end of time i...


In [76]:
import nltk
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

def token(txt):
    token = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in token]
    return " ".join(stemming)

In [78]:
token("you are beautiful, beauty")

'you are beauti , beauti'

In [80]:
df['text'].apply(lambda x: token(x))

0       never thought that i 'd be leav you today so a...
1       clever girl think you are but you think too mu...
2       can you final reach me out beyond the blue i '...
3       [ choru : ] where do bad folk go when they die...
4       your stori 's sad to tell , a teenag ne'er-do-...
                              ...                        
4995    when you know the dollar girl is come home aga...
4996    i have believ i would alway be free and wild ....
4997    some pretti mama she start breakin ' down stop...
4998    until the end of time i 'm long for you and if...
4999    you walk away from me tonight not know the rea...
Name: text, Length: 5000, dtype: object

In [82]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [84]:
tfid = TfidfVectorizer(analyzer='word',stop_words='english')

In [86]:
matrix = tfid.fit_transform(df['text'])

In [88]:
similar = cosine_similarity(matrix)

In [90]:
similar[0]

array([1.        , 0.01614372, 0.0501646 , ..., 0.04430833, 0.1958755 ,
       0.01031723])

In [94]:
df[df['song'] == 'Together Again']

Unnamed: 0,artist,song,text
0,Evanescence,Together Again,never thought that i'd be leaving you today ...
3896,'n Sync,Together Again,how can i say that i love you when you're s...


In [96]:
df[df['song'] == 'Together Again'].index[0]

0

Recommender Function

In [107]:
def recommendation(song_name):
    idx = df[df['song'] == song_name].index[0]
    distances = sorted(list(enumerate(similar[idx])),reverse=True,key=lambda x:x[1])

    song = []
    for s_id in distances[1:21]:
        song.append(df.iloc[s_id[0]].song)
        
    return song

In [109]:
recommendation('Together Again')

["There's A World",
 'The Dream Is Over',
 'Soulitude',
 'Dream Love',
 'At The End Of A Rainbow',
 'Missing You-Featuring Amy Lee',
 'Monster',
 'I Still Believe',
 'Dreaming My Dreams With You',
 "I'll Be There For You",
 'All The World',
 'Forevermore',
 'I Need Your Love',
 'Why Did She Have To Leave Me (Why Did She Have To Go?)',
 'The World We Live In',
 'Dream Baby',
 'Nobody',
 'Oh, My Love',
 'Until The End Of Time',
 'My Kind Of Lady']

In [111]:
import pickle

In [113]:
pickle.dump(similar,open('similarity.pkl','wb'))

In [115]:
pickle.dump(df,open('df.pkl','wb'))