In [22]:
import pandas as pd
import nltk
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [23]:
df = pd.read_csv("spotify_millsongdata.csv")

In [24]:
df.shape

(57650, 4)

In [25]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [26]:
df =df.drop('link',axis=1).reset_index(drop=True)
df.head(10)

Unnamed: 0,artist,song,text
0,ABBA,Ahe's My Kind Of Girl,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante","Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,Making somebody happy is a question of give an...
5,ABBA,Burning My Bridges,"Well, you hoot and you holler and you make me ..."
6,ABBA,Cassandra,Down in the street they're all singing and sho...
7,ABBA,Chiquitita,"Chiquitita, tell me what's wrong \r\nYou're e..."
8,ABBA,Crazy World,I was out with the morning sun \r\nCouldn't s...
9,ABBA,Crying Over You,I'm waitin' for you baby \r\nI'm sitting all ...


In [27]:

df['text'][0]

"Look at her face, it's a wonderful face  \r\nAnd it means something special to me  \r\nLook at the way that she smiles when she sees me  \r\nHow lucky can one fellow be?  \r\n  \r\nShe's just my kind of girl, she makes me feel fine  \r\nWho could ever believe that she could be mine?  \r\nShe's just my kind of girl, without her I'm blue  \r\nAnd if she ever leaves me what could I do, what could I do?  \r\n  \r\nAnd when we go for a walk in the park  \r\nAnd she holds me and squeezes my hand  \r\nWe'll go on walking for hours and talking  \r\nAbout all the things that we plan  \r\n  \r\nShe's just my kind of girl, she makes me feel fine  \r\nWho could ever believe that she could be mine?  \r\nShe's just my kind of girl, without her I'm blue  \r\nAnd if she ever leaves me what could I do, what could I do?\r\n\r\n"

In [28]:
dfs = df.sample(5000)
dfs.shape

(5000, 3)

Text Cleaning and preprocessing

In [29]:
dfs['text']=dfs['text'].str.lower().replace(r'\w\s',' ').replace(r'\n',' ',regex=True)

In [30]:
stemmer = PorterStemmer()

In [31]:
def token(txt):
   token = nltk.word_tokenize(txt,preserve_line=True)
   a=[stemmer.stem(w) for w in token]
   return " ".join(a)

print(token("you are beautiful, beauty"))

you are beauti , beauti


In [32]:
dfs['text'].apply(lambda x: token(x))

22804    it 's a fine time to fall in love with you , i...
13617    ey i 'm not the one to start no troubl i 'm no...
2145     my love is like a red , red rose that 's newli...
43582    you say you 're sure you wan na stay with me i...
16627    bewar of night for we all know he 's loos agai...
                               ...                        
32431    that 's underground and fli macho macho i do n...
30069    the spirit of fatima still rule the earth she ...
15829    for your own good call me tonight do n't you t...
6390     heh-heh-heh. .. have you heard the news ? ( ne...
34333    raini day peopl alway seem to know when it 's ...
Name: text, Length: 5000, dtype: object

In [33]:
tfid = TfidfVectorizer(analyzer='word',stop_words='english')

In [34]:
matrix = tfid.fit_transform(dfs['text'])

In [35]:
similar= cosine_similarity(matrix)

In [63]:
dfs.reset_index(inplace= True)
dfs.head(5)

Unnamed: 0,index,artist,song,text
0,22804,ZZ Top,Stages,"it's a fine time to fall in love with you, \r..."
1,13617,Ne-Yo,Be The One,ey i'm not the one to start no trouble \r i'm...
2,2145,Carly Simon,"A Red, Red Rose","my love is like a red, red rose \r that's new..."
3,43582,Michael Bolton,Heart Of Mine,you say you're sure \r you wanna stay with me...
4,16627,Queensryche,Nightrider,beware of night for we all know he's loose aga...


Recommender Function

In [68]:
def recommender(song_name):
    idx = dfs[dfs['song']==song_name].index[0]
    distance = sorted(list(enumerate(similar[idx])), reverse=True, key = lambda x:x[1])
    song =[]
    for s_id in distance[1:10]:
        song.append(df.iloc[s_id[0]].song)
    return song

In [70]:
recommender("Nobody's Listening")

['Voices In My Head',
 'In The Sea',
 'Walk The Rockway',
 '21St Century Man',
 'You Never Satisfy Me',
 'Bring Out The Magic',
 "Growin' A Little Each Day",
 "Lady D'arbanville",
 'Marry You']

In [71]:
import pickle

In [74]:
pickle.dump(similar,open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))