In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("spotify_millsongdata.csv")

In [3]:
df.head() 

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [4]:
df.tail()

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [5]:
df.shape

(57650, 4)

In [6]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [7]:
df = df.sample(5000).drop('link', axis = 1).reset_index(drop=True) 

In [8]:
df.head()

Unnamed: 0,artist,song,text
0,Europe,Ninja,"Tell me the story, tell me the legend \r\nTel..."
1,Pearl Jam,Crazy Mary,"She lived on the curve of the road, \r\nIn an..."
2,Phish,"My Friend, My Friend","My friend, my friend he's got a knife \r\nA s..."
3,Bob Marley,All In One,Bend down low \r\nLet me tell you what I know...
4,Point Of Grace,Steady On,"Kicking up the dust, heaven or bust \r\nWe're..."


In [9]:
df['text'][0]

"Tell me the story, tell me the legend  \r\nTell me the tales of war  \r\nTell me just one time,  \r\nWhat it was like before  \r\nBring me the feeling, right in that moment  \r\nWhen a heart for battle cries  \r\nFind me the treasure where the legend lies.  \r\n  \r\nIf I were a noble ancient knight  \r\nI'd stand by jour side to rule and fight  \r\nIt will always feel the same  \r\nWhen I call out your name.  \r\n  \r\nNinja survive, in dreams I walk by your side  \r\nNinja survive, with you there's no need to hide.  \r\n  \r\nA handful of sayings are more than a reason  \r\nFor me to feel this way  \r\nI'd like to be near you  \r\nMaybe for just one day  \r\nSo tell me the legend, tell me the story  \r\nTell me the tales of war  \r\nTell me just one time  \r\nWhat it was like before.  \r\n\r\n"

In [10]:
# df = df.sample(5000)

In [11]:
df.shape

(5000, 3)

### Text Cleaning / Text processing

In [12]:
df['text'] = df['text'].str.lower().replace(r'^\w\s',' ').replace(r'\n',' ', regex = True)

In [13]:
df.tail()

Unnamed: 0,artist,song,text
4995,Cheap Trick,Pop Drone,if i ever get back home again \r if i ever ge...
4996,Carly Simon,I Gave My Love A Cherry (The Riddle Song),i gave my love a cherry \r that had no stone ...
4997,Enrique Iglesias,I Have Always Loved You,since the beginning of time \r since it start...
4998,"Harry Connick, Jr.",This Guy's In Love With You,"you see this guy, \r this guy's in love with ..."
4999,King Diamond,A Corpse Without Soul,"listen, i'm a corpse, i'm a corpse \r i'm a c..."


In [14]:
!pip install --user -U nltk





In [15]:
# Natural Language Toolkit
import nltk
nltk.download('punkt_tab')
from nltk.stem.porter import PorterStemmer

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\naren\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [16]:
stemmer = PorterStemmer()

In [17]:
def token(txt):
    token = nltk.word_tokenize(txt)
    a =[stemmer.stem(w) for w in token ]
    return " ".join(a)

In [18]:
token("you are beautiful, beauty")

'you are beauti , beauti'

In [19]:
df['text'].apply(lambda x:token(x))

0       tell me the stori , tell me the legend tell me...
1       she live on the curv of the road , in an old ,...
2       my friend , my friend he 's got a knife a stat...
3       bend down low let me tell you what i know now ...
4       kick up the dust , heaven or bust we 're head ...
                              ...                        
4995    if i ever get back home again if i ever get ba...
4996    i gave my love a cherri that had no stone i ga...
4997    sinc the begin of time sinc it start to rain s...
4998    you see thi guy , thi guy 's in love with you ...
4999    listen , i 'm a corps , i 'm a corps i 'm a co...
Name: text, Length: 5000, dtype: object

In [20]:
!pip install --user scikit-learn





In [21]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [22]:
tfid = TfidfVectorizer(analyzer = 'word', stop_words = 'english')

In [23]:
matrix = tfid.fit_transform(df['text'])

In [24]:
matrix

<5000x23840 sparse matrix of type '<class 'numpy.float64'>'
	with 267860 stored elements in Compressed Sparse Row format>

In [25]:
similar = cosine_similarity(matrix)

In [26]:
similar[0]

array([1.        , 0.00276034, 0.00241948, ..., 0.03002278, 0.10327184,
       0.00126837])

### Recommender Function

In [28]:
def recommender(song_name):
    idx = df[df['song']==song_name].index[0]
    distance = sorted(list(enumerate(similar[idx])), reverse = True, key= lambda x:x[1])
    song = []
    for s_id in distance[1:5]: # if I change the value here the number of recommendations changes
        song.append(df.iloc[s_id[0]].song)
    return song

In [29]:
recommender("Sweetheart")

['As Long As You Love Me',
 'King Of The Thing',
 'For Once In My Life',
 'As Long As I Live']

In [33]:
import pickle

In [36]:
pickle.dump(similar, open("similarity.pkl", "wb"))

In [37]:
pickle.dump(df,open("df.pkl", "wb"))