In [36]:
import pandas as pd
import nltk
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [37]:
music_data = pd.read_csv("spotify_millsongdata.csv")

In [38]:
display(music_data.head())
display(music_data.tail())

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [39]:
display(music_data.shape)
display(music_data.isna().sum())

(57650, 4)

artist    0
song      0
link      0
text      0
dtype: int64

In [40]:
music_data =music_data.sample(20000).drop('link', axis=1).reset_index(drop=True)

In [41]:
music_data.head(10)

Unnamed: 0,artist,song,text
0,Red Hot Chili Peppers,Naked In The Rain,Standin' on the corner of civilization \r\nTh...
1,Nirvana,"Gypsies, Tramps And Thieves",I was born in the wagon of a travelling show ...
2,Kid Rock,Cramp Ya Style,(Kid Rock) Everything I do gonna be funny \r\...
3,Savage Garden,So Beautiful,Whether I'm right or wrong \r\nThere's no phr...
4,Gucci Mane,Round 1 (pt. 2),"Chorus: \r\nThe dope game hard, \r\nThe rap ..."
5,Halloween,Witches' Brew,"By Hap Palmer \r\n \r\nDead leaves, seaweed,..."
6,Diana Ross,I Cried For You,I cried for you \r\nNow it's your turn to cry...
7,Metallica,Don't Tread On Me,"Liberty or death, what we so proudly hail \r\..."
8,Opeth,Wreath,Falling inside again \r\nThis nightmare alway...
9,Beautiful South,Liar's Bar,Well sitting in a bar alone \r\nWhere no-one ...


In [42]:
music_data['text'][0]

"Standin' on the corner of civilization  \r\nThere's a time there's a place for me  \r\nIn a world where I can't be found  \r\nCold and mean people give me the creeps  \r\n  \r\nGoing to the jungle where the elephant roams  \r\nGot to get away gonna make it my home  \r\n  \r\nLosing my taste for the human race  \r\nSocial grace is a waste of time  \r\nIt's absurd when I look around  \r\nSo sublime that we blow my mind  \r\n  \r\nNaked in the rain with a killer whale  \r\nI can taste the salt when I lick his tail  \r\n  \r\n[Chorus]  \r\nNaked in the rain  \r\nDoctor Doolittle what's your secret  \r\nGive it to me doctor  \r\nDon't keep it  \r\n  \r\nI never met an animal that I didn't like  \r\nYou can come to me I won't bite  \r\nDon't you know dog is man's best friend  \r\nThere is some love that you can't fight  \r\n  \r\nNaked in the rain with black tattoos  \r\nRunnin' through the woods laughin' at the blues  \r\n  \r\n[Chorus]  \r\n  \r\nListen to the talking heart in my chest  \

In [43]:
music_data.shape

(20000, 3)

Text Cleaning/ Text Preprocessing

In [44]:
music_data['text'] = music_data['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n', ' ', regex = True)

In [45]:
stemmer = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)

In [46]:
music_data['text'] = music_data['text'].apply(lambda x: tokenization(x))

In [47]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(music_data['text'])
similarity = cosine_similarity(matrix)

In [48]:
similarity[0]

array([1.        , 0.03497139, 0.01882417, ..., 0.07381359, 0.0542827 ,
       0.01114378])

In [50]:
music_data[music_data['song'] == "Don't Tread On Me"].index[0]

7

In [51]:
def recommendation(song_df):
    idx = music_data[music_data['song'] == song_df].index[0]
    distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])
    
    songs = []
    for m_id in distances[1:21]:
        songs.append(music_data.iloc[m_id[0]].song)
        
    return songs

In [52]:
recommendation("Don't Tread On Me")

['Tread Careful',
 'Rest In Peace',
 'War No More',
 'Preparing To Fly',
 'Keep The Peace',
 'Settle Down My Boy',
 'Are You Red..Y',
 'Love Is War',
 'Empty Cages',
 'Two Tribes',
 'So This Is Christmas',
 'Hail To England',
 "It's A Jungle Out There",
 'The War Song',
 'Glass War',
 'Be Prepared',
 'Hail And Kill',
 'What Do You Want',
 'Let Peace Begin With Me',
 'Living In The Moment']

In [53]:
import pickle
pickle.dump(similarity,open('rec_spotify.pkl','wb'))
pickle.dump(music_data,open('music_data.pkl','wb'))