In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("spotify_millsongdata.csv")

In [3]:
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [4]:
df.tail(5)

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [5]:
df.shape

(57650, 4)

In [6]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [7]:
df =df.sample(5000).drop('link', axis=1).reset_index(drop=True)

In [8]:
df.head(10)

Unnamed: 0,artist,song,text
0,Rod Stewart,Girl From The North Country,If you're traveling in the north country fair ...
1,Gary Numan,Stormtrooper In Drag,So here am I \r\nQuite by chance near the pho...
2,Engelbert Humperdinck,Come Over Here,"Come over here, come over here \r\nCome over ..."
3,Pretenders,Message Of Love,Now the reason we're here \r\nAs man and woma...
4,Virgin Steele,I Will Come For You,Under tortured skies from a land with no sun ...
5,Kid Rock,All Summer Long,"It was 1989, my thoughts were short my hair wa..."
6,U2,Stories For Boys,There's a place I go \r\nAnd I am far away \...
7,Justin Bieber,She Don't Like The Lights,"She don't like the flash, wanna keep us in the..."
8,Nirvana,The End,"This is the end \r\nMy only friend, the end ..."
9,Bon Jovi,Life's Too Short For Days Like These,"Needed a car, looked around for a week \r\nFo..."


In [9]:
df['text'][0]

"If you're traveling in the north country fair  \r\nWhere the winds hit heavy on the borderline  \r\nRemember me to the one who lives there  \r\nShe was once a true love of mine  \r\n  \r\nAnd if you're goin' when the snowflakes storm  \r\nWhen the rivers freeze and summer ends  \r\nPlease see for me she has a coat so warm  \r\nTo keep her from the howling wind  \r\n  \r\nWould you see for me that her hair's hanging long  \r\nThat it rolls and flows all down her breasts  \r\nSee for me that her hair's hanging long  \r\n'Cause that's the way I remember her best  \r\n  \r\nBut I'm a-wondering if she remembers me at all  \r\nMany times I've often prayed  \r\nIn the darkness of my night  \r\nIn the brightness of my day  \r\n  \r\nSo if you're traveling in the north country fair  \r\nWhere the winds hit heavy on the borderline  \r\nRemember me to the one who lives there  \r\n'Cause she was once, she was once a true love of mine  \r\n  \r\nAnd she'll always be a true love of mine  \r\nAnd I 

In [10]:
df.shape

(5000, 3)

Text Cleaning/ Text Preprocessing

In [11]:
df['text'] = df['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n', ' ', regex = True)

In [12]:
import nltk
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)

In [13]:
df['text'] = df['text'].apply(lambda x: tokenization(x))

In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [15]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [16]:
similarity[0]

array([1.        , 0.01749113, 0.03661164, ..., 0.01182486, 0.05461833,
       0.08099006])

In [17]:
df[df['song'] == 'Girl From The North Country']

Unnamed: 0,artist,song,text
0,Rod Stewart,Girl From The North Country,if you 're travel in the north countri fair wh...


In [18]:
def recommendation(song_df):
    idx = df[df['song'] == song_df].index[0]
    distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])
    
    songs = []
    for m_id in distances[1:21]:
        songs.append(df.iloc[m_id[0]].song)
        
    return songs

In [19]:
recommendation('Girl From The North Country')

['Please Remember Me',
 'Remember Me',
 "Let's Play Remember",
 'Wind Up',
 'Once Is Enough',
 "Don't Forget To Remember",
 'Love Is A Cold Wind',
 "Don't Forget To Remember",
 'I Will Remember',
 'Heavy Load',
 'Some Other Place, Some Other Time',
 'Master Of The Wind',
 'Wild As The Wind',
 'Vicious Streak',
 'Back To The Country',
 'All My Loving',
 'Wild Is The Wind',
 'I Remember L.A.',
 'One Day',
 'Always']

In [20]:
import pickle
pickle.dump(similarity,open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))