In [1]:
import pandas as pd
df = pd.read_csv("spotify_millsongdata.csv")
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [2]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [3]:
df = df.drop('link',axis=1).reset_index(drop=True)
df.head(5)

Unnamed: 0,artist,song,text
0,ABBA,Ahe's My Kind Of Girl,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante","Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,Making somebody happy is a question of give an...


In [4]:
df.shape

(57650, 3)

Text cleaning

In [5]:
df['text'] = df['text'].str.lower().replace(r'a-ZA-Z0-9',' ').replace(r'\n',' ',regex = True).replace(r'\r',' ',regex = True)
df['text'].head(5)

0    look at her face, it's a wonderful face    and...
1    take it easy with me, please    touch me gentl...
2    i'll never know why i had to go    why i had t...
3    making somebody happy is a question of give an...
4    making somebody happy is a question of give an...
Name: text, dtype: object

In [6]:
import nltk
from nltk.stem.porter import PorterStemmer

In [7]:
stemmer = PorterStemmer()

In [8]:
def token(txt):
    tokens = nltk.word_tokenize(txt)
    stemmed_tokens = [stemmer.stem(s) for s in tokens]
    return " ".join(stemmed_tokens)

In [9]:
token("you are beautiful my beauty")

'you are beauti my beauti'

In [10]:
df['text'].apply(lambda x :token(x))

0        look at her face , it 's a wonder face and it ...
1        take it easi with me , pleas touch me gentli l...
2        i 'll never know whi i had to go whi i had to ...
3        make somebodi happi is a question of give and ...
4        make somebodi happi is a question of give and ...
                               ...                        
57645    iri day come on play let the angel fli let the...
57646    power to the worker more power power to the wo...
57647    all you need is someth i 'll believ flashlight...
57648    northern star am i frighten where can i go to ...
57649    come in make yourself at home i 'm a bit late ...
Name: text, Length: 57650, dtype: object

In [11]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [12]:
tfid = TfidfVectorizer(analyzer = 'word', stop_words = 'english')

In [13]:
matrix1 = tfid.fit_transform(df['text'].head(28825))
matrix2 = tfid.fit_transform(df['text'].tail(28825))
array1 = cosine_similarity(matrix1)
array2 = cosine_similarity(matrix2)

In [14]:
import numpy as np
array = np.concatenate((array1, array2), axis=0)
print(array)

[[1.00000000e+00 1.46110707e-03 9.95718035e-03 ... 6.74865406e-02
  2.71923476e-02 2.05043811e-03]
 [1.46110707e-03 1.00000000e+00 4.00594369e-03 ... 4.45231725e-03
  4.65186888e-03 4.98334466e-03]
 [9.95718035e-03 4.00594369e-03 1.00000000e+00 ... 1.70643246e-02
  3.55177243e-04 5.99954654e-03]
 ...
 [8.91979322e-03 1.59830781e-02 1.52487023e-02 ... 1.00000000e+00
  1.32581338e-02 1.21342448e-01]
 [1.63797194e-03 2.63919462e-03 0.00000000e+00 ... 1.32581338e-02
  1.00000000e+00 1.73327479e-02]
 [4.95997164e-03 1.98297835e-02 1.21418774e-01 ... 1.21342448e-01
  1.73327479e-02 1.00000000e+00]]


In [17]:
df[df['song']=='How Long']

Unnamed: 0,artist,song,text
17834,Rod Stewart,How Long,how long has this been going on? how long h...
26679,Bryan White,How Long,you say you need someone to love but when w...
29833,Dire Straits,How Long,"how long, how long baby how long has it bee..."
30719,Eagles,How Long,"like a blue bird with his heart removed, lonel..."
37369,Jackson Browne,How Long,when you look into a child's face and you'r...
41817,Lionel Richie,How Long,every time i see you and i look into your eyes...
51304,Santana,How Long,it's so hard just waitin' but i guess that'...
56052,Wet Wet Wet,How Long,hey there mr. taxi driver take me to the st...


Recommender Function

In [52]:
def song_recommender(song_name):
    song_id = df[df['song']==song_name].index[0]
    distance = sorted(list(enumerate(array[song_id])), reverse=True, key = lambda x:x[1])
    recommended_song = []
    for s_id in distance [1:6]:
        recommended_song.append(df.iloc[s_id[0]].song)
    return recommended_song

In [53]:
song_recommender("Bang")

['Sea Of Dreams',
 'The Prime Of Your Love',
 'Bang-A-Boomerang',
 'Dum Dum',
 'Give Me A Bit']

In [54]:
def artist_recommender(artist_name):
    artist_df = df[df['artist']== artist_name].head(5)
    songs_by_artist = artist_df['song'].tolist()
    return songs_by_artist

artist_recommender("Bryan White")

['A Hundred And One',
 'Between Now And Forever',
 'Everywhere I Turn',
 'How Lucky I Am',
 'I Stand All Alone']