In [54]:
import pandas as pd
import numpy as np

In [55]:
df = pd.read_csv('songdata.csv')
df.head(3)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...


In [56]:
df.shape

(57650, 4)

In [57]:
df = df.sample(n=5000).drop('link', axis=1).reset_index(drop=True)

In [58]:
df.shape

(5000, 3)

In [59]:
df['text'] = df['text'].str.lower().replace(r'[^\w\s]','').replace(r'\n',' ', regex=True)

In [60]:
df['text'][0]

'he went to paris   looking for answers   to questions that bothered him so      he was impressive   young and aggressive   savin\' the world on his own   but the warm summer breezes   the french wines and cheeses   put his ambition at bay   his summers and winters   scattered like splinters   and four to five years slipped away      then he went to england   played the piano   and married an actress named kim   they had a fine life, she was a good wife   and bore him young son named jim   and all of the answers, and all the questions   he locked in his attic one day   \'cause he liked the quiet   clean country livin\' and   twenty more years slipped away      well, the war took his baby   bombs killed his lady   and left him with only one eye   his body was battered   his whole world was shattered   and all he could do was just cry   while the tears were falling and he was recalling   answers he\'d never found   so he hopped on a freighter, skidded the ocean   and left england without

In [61]:
import nltk
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)

In [62]:
df['text'] = df['text'].apply(lambda x: tokenization(x))

In [63]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [64]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [65]:
similarity[0]

array([1.        , 0.1436951 , 0.00491387, ..., 0.04821879, 0.03563214,
       0.06480705])

In [66]:
df['song'][0]

'He Went To Paris'

In [68]:
df[df['song']=='He Went To Paris'].index[0]

np.int64(0)

# recommedation function

In [69]:
def recommendation(song_df):
    idx = df[df['song'] == song_df].index[0]
    distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])
    
    songs = []
    for m_id in distances[1:21]:
        songs.append(df.iloc[m_id[0]].song)
        
    return songs

In [70]:
recommendation('He Went To Paris')

['Cautious Man',
 'Needs His Woman',
 'A Place In The Country',
 'If We Are The Body',
 "He's Got The Whole World In His Hands",
 "He's Got The Whole World In His Hands",
 'Did You See His Name?',
 "We're Only Gonna Die For Our Arrogance",
 'All Is Well',
 'I Saw A Man And He Danced With His Wife',
 'But I Dropped It',
 'A Piece Of Sky',
 'The Good Son',
 'One Short Story',
 'Year Of The Knife',
 'The Spider Song',
 'Hand Of Sorrow',
 "Why Don't You Try",
 'Killing Me Softly',
 'Lady Stardust']

In [71]:
import pickle
pickle.dump(similarity,open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))