In [1]:
import numpy as np
import pandas as pd

In [2]:
df=pd.read_csv('songdata.csv')
df.head(5)

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [3]:
# reduce the size of sample data to 5000 rows and pefrom operation column wise
df = df.sample(n=5000).drop('link', axis=1).reset_index(drop=True)

In [4]:
df['song'][0]

'Anything Can Happen'

In [5]:
#before cleaning data
df['text'][0]

"Time will come when we know what happened here  \nChange will come in time and make it clear  \nWe learn one thing if we learn at all  \nIn the secret wars we call our lives, anything can happen  \nWe watch the days, we make our plans  \nWe change in ways, a life demands  \nBut whatever pleasure this life may allow  \nI'll give you the love I have for you now  \nBecause anything can happen in a world so full of fear  \nDreams are whispered in the dead of night and people disappear  \nBut you hear my heart and you know I'm still here  \nAlthough you know and I know you know  \nWhile I swear I'll never let you go, anything can happen  \nYeah, if this love can happen here  \nAnything can  \nWe watch the days, we make our plans  \nWe change in ways a life demands  \nI'll always remember this time, this place  \nThe hope in your voice, the light on your face  \nBecause anything can happen in a world so full of pain  \nPeople give their lives to making war and we call those people sane  \nB

In [6]:
# Removing special characters and convert upper case letter to Lower case letter
df['text'] = df['text'].str.lower().replace(r'[^\w\s]','').replace(r'\n',' ', regex=True)

In [7]:
# After Cleaning Data 
# Show Data again
df['text'][0]

"time will come when we know what happened here   change will come in time and make it clear   we learn one thing if we learn at all   in the secret wars we call our lives, anything can happen   we watch the days, we make our plans   we change in ways, a life demands   but whatever pleasure this life may allow   i'll give you the love i have for you now   because anything can happen in a world so full of fear   dreams are whispered in the dead of night and people disappear   but you hear my heart and you know i'm still here   although you know and i know you know   while i swear i'll never let you go, anything can happen   yeah, if this love can happen here   anything can   we watch the days, we make our plans   we change in ways a life demands   i'll always remember this time, this place   the hope in your voice, the light on your face   because anything can happen in a world so full of pain   people give their lives to making war and we call those people sane   but i love you where y

In [8]:
import nltk
from nltk.stem.porter import PorterStemmer

ps = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [ps.stem(w) for w in tokens]
    return " ".join(stemming)

# Download the 'punkt' resource
nltk.download('punkt')

# Apply tokenization function to 'text' column in DataFrame
df['text'] = df['text'].apply(lambda x: tokenization(x))


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\dhruv\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [9]:
df['text']

0       time will come when we know what happen here c...
1       i wa taught a month ago to bide my time and ta...
2       now i 'd like to turn to the folk song , which...
3       twenti citi , it pass me by find treasur i do ...
4       cloud differ shape and size hide the horizon c...
                              ...                        
4995    for all i know , you could be near and everi b...
4996    i just look to see the sun outsid it 's shine ...
4997    i 'm go to detail what happen to me , that the...
4998    when there 's a shadow near , reach for the su...
4999    we can drag you out of bed in the middl of the...
Name: text, Length: 5000, dtype: object

In [11]:
# cosine similarity and Tfid vectarizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [12]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english') #remove useless words which have no meaning
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [13]:
# show similarity
similarity

array([[1.        , 0.02810153, 0.06549227, ..., 0.05908474, 0.0183648 ,
        0.04247188],
       [0.02810153, 1.        , 0.01633163, ..., 0.00224061, 0.0049526 ,
        0.00386631],
       [0.06549227, 0.01633163, 1.        , ..., 0.02279435, 0.00770109,
        0.0329237 ],
       ...,
       [0.05908474, 0.00224061, 0.02279435, ..., 1.        , 0.        ,
        0.03591217],
       [0.0183648 , 0.0049526 , 0.00770109, ..., 0.        , 1.        ,
        0.00678883],
       [0.04247188, 0.00386631, 0.0329237 , ..., 0.03591217, 0.00678883,
        1.        ]])

In [27]:
# creating recommendation fumction

In [42]:
# list(enumerate(similarity[idx]) this create a list of index we sort this because the most similar song come together 

In [43]:
# key=lambda x:x[1] means it start from 1st index  

In [16]:
def recommendation(song_df):
    matches= df[df['song'] == song_df]
    if matches.empty:
        print(f"No matches found for '{song_df}'.")
        return []
    idx=matches.index[0]
    distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])
    
    songs = []
    for m_id in distances[1:21]:
        songs.append(df.iloc[m_id[0]].song)
        
    return songs

In [52]:
# calling recommendation function

In [17]:
recommendation('Zero Landmine')

No matches found for 'Zero Landmine'.


[]

In [18]:
import pickle
pickle.dump(similarity,open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))