In [53]:
import pandas as pd
import numpy as np

In [54]:
from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()

# Download the dataset
api.dataset_download_files('noorsaeed/songs-recommendation-dataset', path='data/', unzip=True)


Dataset URL: https://www.kaggle.com/datasets/noorsaeed/songs-recommendation-dataset


In [55]:
df = pd.read_csv('data/songdata.csv')
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [56]:
df.shape

(57650, 4)

In [57]:
df = df.sample(n=5000).drop('link', axis=1).reset_index(drop=True)

In [58]:
df.head()

Unnamed: 0,artist,song,text
0,Megadeth,Symphony Of Destruction,You take a mortal man \nAnd put him in contro...
1,Uncle Tupelo,Black Eye,He had a black eye \nHe was proud of \nLike ...
2,Elvis Presley,Find Out What's Happening,Baby you know me well \nYou know I mean what ...
3,Rolling Stones,Claudine,Claudine's back in jail again \nClaudine's ba...
4,Kirsty Maccoll,Teenager In Love,He's my boyfriend but he treats me so mean and...


ConteNT bASED RECOMMENDATION SYSTEM

In [59]:
df['artist']

0                 Megadeth
1             Uncle Tupelo
2            Elvis Presley
3           Rolling Stones
4           Kirsty Maccoll
               ...        
4995           Demi Lovato
4996             Metallica
4997    Christina Aguilera
4998               Santana
4999                  Korn
Name: artist, Length: 5000, dtype: object

In [60]:
df['song'][0]

'Symphony Of Destruction'

In [61]:
df['text'][0]

"You take a mortal man  \nAnd put him in control  \nWatch him become a god  \nWatch people's heads a'roll  \nA'roll, a' roll  \n  \nJust like the Pied Piper  \nLed rats through the streets  \nWe dance like marionettes  \nSwaying to the symphony  \nOf destruction  \n  \nActing like a robot  \nIts metal brain corrodes  \nYou try to take its pulse  \nBefore the head explodes  \nExplodes, explodes  \n  \nJust like the Pied Piper  \nLed rats through the streets  \nWe dance like marionettes  \nSwaying to the symphony  \n  \nJust like the Pied Piper  \nLed rats through the streets  \nWe dance like marionettes  \nSwaying to the symphony  \nSwaying to the symphony  \nOf destruction  \n  \nThe earth starts to rumble  \nWorld powers fall  \nA'warring for the heavens  \nA peaceful man stands tall  \nTall, tall  \n  \nJust like the Pied Piper  \nLed rats through the streets  \nWe dance like marionettes  \nSwaying to the symphony  \n  \nJust like the Pied Piper  \nLed rats through the streets  \nWe 

Text Preprocessing 

In [62]:
# Convert all text in the 'text' column to lowercase
# Remove special characters (anything that is not a word character or whitespace)
# Replace newline characters (\n) with a space
# df['text'] = df['text'].str.lower().replace(r'[^\w\s]','').replace(r'\n',' ', regex=True)
df['text'] = df['text'].str.lower().str.replace(r'[^\w\s]', '', regex=True).str.replace(r'\n', ' ', regex=True)


In [63]:
df['text'][0]


'you take a mortal man   and put him in control   watch him become a god   watch peoples heads aroll   aroll a roll      just like the pied piper   led rats through the streets   we dance like marionettes   swaying to the symphony   of destruction      acting like a robot   its metal brain corrodes   you try to take its pulse   before the head explodes   explodes explodes      just like the pied piper   led rats through the streets   we dance like marionettes   swaying to the symphony      just like the pied piper   led rats through the streets   we dance like marionettes   swaying to the symphony   swaying to the symphony   of destruction      the earth starts to rumble   world powers fall   awarring for the heavens   a peaceful man stands tall   tall tall      just like the pied piper   led rats through the streets   we dance like marionettes   swaying to the symphony      just like the pied piper   led rats through the streets   we dance like marionettes   swaying to the symphony   

Tokenization and Stemming 

In [65]:
# from nltk.data import find

# try:
#     find('tokenizers/punkt')
#     print("✅ 'punkt' is available.")
# except LookupError:
#     print("❌ 'punkt' not found. Download it using nltk.download('punkt')")


✅ 'punkt' is available.


In [68]:
import nltk
from nltk.stem.porter import PorterStemmer

# Download 'punkt_tab'
nltk.download('punkt_tab')

ps = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [ps.stem(w) for w in tokens]
    return " ".join(stemming)

# Checking the tokenization function
tokenization('hello i loved when you loving me lovers')

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt_tab.zip.
[nltk_data]   Unzipping tokenizers\punkt_tab.zip.


'hello i love when you love me lover'

In [69]:
# CONVERTING WORKDS INTO BASE format
# LOVES,LOVING,LOVED = LOVE

import nltk
from nltk.stem.porter import PorterStemmer

stemmer = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)

In [70]:
df['text'] = df['text'].apply(lambda x: tokenization(x))


In [71]:
df['text']

0       you take a mortal man and put him in control w...
1       he had a black eye he wa proud of like some of...
2       babi you know me well you know i mean what i s...
3       claudin back in jail again claudin back in jai...
4       he my boyfriend but he treat me so mean and so...
                              ...                        
4995    there a boy lost hi way look for someon to pla...
4996    slave hebrew born to serv to the pharaoh heed ...
4997    she come from across the street red lipstick o...
4998    i am a victim of my time a produc of my age th...
4999    ill die smile someth the past someon say to me...
Name: text, Length: 5000, dtype: object

In [72]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [73]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [74]:
matrix.shape

(5000, 19358)

In [75]:
similarity[0]


array([1.        , 0.029367  , 0.00686265, ..., 0.01055666, 0.00727022,
       0.00144395], shape=(5000,))

In [79]:
df['song'][0]

'Symphony Of Destruction'

In [80]:
df[df['song']=='Symphony Of Destruction']


Unnamed: 0,artist,song,text
0,Megadeth,Symphony Of Destruction,you take a mortal man and put him in control w...


In [76]:
df[df['song']==''].index[0]


IndexError: index 0 is out of bounds for axis 0 with size 0

In [81]:
def recommendation(song_df):
    idx = df[df['song'] == song_df].index[0]
    distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])
    
    songs = []
    for m_id in distances[1:21]:
        songs.append(df.iloc[m_id[0]].song)
        
    return songs

In [83]:
recommendation('Alone')


["I'm Goin'",
 "I'm Alone",
 "Goin' Out Of My Head",
 "Goin' Down",
 'By Myself But Not Alone',
 'All Alone',
 'Pass It Around',
 'I Would Find You',
 'Leave My Woman Alone',
 "I Think We're Alone Now",
 'Dark Hollow',
 'Not Alone',
 'How Long Has This Been Going On',
 "Things Goin' On",
 "When We're Old Men",
 'Whenever You Need Somebody',
 'Men Are All The Same',
 "All I'm Thinkin' About",
 'I Get No Sleep',
 'Blue Again']

In [84]:
import pickle
pickle.dump(similarity,open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))