In [3]:
import pandas as pd
import numpy as np

In [4]:
from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()

# Download the dataset
api.dataset_download_files('noorsaeed/songs-recommendation-dataset', path='data/', unzip=True)


Dataset URL: https://www.kaggle.com/datasets/noorsaeed/songs-recommendation-dataset


In [13]:
df = pd.read_csv('data/songdata.csv')
df.head()

Unnamed: 0,artist,song,link,text
0,ABBA,Ahe's My Kind Of Girl,/a/abba/ahes+my+kind+of+girl_20598417.html,"Look at her face, it's a wonderful face \nAnd..."
1,ABBA,"Andante, Andante",/a/abba/andante+andante_20002708.html,"Take it easy with me, please \nTouch me gentl..."
2,ABBA,As Good As New,/a/abba/as+good+as+new_20003033.html,I'll never know why I had to go \nWhy I had t...
3,ABBA,Bang,/a/abba/bang_20598415.html,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,/a/abba/bang+a+boomerang_20002668.html,Making somebody happy is a question of give an...


In [14]:
df.shape

(57650, 4)

In [15]:
df = df.sample(n=5000).drop('link', axis=1).reset_index(drop=True)

In [16]:
df.head()

Unnamed: 0,artist,song,text
0,Queen,Soul Brother,God bless my soul here he comes now \nThe man...
1,Pat Benatar,I've Got Papers On You,I've got papers on you baby \nYou gotta do wh...
2,Zox,Goodbye To You,Broken down in paradise \nI'm heavy like I'm ...
3,Grand Funk Railroad,Comfort Me,"I was found after losing my way, \nSafe and s..."
4,Frank Zappa,How Could I Be Such A Fool?,When I won your love \nI was very glad \nEve...


ConteNT bASED RECOMMENDATION SYSTEM

In [17]:
df['artist']

0                     Queen
1               Pat Benatar
2                       Zox
3       Grand Funk Railroad
4               Frank Zappa
               ...         
4995                   ABBA
4996                   Kiss
4997            John Martyn
4998              Tori Amos
4999          Joni Mitchell
Name: artist, Length: 5000, dtype: object

In [18]:
df['song'][0]

'Soul Brother'

In [19]:
df['text'][0]

"God bless my soul here he comes now  \nThe man with the most how does he do it?  \nSure he's got style he's so heavy  \nHe's a trip can do anything  \nAnything anything  \nHe's my soul brother  \n  \nHe's my best friend he's my champion  \nAnd he will rock you rock you rock you  \n'Cause he's the saviour of the universe  \nHe can make you keep yourself alive  \nMake yourself alive  \nOoh brother cause he's somebody somebody  \nHe can love  \nHe's my soul brother  \n  \nWhan you're under pressure feeling under pressure  \nYeah pressure yeah pressure  \nHe won't let you down  \nWhan you're under pressure  \nOh feeling under pressure yeah pressure  \nSo he won't let you down  \nHe won't he won't he won't let you down  \nHe can do anything anything anything  \nHe's my soul brother  \n  \nYea ah yeah yeah yeah yeah yeah  \nOoh soul brother anything (soul brother)  \nAnything (soul brother) anything (soul brother)  \nHe's my soul brother brother brother brother brother  \nAnything (soul bro

Text Preprocessing 

In [20]:
# Convert all text in the 'text' column to lowercase
# Remove special characters (anything that is not a word character or whitespace)
# Replace newline characters (\n) with a space
df['text'] = df['text'].str.lower().replace(r'[^\w\s]','').replace(r'\n',' ', regex=True)

In [21]:
df['text'][0]


"god bless my soul here he comes now   the man with the most how does he do it?   sure he's got style he's so heavy   he's a trip can do anything   anything anything   he's my soul brother      he's my best friend he's my champion   and he will rock you rock you rock you   'cause he's the saviour of the universe   he can make you keep yourself alive   make yourself alive   ooh brother cause he's somebody somebody   he can love   he's my soul brother      whan you're under pressure feeling under pressure   yeah pressure yeah pressure   he won't let you down   whan you're under pressure   oh feeling under pressure yeah pressure   so he won't let you down   he won't he won't he won't let you down   he can do anything anything anything   he's my soul brother      yea ah yeah yeah yeah yeah yeah   ooh soul brother anything (soul brother)   anything (soul brother) anything (soul brother)   he's my soul brother brother brother brother brother   anything (soul brother) anything (soul brother) 

In [None]:
import nltk
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()

def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)

In [None]:
df['text'] = df['text'].apply(lambda x: tokenization(x))


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
tfidvector = TfidfVectorizer(analyzer='word',stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [None]:
similarity[0]


In [None]:
df[df['song']==''].index[0]


In [None]:
def recommendation(song_df):
    idx = df[df['song'] == song_df].index[0]
    distances = sorted(list(enumerate(similarity[idx])),reverse=True,key=lambda x:x[1])
    
    songs = []
    for m_id in distances[1:21]:
        songs.append(df.iloc[m_id[0]].song)
        
    return songs

In [None]:
recommendation('Alma Mater')


In [None]:
import pickle
pickle.dump(similarity,open('similarity.pkl','wb'))
pickle.dump(df,open('df.pkl','wb'))