# Music Recommendation System


In [1]:
import pandas as pd

In [2]:

df = pd.read_csv("C:/Users/NTC/Downloads/music/spotify_millsongdata.csv")
df.head(5)
df.tail(5)

Unnamed: 0,artist,song,link,text
57645,Ziggy Marley,Good Old Days,/z/ziggy+marley/good+old+days_10198588.html,Irie days come on play \r\nLet the angels fly...
57646,Ziggy Marley,Hand To Mouth,/z/ziggy+marley/hand+to+mouth_20531167.html,Power to the workers \r\nMore power \r\nPowe...
57647,Zwan,Come With Me,/z/zwan/come+with+me_20148981.html,all you need \r\nis something i'll believe \...
57648,Zwan,Desire,/z/zwan/desire_20148986.html,northern star \r\nam i frightened \r\nwhere ...
57649,Zwan,Heartsong,/z/zwan/heartsong_20148991.html,come in \r\nmake yourself at home \r\ni'm a ...


In [3]:
df.shape

(57650, 4)

In [4]:
df.isnull().sum()

artist    0
song      0
link      0
text      0
dtype: int64

In [5]:
df = df.sample(5000).drop('link', axis=1).reset_index(drop=True)

In [6]:
df.head(10)

Unnamed: 0,artist,song,text
0,Travis,Coming Around,Never see you coming around \r\nThey know the...
1,Ace Of Base,Come To Me,[Intro:] \r\n(Dance Music Playing) \r\n \r\...
2,Andy Williams,Let It Be Me,God bless the day I found you \r\nI want to s...
3,David Allan Coe,If This Is Just A Game,"If I give my love to you girl, will I get love..."
4,Otis Redding,That's What My Heart Needs,"You left me, for another \r\nYou told me, he ..."
5,Zao,Praise The War Machine,The death of the Omega pilots the war machine ...
6,Paul Simon,Star Carol,Long years ago on a deep winter night \r\nHig...
7,Natalie Cole,Sophisticated Lady,"Written by chuck jackson, marvin yancy and nat..."
8,Stevie Wonder,Seems So Long,Lady love and lady cared \r\nBut lady went aw...
9,W.A.S.P.,Black Bone Torso,"Black bone torso, \r\nblack bone torso. \r\n..."


In [7]:
df.tail(10)

Unnamed: 0,artist,song,text
4990,Bruce Springsteen,Land Of Hope And Dreams,Grab your ticket and your suitcase \r\nThunde...
4991,O.A.R.,Conquering Fools,Set out on the voyage of the conquering fools ...
4992,Human League,The Lebanon,She dreams of nineteen sixty-nine \r\nBefore ...
4993,Cliff Richard,Ease Along,Spent all of his life in love with money \r\n...
4994,Misfits,Spinal Remains,"This isn't really death, this isn't really lif..."
4995,John Mellencamp,Hit The Road Jack,"Written by Percy Mayfield \r\n \r\nAh, ah, a..."
4996,Lea Salonga,Fallin',I'm afraid to fly \r\nAnd I don't know why \...
4997,Van Halen,Why Can't This Be Love,"Whoa, here it comes \r\nThat funny feelin' ag..."
4998,Tom Lehrer,Smut,"I do have a cause, though, it is obscenity. I'..."
4999,Justin Bieber,The Christmas Song,[Justin Bieber] \r\nChestnuts roasting on an ...


In [8]:
df['text'][0]

"Never see you coming around  \r\nThey know they got their heads screwed on  \r\nI'm standing in the middle of town  \r\nI know I might never come home  \r\nJust standing where I am with all the people passing by me  \r\nThe sound of all these passers-by mixed in with the bus and motor-car  \r\nI must be sure these are the signs  \r\nCause I've been here a million times before  \r\n  \r\nJust tell me when it's coming around, coming around  \r\nI think I see you coming to town, hunting you down  \r\nBringing you round  \r\n  \r\nTell me if I'm bringing you down  \r\nCause I was fine till you came along  \r\nYou tell me that the tears of a clown cloud  \r\nThat I'm confusing while abusing my mind  \r\nSo far away I wanna be  \r\nThat's not as close to you and me  \r\nThe things they call our destiny  \r\nNow why do you have to pick on me at all?  \r\nMy walls are coming down  \r\n  \r\nJust tell me when it's coming around, coming around  \r\nI think I see you coming to town, hunting you 

In [9]:
df.shape

(5000, 3)

Text Cleaning/ Text Preprocessing

In [10]:
df['text'] = df['text'].str.lower().replace(r'^\w\s', ' ').replace(r'\n', ' ', regex=True)

In [11]:
import nltk
from nltk.stem.porter import PorterStemmer
stemmer = PorterStemmer()
nltk.download('punkt')



def tokenization(txt):
    tokens = nltk.word_tokenize(txt)
    stemming = [stemmer.stem(w) for w in tokens]
    return " ".join(stemming)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\NTC\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [12]:
df['text'] = df['text'].apply(lambda x: tokenization(x))

In [13]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [14]:
tfidvector = TfidfVectorizer(analyzer='word', stop_words='english')
matrix = tfidvector.fit_transform(df['text'])
similarity = cosine_similarity(matrix)

In [15]:
similarity[0]

array([1.        , 0.30939379, 0.09148642, ..., 0.16502476, 0.03676262,
       0.01083219])

In [16]:
def recommendation(song_df):
    # Check if the song exists in the DataFrame
    if song_df not in df['song'].values:
        return "Song not found in the dataset."

    # Find the index of the song in the DataFrame
    idx = df[df['song'] == song_df].index[0]

    # Sort the similarities and get the top 20 similar songs
    distances = sorted(
        enumerate(similarity[idx]), reverse=True, key=lambda x: x[1])[1:21]

    # Retrieve the names of the top 20 similar songs
    similar_songs = [df.iloc[m_id[0]]['song'] for m_id in distances]

    return similar_songs

In [17]:
recommendation('A Baby Just Like You')

'Song not found in the dataset.'

In [18]:
import pickle
pickle.dump(similarity, open('similarity.pkl', 'wb'))
pickle.dump(df, open('df.pkl','wb'))

In [19]:
import os
current_dir = os.getcwd()
# Define the file paths
similarity_path = os.path.join(current_dir, 'similarity.pkl')
df_path = os.path.join(current_dir, 'df.pkl')

# Dump the objects to pickle files
pickle.dump(similarity, open(similarity_path, 'wb'))
pickle.dump(df, open(df_path, 'wb'))

# Print out the paths
print(f"Similarity pickle file saved at: {similarity_path}")
print(f"DataFrame pickle file saved at: {df_path}")

Similarity pickle file saved at: c:\Users\NTC\Downloads\similarity.pkl
DataFrame pickle file saved at: c:\Users\NTC\Downloads\df.pkl
