<a href="https://colab.research.google.com/github/Nexus4757/Music-Recommendation-System/blob/main/Final_Music_recommendationsys_collaborative.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the song-lyrics CSV into a DataFrame. The path is relative to the
# notebook's working directory, so the file must already be present there.
songs_df = pd.read_csv('spotify_millsongdata.csv')

In [None]:

# Drop unnecessary columns (e.g., 'link') to keep only the relevant data.
# errors='ignore' keeps this cell re-runnable once 'link' has already been removed.
songs_df = songs_df.drop(columns='link', errors='ignore').reset_index(drop=True)
songs_df.head(10)

Unnamed: 0,artist,song,text
0,ABBA,Ahe's My Kind Of Girl,"Look at her face, it's a wonderful face \r\nA..."
1,ABBA,"Andante, Andante","Take it easy with me, please \r\nTouch me gen..."
2,ABBA,As Good As New,I'll never know why I had to go \r\nWhy I had...
3,ABBA,Bang,Making somebody happy is a question of give an...
4,ABBA,Bang-A-Boomerang,Making somebody happy is a question of give an...
5,ABBA,Burning My Bridges,"Well, you hoot and you holler and you make me ..."
6,ABBA,Cassandra,Down in the street they're all singing and sho...
7,ABBA,Chiquitita,"Chiquitita, tell me what's wrong \r\nYou're e..."
8,ABBA,Crazy World,I was out with the morning sun \r\nCouldn't s...
9,ABBA,Crying Over You,I'm waitin' for you baby \r\nI'm sitting all ...


In [None]:
# Display the full lyrics of the first song to inspect the raw text format
# (note the '\r\n' line breaks that the pre-processing step has to handle)
songs_df['text'][0]

"Look at her face, it's a wonderful face  \r\nAnd it means something special to me  \r\nLook at the way that she smiles when she sees me  \r\nHow lucky can one fellow be?  \r\n  \r\nShe's just my kind of girl, she makes me feel fine  \r\nWho could ever believe that she could be mine?  \r\nShe's just my kind of girl, without her I'm blue  \r\nAnd if she ever leaves me what could I do, what could I do?  \r\n  \r\nAnd when we go for a walk in the park  \r\nAnd she holds me and squeezes my hand  \r\nWe'll go on walking for hours and talking  \r\nAbout all the things that we plan  \r\n  \r\nShe's just my kind of girl, she makes me feel fine  \r\nWho could ever believe that she could be mine?  \r\nShe's just my kind of girl, without her I'm blue  \r\nAnd if she ever leaves me what could I do, what could I do?\r\n\r\n"

In [None]:
# Work on a subset only: the pairwise similarity matrix computed later grows
# quadratically with the number of songs, and the runtime runs out of RAM on
# more data.
SAMPLE_SIZE = 5000   # number of songs to keep
RANDOM_SEED = 42     # fixed seed so "Restart & Run All" selects the same songs
songs_df = (
    songs_df
    # min() guards against a ValueError when fewer than SAMPLE_SIZE rows exist
    .sample(n=min(SAMPLE_SIZE, len(songs_df)), random_state=RANDOM_SEED)
    .reset_index(drop=True)
)
songs_df.head()

Unnamed: 0,artist,song,text
0,Kenny Rogers,Heart To Heart,what we 're look at here is a harder situat le...
1,Rolling Stones,Love Is Strong,love is strong and you 're so sweet you make m...
2,Kid Rock,What I Learned Out On The Road,headin ' for the south land lookin ' for some ...
3,David Pomeranz,Born For You,too mani billion peopl run around the planet w...
4,Cake,End Of The Movie,peopl you love will turn their back on you . y...


In [None]:
# Text pre-processing: lowercase the lyrics and turn the '\r\n' line breaks
# (and any other run of whitespace) into single spaces.
# The earlier `.replace(r'^\w\s', ' ')` call was made without regex=True, so it
# only compared whole cell values against that literal string and never changed
# anything; it has been removed rather than silently changing the output.
# NOTE(review): if stripping punctuation was the intent, add
# `.str.replace(r'[^\w\s]', ' ', regex=True)` before the whitespace collapse.
songs_df['text'] = (
    songs_df['text']
    .str.lower()
    .str.replace(r'\s+', ' ', regex=True)
    .str.strip()
)

In [None]:
# Display the first 5 rows of the cleaned dataset
songs_df.head()

Unnamed: 0,artist,song,text
0,Kenny Rogers,Heart To Heart,what we 're look at here is a harder situat le...
1,Rolling Stones,Love Is Strong,love is strong and you 're so sweet you make m...
2,Kid Rock,What I Learned Out On The Road,headin ' for the south land lookin ' for some ...
3,David Pomeranz,Born For You,too mani billion peopl run around the planet w...
4,Cake,End Of The Movie,peopl you love will turn their back on you . y...


In [None]:
import nltk
from nltk.stem.porter import PorterStemmer

# Initialize the Porter Stemmer used to reduce words to their stems
stemmer = PorterStemmer()

# Download the tokenizer data needed by nltk.word_tokenize.
# Recent NLTK releases load the tokenizer from 'punkt_tab' instead of 'punkt';
# requesting both keeps the notebook working on old and new versions (an
# unknown package id is reported by the downloader without raising).
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
# Function to tokenize and stem text
def token(toks):
  """Return ``toks`` split into NLTK word tokens, each Porter-stemmed,
  re-joined into one string with single spaces between tokens."""
  return " ".join(map(stemmer.stem, nltk.word_tokenize(toks)))

In [None]:
# Display output of the function for verification
token("look at her wonderful, wonder")

'look at her wonder , wonder'

In [None]:
# Replace every song's lyrics with their tokenized, stemmed form
songs_df['text'] = songs_df['text'].apply(token)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Feature extraction using TF-IDF - initialize the vectorizer with word-level
# analysis and the built-in English stop-word list.
# NOTE(review): the lyrics are stemmed before vectorizing, so stop words whose
# stem differs from the listed form may not be filtered out - confirm whether
# that matters for the recommendations.
tfid = TfidfVectorizer(analyzer='word', stop_words='english')

In [None]:
# Fit the vectorizer on the stemmed lyrics and transform them into a sparse
# TF-IDF matrix with one row per song
sparse_matrix = tfid.fit_transform(songs_df['text'])

In [None]:
# Inspect the TF-IDF matrix: shape, dtype and number of stored (non-zero) entries
sparse_matrix

<5000x17601 sparse matrix of type '<class 'numpy.float64'>'
	with 276680 stored elements in Compressed Sparse Row format>

In [None]:
# Compute the pairwise cosine similarity between all songs' TF-IDF vectors.
# The result has one row and one column per song, so its size grows
# quadratically with the sample - presumably the reason for sub-sampling earlier.
similarity = cosine_similarity(sparse_matrix)

In [None]:
# Similarity scores of the first song against every song in the sample
similarity[0]

array([1.        , 0.09997972, 0.05301225, ..., 0.05681165, 0.02196694,
       0.0206138 ])

In [None]:
# Re-display the first rows to pick a song title for the index lookup
songs_df.head()

Unnamed: 0,artist,song,text
0,Kenny Rogers,Heart To Heart,what we 're look at here is a harder situat le...
1,Rolling Stones,Love Is Strong,love is strong and you 're so sweet you make m...
2,Kid Rock,What I Learned Out On The Road,headin ' for the south land lookin ' for some ...
3,David Pomeranz,Born For You,too mani billion peopl run around the planet w...
4,Cake,End Of The Movie,peopl you love will turn their back on you . y...


In [None]:
# Index of the first row whose title is exactly 'Heart To Heart'
songs_df[songs_df['song'] == 'Heart To Heart'].index[0]

0

# Recommender Function

In [None]:
# Define a function to recommend similar songs
def recommender(song, top_n=20):
  """Return the titles of the songs whose lyrics are most similar to ``song``.

  Parameters
  ----------
  song : str
      Exact title to look up in ``songs_df['song']``. If several rows share
      the title, the first one is used as the query.
  top_n : int, default 20
      Maximum number of recommendations to return.

  Returns
  -------
  list of str
      Up to ``top_n`` titles ordered by descending cosine similarity; ties
      keep their row order. Other rows may carry the same title as the query.
  str
      The message "Song not found in the database" when no row matches
      (kept for backward compatibility with existing callers).
  """
  # Positional row numbers of the matching titles; `similarity` is indexed by
  # position, so this stays correct even if the frame's index is not 0..n-1.
  matches = np.flatnonzero(songs_df['song'].to_numpy() == song)
  if matches.size == 0:
    return "Song not found in the database"
  idx = matches[0]

  # Rank every song by descending similarity; a stable sort keeps ties in
  # row order, matching the previous sorted(..., reverse=True) behaviour.
  scores = np.asarray(similarity[idx])
  ranked = np.argsort(-scores, kind='stable')

  # Drop the query row explicitly instead of assuming it is ranked first:
  # a duplicate lyric with similarity 1.0 can precede it in the ranking.
  ranked = ranked[ranked != idx]

  return songs_df['song'].iloc[ranked[:max(top_n, 0)]].tolist()

           artist                            song  \
0    Kenny Rogers                  Heart To Heart   
1  Rolling Stones                  Love Is Strong   
2        Kid Rock  What I Learned Out On The Road   
3  David Pomeranz                    Born For You   
4            Cake                End Of The Movie   

                                                text  
0  what we 're look at here is a harder situat le...  
1  love is strong and you 're so sweet you make m...  
2  headin ' for the south land lookin ' for some ...  
3  too mani billion peopl run around the planet w...  
4  peopl you love will turn their back on you . y...  


In [None]:
# Example usage: list the songs whose lyrics are most similar to 'Heart To Heart'
recommender("Heart To Heart")

['Give Your Heart A Break',
 'Hearts',
 'All I Can Give You Is My Heart',
 'It Takes Love',
 'Talk To My Heart',
 'From My Heart',
 'Have A Heart',
 'Heart To Heart',
 'She Never Lets It Go To Her Heart',
 "Don't Let Him Steal Your Heart Away",
 'Wicked Game',
 'I Just Called To Say I Love You',
 'My Heart Beats For Love',
 'Love Knows I Love You',
 'Devil In Her Heart',
 'Heart On The Line',
 'Heart To Heart',
 'Let Her Go',
 'Surrender',
 'Hearts Get Broken All The Time']