In [14]:
# Music Recommendation System (By gaurab aka kaito :p)
# (PS: there are many more things to add here in the future, so sorry if it is not up to the mark yet)
# (I'll add as many comments as I can to make it more understandable if you want to try it out yourself)
# This script uses a content-based recommendation system to suggest songs based on lyrics similarity.
# The core algorithm is TF-IDF to analyze song lyrics and cosine similarity to find similar songs.

# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
from sklearn.decomposition import TruncatedSVD

In [15]:
# Load the dataset
# The CSV is expected to provide, per song, the artist name, the song title and the lyrics.
# The notebook's assumption is that the file's columns are 'artist', 'song', 'link', 'text'.
songs = pd.read_csv('Path/songdata.csv')
# Drop the unused 'link' column when it is present. Without this, assigning three
# column names to a four-column frame raises a length-mismatch ValueError.
# errors='ignore' keeps files that never had a 'link' column working as before.
songs = songs.drop(columns=['link'], errors='ignore')
# Explicitly name the remaining columns for clarity
songs.columns = ['artist', 'song', 'text']

In [16]:
# Clean the lyrics by turning line breaks into a single space (a space rather than
# nothing, so the last word of one line is not glued to the first word of the next).
# The pattern matches real line breaks (\n or \r\n) as well as the literal
# two-character sequence backslash + 'n' in case the lyrics were stored escaped.
songs['text'] = songs['text'].str.replace(r'\\n|\r?\n', ' ', regex=True)
# Sample a smaller subset for faster computation (the pairwise similarity matrix
# grows quadratically with the number of songs, so 10k keeps memory manageable).
SAMPLE_SIZE = 10000  # upper bound on the number of songs kept
SAMPLE_SEED = 42     # fixed seed so the same subset is drawn on every run
songs = (
    songs
    # min() guards against datasets smaller than SAMPLE_SIZE, where sample() would raise
    .sample(n=min(SAMPLE_SIZE, len(songs)), random_state=SAMPLE_SEED)
    .reset_index(drop=True)
)

In [17]:
# Vectorize the lyrics with TF-IDF
# Each song becomes a sparse row of word weights: words frequent in that song but
# rare across the whole collection get the highest weight. English stop words are
# removed before weighting.
tfidf = TfidfVectorizer(
    stop_words='english',
    analyzer='word',
)
lyrics_matrix = tfidf.fit_transform(songs['text'])

In [18]:
# Dimensionality reduction with SVD (TruncatedSVD)
# Projects the sparse TF-IDF matrix onto 100 dense components.
# random_state is fixed because the default solver is randomized: without a seed
# the components (and every similarity score derived from them) differ between runs.
svd = TruncatedSVD(n_components=100, random_state=42)  # Adjust number of components based on your needs
lyrics_matrix_reduced = svd.fit_transform(lyrics_matrix)

In [19]:
# Pairwise cosine similarity between all songs, based on their reduced lyric vectors.
# Cosine similarity compares the direction of two vectors (the angle between them),
# so songs with a similar mix of words score high regardless of lyric length.
cosine_similarities = cosine_similarity(X=lyrics_matrix_reduced)

In [20]:
# Dictionary to store the top similar songs for each song in the dataset.
# Keys are song titles, values are lists of (score, song, artist) tuples ordered
# from most to least similar. Because the key is the title alone, songs that
# share a title overwrite each other and the last one processed wins.
TOP_N_SIMILAR = 50  # number of neighbours kept per song

# Pull the columns out once instead of doing a pandas lookup for every neighbour
song_titles = songs['song'].tolist()
song_artists = songs['artist'].tolist()

similarities = {}
for i, scores in enumerate(cosine_similarities):
    # Best-first ranking; one extra candidate is kept so that TOP_N_SIMILAR
    # entries remain after the song itself has been removed.
    ranked = scores.argsort()[::-1][:TOP_N_SIMILAR + 1]
    # Remove the song itself by index rather than by position: with tied scores
    # (e.g. duplicate lyrics) it is not guaranteed to be ranked first.
    neighbours = [x for x in ranked if x != i][:TOP_N_SIMILAR]
    similarities[song_titles[i]] = [
        (scores[x], song_titles[x], song_artists[x]) for x in neighbours
    ]

In [21]:
# Define the content-based recommender system class
class SongRecommender:
    """Look up and print precomputed song recommendations.

    Parameters
    ----------
    similarity_matrix : dict
        Maps a song title to a list of ``(score, song, artist)`` tuples.
        ``recommend`` prints the first ``number_songs`` entries of that list,
        so the list should already be ordered from most to least similar.
    """

    def __init__(self, similarity_matrix):
        self.similarity_matrix = similarity_matrix

    def print_recommendations(self, target_song, recommended_songs):
        """Print the recommended songs in a user-friendly format.

        Parameters
        ----------
        target_song : str
            Title the recommendations were requested for (shown in the header).
        recommended_songs : iterable of (score, song, artist)
            Recommendations to print, numbered from 1 in iteration order.
        """
        print(f"Recommended songs for '{target_song}':")
        for idx, (score, song, artist) in enumerate(recommended_songs, start=1):
            print(f"Recommendation {idx}:")
            print(f"Song: {song} | Artist: {artist} | Similarity Score: {round(score, 3)}")
            print("-------------")

    def find_song_key(self, song_name):
        """Find the stored song key matching ``song_name``, ignoring case.

        Returns the key with its original casing (the first match in the
        dictionary's iteration order), or ``None`` when nothing matches or
        ``song_name`` is not a string.
        """
        if not isinstance(song_name, str):
            return None
        normalized_song_name = song_name.lower()  # Normalize input song name
        for key in self.similarity_matrix:
            # Non-string keys (e.g. NaN coming from a missing title) can never
            # match a requested name and would crash on .lower(), so skip them.
            if isinstance(key, str) and key.lower() == normalized_song_name:
                return key  # Return the key with the correct casing
        return None

    def recommend(self, song_request):
        """Print recommendations for a request dictionary.

        Parameters
        ----------
        song_request : dict
            Must contain ``'song'`` (title to look up, case-insensitive) and
            ``'number_songs'`` (how many recommendations to print).

        Raises
        ------
        KeyError
            If ``song_request`` lacks one of the two required keys.
        """
        song_name = song_request['song']  # Keep original casing for display
        # A negative count would slice from the end of the list ("all but the
        # last k"), which is never what the caller means; treat it as zero.
        num_recommendations = max(0, song_request['number_songs'])

        # Find the actual song key regardless of case
        actual_song_key = self.find_song_key(song_name)

        # Compare against None explicitly: an empty-string title is a valid key
        # but is falsy, so a plain truthiness test would report it as missing.
        if actual_song_key is not None:
            recommended_songs = self.similarity_matrix[actual_song_key][:num_recommendations]
            self.print_recommendations(actual_song_key, recommended_songs)
        else:
            print(f"Song '{song_name}' not found in the dataset.")

In [22]:
# Build the recommender on top of the precomputed `similarities` dictionary
# (song title -> list of (score, song, artist) tuples).
song_recommender = SongRecommender(similarity_matrix=similarities)

In [23]:
# Example: request recommendations for one song taken from the sampled dataset
recommendation_request = dict(
    song=songs['song'].iloc[10],  # title at row position 10 (the 11th row; positions start at 0)
    number_songs=4,  # print the 4 most similar songs
)
song_recommender.recommend(recommendation_request)

Recommended songs for 'Who Dat':
Recommendation 1:
Song: About All That | Artist: Lil Wayne | Similarity Score: 0.856
-------------
Recommendation 2:
Song: I Got Some Money On Me | Artist: Lil Wayne | Similarity Score: 0.853
-------------
Recommendation 3:
Song: All About Money | Artist: Young Buck | Similarity Score: 0.853
-------------
Recommendation 4:
Song: U-Way (How We Do It) (Remix) | Artist: Youngbloodz | Similarity Score: 0.847
-------------


In [24]:
# (This is for input-based recommendation if needed, but the dataset is huge and it takes a toll on my potato PC :p)
# (I will definitely try to optimise it in the future, if anyone is reading the comments)
# def get_recommendations():
    # Ask for user input
    # song_name = input("Enter a song name: ")
    # num_recommendations = int(input("Enter the number of recommendations you want: "))  # Prompt for number of recommendations
    
    # Create a request dictionary
    # recommendation_request = {
        # 'song': song_name,
        # 'number_songs': num_recommendations
    # }

    # Generate recommendations
    # song_recommender.recommend(recommendation_request)

# Call the function to get recommendations
# get_recommendations()

In [25]:
# A second example request, for a different song from the sampled dataset
recommendation_request_2 = dict(
    song=songs['song'].iloc[120],  # title at row position 120 (the 121st row; positions start at 0)
    number_songs=4,  # print the 4 most similar songs
)


In [26]:
# Print the recommendations for the second example request (`recommendation_request_2`).
# The title is looked up case-insensitively; a "not found" message is printed
# instead when the title is not a key of the similarity dictionary.
song_recommender.recommend(recommendation_request_2)


Recommended songs for 'Star':
Recommendation 1:
Song: From The Inside | Artist: Def Leppard | Similarity Score: 0.96
-------------
Recommendation 2:
Song: More And More | Artist: R. Kelly | Similarity Score: 0.958
-------------
Recommendation 3:
Song: For Me It's You | Artist: Train | Similarity Score: 0.956
-------------
Recommendation 4:
Song: Haunted Bumps | Artist: Insane Clown Posse | Similarity Score: 0.956
-------------
