### Music Recommendation System

#### Import Libraries

In [1]:
#Import required libraries
import numpy as np
import pandas as pd
import sklearn
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

#### Load the Dataset

In [2]:
#Load rating dataset
# Published Google Sheets CSV export; reading it requires network access.
# NOTE: `url` is a module-level name that the next cell reassigns.
url = "https://docs.google.com/spreadsheets/d/e/2PACX-1vTl5ZC3Qe9CwagF0ZGpRPVS-prcfCch7GABtNg4vag0ZxPZYEMsZOIosCyoNUbX1zRq1LmBxt9tDbO1/pub?gid=509751214&single=true&output=csv"
ratings = pd.read_csv(url)
# Preview the first rows; the recorded output shows columns userId, musicId, rating, likes.
print(ratings.head())


   userId  musicId  rating  likes
0     562    22640     3.5      4
1      32    14320     5.0      0
2     994     9324     3.0      0
3     730    14645     3.5      4
4     689    19182     5.0      0


In [3]:
#Load music dataset
# Second sheet (gid=0) of the same published spreadsheet; requires network access.
# NOTE: this overwrites the `url` set by the ratings cell.
url = "https://docs.google.com/spreadsheets/d/e/2PACX-1vTl5ZC3Qe9CwagF0ZGpRPVS-prcfCch7GABtNg4vag0ZxPZYEMsZOIosCyoNUbX1zRq1LmBxt9tDbO1/pub?gid=0&single=true&output=csv"
music = pd.read_csv(url)
# Preview the first rows; the recorded output shows columns musicId, title, genre.
print(music.head())

   musicId                                              title genre
0        1  If You Gotta Go (By The Flying Burrito Brother...   Pop
1        2  Yesterday, When I Was Young (By Shirley Bassey...   Pop
2        3               Home Again (By Glen Campbell) (1970)   Pop
3        4                       The Stealer (By Free) (1970)   Pop
4        5              Gotta Travel On (By Bob Dylan) (1970)   Pop


#### Statistical Checks

In [4]:
#Brief Stat Check
# Headline sizes of the ratings table: rows, distinct songs, distinct users.
n_ratings = ratings.shape[0]
n_songs = ratings['musicId'].unique().size
n_users = ratings['userId'].unique().size

print(f"Number of ratings: {n_ratings}")
print(f"Number of unique musicId's: {n_songs}")
print(f"Number of unique users: {n_users}")
# Density indicators: how many ratings an average user gives / an average song receives.
print(f"Average ratings per user: {round(n_ratings/n_users, 2)}")
print(f"Average ratings per song: {round(n_ratings/n_songs, 2)}")

Number of ratings: 249975
Number of unique musicId's: 23493
Number of unique users: 1512
Average ratings per user: 165.33
Average ratings per song: 10.64


In [5]:
#User Rating Frequency
# One row per user with the number of songs that user has rated.
user_freq = (
    ratings
    .groupby('userId')['musicId']
    .count()
    .reset_index(name='n_ratings')
)
print(user_freq.head())

   userId  n_ratings
0       1        155
1       2        171
2       3        157
3       4        161
4       5        178


In [None]:
#Song Rating Stat

#Find Lowest and Highest rated songs:
# Mean rating per musicId (index = musicId, single column 'rating').
mean_rating = ratings.groupby('musicId')[['rating']].mean()

#Lowest rated songs
# idxmin returns the musicId label of the lowest mean rating.
lowest_rated = mean_rating['rating'].idxmin()
# NOTE(review): the result of this lookup is neither assigned nor printed, and it is
# not the last expression of the cell, so the catalog row is never shown.
music.loc[music['musicId'] == lowest_rated]

#Highest rated songs
# idxmax returns the musicId label of the highest mean rating.
highest_rated = mean_rating['rating'].idxmax()
# NOTE(review): same as above - this expression has no visible effect.
music.loc[music['musicId'] == highest_rated]

#Show information about the highest rated song
# Prints the raw rating rows for that song. In the recorded output the "highest rated"
# song has a single 5.0 rating, so these extremes are driven by songs with very few
# ratings - TODO consider a minimum-count filter or a Bayesian average.
print(ratings[ratings['musicId']==highest_rated])
print("---------------------------------------")

#Show information about the lowest rated song
print(ratings[ratings['musicId']==lowest_rated])

        userId  musicId  rating  likes
179885     636    11994     5.0      0
---------------------------------------
        userId  musicId  rating  likes
102087    1481     8538     1.0      0
198659     541     8538     1.0      0


#### Music Genre Preprocessing

In [None]:
#Preprocess Genres
# NOTE(review): import sits mid-notebook; the import cell at the top would be the usual place.
from sklearn.preprocessing import MultiLabelBinarizer

# Split genres into lists (e.g., ["Pop", "Rock", ...])
# Adds a new 'genres' column to `music` in place; the original 'genre' column is kept.
# NOTE(review): a missing genre value would stay non-list here and presumably break the
# binarizer below - verify the column has no NaN.
music['genres'] = music['genre'].str.split('|')

#One-hot encode genres
mlb = MultiLabelBinarizer()
genres_encoded = mlb.fit_transform(music['genres'])
# One row per catalog song, indexed by musicId, one column per genre label.
# Rows follow the order of `music`, not the row order of the ratings matrix built later.
genres_df = pd.DataFrame(genres_encoded, columns=mlb.classes_, index=music['musicId'])

#### Create CSR Matrix For Ratings & Likes

In [8]:
#Create user-item matrix using scipy csr matrix
def create_matrix(df):
    """
    Creates a sparse matrix where rows = music, columns = users, values = ratings.

    Row and column positions follow the ascending order of the unique
    musicId / userId values found in `df`.

    Args:
        df: DataFrame with 'userId', 'musicId' and 'rating' columns.

    Returns:
        X: scipy CSR matrix of shape (n_songs, n_users).
        user_mapper: dict userId -> column index.
        music_mapper: dict musicId -> row index.
        user_inv_mapper: dict column index -> userId.
        music_inv_mapper: dict row index -> musicId.

    Note: if the same (musicId, userId) pair occurs more than once in `df`,
    the sparse constructor adds those ratings together.
    """
    # A single sorted-unique pass per column gives both the ID table and, via
    # return_inverse, each row's position in it - no repeated np.unique calls
    # and no per-row Python dict lookups.
    user_ids, user_index = np.unique(df["userId"].to_numpy(), return_inverse=True)
    music_ids, music_index = np.unique(df["musicId"].to_numpy(), return_inverse=True)

    N = user_ids.size
    M = music_ids.size

    #Map Ids to indices
    user_mapper = dict(zip(user_ids, range(N)))
    music_mapper = dict(zip(music_ids, range(M)))

    #Map indices to IDs
    user_inv_mapper = dict(zip(range(N), user_ids))
    music_inv_mapper = dict(zip(range(M), music_ids))

    X = csr_matrix((df["rating"].to_numpy(), (music_index, user_index)), shape=(M, N))

    return X, user_mapper, music_mapper, user_inv_mapper, music_inv_mapper

# Build the ratings matrix and the ID<->index lookups once; the similarity functions
# defined further down read `music_mapper` / `music_inv_mapper` as module-level globals.
X, user_mapper, music_mapper, user_inv_mapper, music_inv_mapper = create_matrix(ratings)

In [9]:
#Create X_likes (Sparse Matrix for Likes)
def create_likes_matrix(df):
    """
    Build the sparse likes matrix: one row per song, one column per user.

    Positions follow the ascending order of the unique musicId / userId values
    in `df`; each stored value is the 'likes' entry of the corresponding row.

    Args:
        df: DataFrame with 'userId', 'musicId' and 'likes' columns.

    Returns:
        scipy CSR matrix of shape (n_songs, n_users).
    """
    # Sorted unique IDs plus, for every row of df, its position among them.
    user_keys, col_positions = np.unique(np.asarray(df["userId"]), return_inverse=True)
    song_keys, row_positions = np.unique(np.asarray(df["musicId"]), return_inverse=True)

    n_rows, n_cols = len(song_keys), len(user_keys)
    coordinates = (row_positions, col_positions)

    return csr_matrix((df["likes"], coordinates), shape=(n_rows, n_cols))

# Built from the same `ratings` table with the same sorted-unique ID ordering as X,
# so row i / column j refer to the same song / user in both matrices.
X_likes = create_likes_matrix(ratings)

#### Create Hybrid Similarity Metric Function

In [None]:
#Hybrid Similarity Metric Function
from sklearn.metrics.pairwise import cosine_similarity

def hybrid_similarity(music_id, X, X_likes, genres_df, rating_weight=0.5, likes_weight=0.3, genre_weight=0.2):
    """
    Computes hybrid similarity using ratings, likes, and genres.

    Args:
        music_id: ID of the query song; must be a key of the module-level `music_mapper`.
        X: sparse ratings matrix (rows = music, columns = users).
        X_likes: sparse likes matrix with the same row/column layout as X.
        genres_df: one-hot genre DataFrame indexed by musicId.
        rating_weight, likes_weight, genre_weight: weights of the three components.

    Returns: Combined similarity scores for all music relative to `music_id`,
    as a 1-D array with one entry per row of X (same row order as X).

    Raises:
        KeyError: if `music_id` is not in `music_mapper` (song has no ratings).
    """
    #Get the index of the target song (row of X and X_likes)
    music_ind = music_mapper[music_id]

    #Rating-based similarity
    rating_sim = cosine_similarity(X[music_ind], X).flatten()

    #Likes-based similarity (assuming X_likes is a sparse matrix of likes)
    likes_sim = cosine_similarity(X_likes[music_ind], X_likes).flatten()

    #Genre-based similarity
    # genres_df follows the catalog's row order, while the two similarities above follow
    # X's row order. Re-align the genre rows by musicId so position i means the same song
    # in all three vectors. Songs without genre data get an all-zero genre vector
    # (cosine similarity 0) instead of shifting or breaking the combination.
    row_ids = sorted(music_mapper, key=music_mapper.get)
    genre_matrix = (
        genres_df.loc[~genres_df.index.duplicated()]  # reindex needs unique labels
        .reindex(row_ids, fill_value=0)
        .to_numpy()
    )
    genre_sim = cosine_similarity(genre_matrix[[music_ind]], genre_matrix).flatten()

    #Combine similarities with weights
    combined_sim = (
        (rating_weight * rating_sim) +
        (likes_weight * likes_sim) +
        (genre_weight * genre_sim)
    )

    return combined_sim

#### Find Similar Songs

In [None]:
#Find similar music with hybrid similarity model

def find_similar_music_hybrid(music_id, X, X_likes, genres_df, k=10, rating_weight=0.5, likes_weight=0.3, genre_weight=0.2):
    """
    Finds similar music using hybrid similarity (rating + likes + genre).

    Args:
        music_id: ID of the query song; must be a key of the module-level `music_mapper`.
        X, X_likes, genres_df: inputs forwarded to `hybrid_similarity`.
        k: number of similar songs to return.
        rating_weight, likes_weight, genre_weight: component weights forwarded
            to `hybrid_similarity`.

    Returns: List of up to `k` similar music IDs, most similar first. The query
    song itself is never included. An empty list is returned when k <= 0.
    """
    #Compute hybrid similarity scores
    sim_scores = hybrid_similarity(
        music_id,
        X,
        X_likes,
        genres_df,
        rating_weight,
        likes_weight,
        genre_weight
    )

    if k <= 0:
        return []

    #Get top-k most similar music (excluding itself)
    # Drop the query song by its row index rather than assuming it is the single top
    # score: another song can tie with it, and argsort gives no guarantee about which
    # of the tied entries comes last.
    self_ind = music_mapper[music_id]
    ranked = np.argsort(sim_scores)[::-1]
    similar_music_indices = ranked[ranked != self_ind][:k]
    similar_music_ids = [music_inv_mapper[idx] for idx in similar_music_indices]

    return similar_music_ids

# musicId -> title lookup; also read as a module-level global by
# recommend_music_for_user_hybrid further down.
music_titles = dict(zip(music['musicId'], music['title']))

music_id = 595  #Sample music ID

#Get similar music IDs using hybrid similarity
similar_ids = find_similar_music_hybrid(
        music_id, 
        X,  #Sparse matrix of ratings
        X_likes,    #Sparse matrix of likes
        genres_df,   #One-hot encoded genres DataFrame
        k=10,        #Number of recommendations
        rating_weight=0.5, 
        likes_weight=0.3, 
        genre_weight=0.2
)

#Get the title and genre of the input music
music_title = music_titles.get(music_id, "Unknown Song")
# NOTE(review): unlike the title lookup, this has no fallback - .iloc[0] raises
# IndexError if the ID is not present in `music`.
music_genre = music.loc[music['musicId'] == music_id, 'genres'].iloc[0]

#Print the results with genres
print(f"Since you listened to {music_title} ({music_genre}), here are similar songs:")
for song_id in similar_ids:
    song_title = music_titles.get(song_id, "Unknown Song")
    song_genre = music.loc[music['musicId'] == song_id, 'genres'].iloc[0]
    print(f"- {song_title} ({song_genre})")

Since you listened to Make Me Smile (Come Up And See Me) (By Steve Harley) (1975) (['Pop']), here are similar songs:
- Gdfr (Feat. Sage The Gemini & Lookas) (By Flo Rida) (2014) (['Pop'])
- Two Person Party (By Three Plus) (1999) (['Pop'])
- All That I'Ve Got (By The Used) (2004) (['Pop'])
- Corona (By Minutemen) (1984) (['Pop'])
- 1880 Or So (By Television) (1992) (['Pop'])
- I'Ve Been Tired (By Pixies) (1987) (['Pop'])
- I Feel The Earth Move (By Carole King) (1971) (['Pop'])
- Paper Planes (By M.I.A.) (2007) (['Pop'])
- Heart To Hang Onto (By Pete Townshend) (1977) (['Pop'])
- Rain Is Falling (By Electric Light Orchestra) (1981) (['Pop'])


In [12]:
#Find similar music using KNN model

def find_similar_music(music_id, X, k, metric='cosine', show_distance=False):
    """
    Finds the k nearest songs to `music_id` with a brute-force KNN model over X.

    Args:
        music_id: ID of the query song; must be a key of the module-level `music_mapper`.
        X: ratings matrix (rows = music, columns = users).
        k: number of neighbours to return.
        metric: distance metric passed to NearestNeighbors.
        show_distance: when True, return (music_id, distance) pairs instead of bare IDs.

    Returns: List of up to `k` neighbour music IDs, nearest first (or a list of
    (music_id, distance) tuples when `show_distance` is True). The query song
    itself is never included.

    Raises:
        KeyError: if `music_id` is not in `music_mapper`.

    Note: the model is refitted on every call.
    """
    music_ind = music_mapper[music_id]
    music_vec = X[music_ind].reshape(1, -1)

    # Ask for one extra neighbour because the query song is normally its own nearest
    # neighbour, but never more than there are rows (kneighbors raises otherwise).
    n_neighbors = min(k + 1, X.shape[0])
    kNN = NearestNeighbors(n_neighbors=n_neighbors, algorithm="brute", metric=metric)
    kNN.fit(X)

    # Always fetch distances as well: kneighbors then returns a fixed
    # (distances, indices) pair regardless of `show_distance`.
    distances, indices = kNN.kneighbors(music_vec, return_distance=True)

    # Remove the query song by index instead of assuming it is first - a song with an
    # identical rating profile can tie at distance 0 and be listed ahead of it.
    neighbours = [
        (int(idx), float(dist))
        for idx, dist in zip(indices[0], distances[0])
        if int(idx) != music_ind
    ][:k]

    if show_distance:
        return [(music_inv_mapper[idx], dist) for idx, dist in neighbours]
    return [music_inv_mapper[idx] for idx, _ in neighbours]


# musicId -> title lookup (rebuilt here; same construction as in the hybrid demo cell).
music_titles = dict(zip(music['musicId'], music['title']))

# Same sample song as the hybrid demo, so the two recorded outputs can be compared.
music_id = 595

similar_ids = find_similar_music(music_id, X, k=10)
# Direct indexing: raises KeyError if the ID is missing from the catalog.
music_title = music_titles[music_id]

print(f"Since you listened to {music_title} ({music.loc[music['musicId'] == music_id, 'genres'].iloc[0]}), here is a list of similar songs:")
for i in similar_ids:
    print(f"- {music_titles[i]} ({music.loc[music['musicId'] == i, 'genres'].iloc[0]})")

Since you listened to Make Me Smile (Come Up And See Me) (By Steve Harley) (1975) (['Pop']), here is a list of similar songs:
- Gdfr (Feat. Sage The Gemini & Lookas) (By Flo Rida) (2014) (['Pop'])
- Open Your Eyes, You Can Fly (By Lizz Wright) (2003) (['Jazz'])
- Two Person Party (By Three Plus) (1999) (['Pop'])
- Veracruz (By Warren Zevon) (1978) (['Country'])
- Hey, Hey, Hey, Hey (By Elvin Bishop) (1975) (['Blues'])
- All That I'Ve Got (By The Used) (2004) (['Pop'])
- Speak Of The Devil (By Black Pistol Fire) (2017) (['Blues'])
- My Idea Of Fun (By The Stooges) (2007) (['Blues'])
- Love Song (By The Damned) (1979) (['Rock'])
- That'S All I Want From You (By Nina Simone) (1978) (['Blues'])


#### Recommend New Songs

In [None]:
#Create function to recommend songs based on the users preference using hybrid model

def recommend_music_for_user_hybrid(user_id, X, X_likes, genres_df, k=10):
    """
    Print hybrid-similarity recommendations seeded by a user's top-rated song.

    Reads the module-level `ratings`, `music` and `music_titles`. The seed is the
    first song carrying the user's maximum rating; its `k` most similar songs
    (per `find_similar_music_hybrid`) are printed with their genres.

    Returns None in every case; an unknown user or a seed song missing from the
    catalog only produces a message.
    """
    def _genres_of(song):
        # Genre list of the first catalog row matching this musicId.
        return music.loc[music['musicId'] == song, 'genres'].iloc[0]

    rated_by_user = ratings.loc[ratings['userId'] == user_id]
    if rated_by_user.empty:
        print(f"User {user_id} not found.")
        return

    # Row label of the user's best rating -> the song used as the seed.
    best_row = rated_by_user['rating'].idxmax()
    seed_id = rated_by_user.loc[best_row, 'musicId']

    if seed_id not in music_titles:
        print(f"Music ID {seed_id} not found in catalog.")
        return

    seed_title = music_titles[seed_id]
    seed_genre = _genres_of(seed_id)

    recommended_ids = find_similar_music_hybrid(seed_id, X, X_likes, genres_df, k=k)

    print(f"Since you listened to {seed_title} ({seed_genre}), you might also like:")
    for rec_id in recommended_ids:
        # Songs that were rated but are absent from the catalog are listed by ID only.
        if rec_id not in music_titles:
            print(f"- Unknown song (ID: {rec_id})")
            continue
        print(f"- {music_titles[rec_id]} ({_genres_of(rec_id)})")

In [14]:
#Create function to recommend songs based on the users preference using the KNN model.
def recommend_music_for_user(user_id, X, user_mapper, music_mapper, music_inv_mapper, k=10):
    """
    Prints KNN-based recommendations seeded by the user's highest-rated song.

    Reads the module-level `ratings` and `music`. The seed is the first song
    carrying the user's maximum rating; its `k` nearest neighbours (per
    `find_similar_music`) are printed with their genres.

    Args:
        user_id: ID of the user to recommend for.
        X: ratings matrix forwarded to `find_similar_music`.
        user_mapper, music_mapper, music_inv_mapper: accepted for interface
            compatibility but not used here; `find_similar_music` reads the
            module-level mappers.
        k: number of recommendations.

    Returns None in every case; an unknown user or a seed song missing from the
    catalog only produces a message.
    """
    df1 = ratings[ratings['userId'] == user_id]

    if df1.empty:
        print(f"User with ID {user_id} does not exist.")
        return

    # Series.max() skips NaN; the builtin max() gives order-dependent results with NaN.
    music_id = df1.loc[df1['rating'] == df1['rating'].max(), 'musicId'].iloc[0]

    music_titles = dict(zip(music['musicId'], music['title']))

    # Check the catalog before fitting the KNN model so no work is wasted on a dead end.
    if music_id not in music_titles:
        print(f"Song with ID {music_id} not found.")
        return
    music_title = music_titles[music_id]

    similar_ids = find_similar_music(music_id, X, k)

    print(f"Since you listened to {music_title} ({music.loc[music['musicId'] == music_id, 'genres'].iloc[0]}), you might also like:")
    for i in similar_ids:
        # A neighbour can be rated without being in the catalog; the genre lookup
        # would raise IndexError for it, so report it by ID instead.
        if i not in music_titles:
            print(f"- Song not found (ID: {i})")
            continue
        print(f"- {music_titles[i]} ({music.loc[music['musicId'] == i, 'genres'].iloc[0]})")

In [15]:
# Hybrid recommendations for user 150 (compare with the KNN output for the same user below).
recommend_music_for_user_hybrid(user_id=150, X=X, X_likes=X_likes, genres_df=genres_df, k=10)

Since you listened to Consequences (By Marion Meadows) (2018) (['Jazz']), you might also like:
- It Won'T Stop (By Typical Cats) (2001) (['Jazz'])
- Dig My Feeling (By Willie Bobo) (2016) (['Jazz'])
- Harvest Time (By Herbie Hancock) (1979) (['Jazz'])
- A Bushel And A Peck (By Perry Como) (1993) (['Jazz'])
- Alone Again (Naturally) (By Diana Krall) (2014) (['Jazz'])
- Peace (By Branford Marsalis) (1989) (['Jazz'])
- Definition (By Kruder & Dorfmeister) (1993) (['Jazz'])
- Jersey Bounce (By Ella Fitzgerald) (1989) (['Jazz'])
- Just Give Me A Chance (By Peter White) (2009) (['Jazz'])
- I Adore You (By Esperanza Spalding) (2008) (['Jazz'])


In [16]:
# KNN (ratings-only) recommendations for the same user as the hybrid call above.
user_id = 150  #Replace with the desired user ID
recommend_music_for_user(user_id, X, user_mapper, music_mapper, music_inv_mapper, k=10)

Since you listened to Consequences (By Marion Meadows) (2018) (['Jazz']), you might also like:
- Let The Games Begin (By Ajr) (2015) (['Pop'])
- Remember The Time (By Michael Jackson) (1991) (['Pop'])
- Angels Don'T Cry (By The Psychedelic Furs) (1987) (['Pop'])
- Dance With The Devil (By Breaking Benjamin) (2006) (['Pop'])
- Take Me Away (By George Strait) (2019) (['Country'])
- New Theory (By Washed Out) (2009) (['Rock'])
- Inside Burning (By Christafari) (1994) (['Reggae'])
- Danger At My Door (By Mark Chesnutt) (1990) (['Country'])
- Flowers In Your Hair (By The Lumineers) (2012) (['Pop'])
- Jersey Bounce (By Ella Fitzgerald) (1989) (['Jazz'])


In [17]:
# Hybrid recommendations for a second user (1424).
recommend_music_for_user_hybrid(user_id=1424, X=X, X_likes=X_likes, genres_df=genres_df, k=10)

Since you listened to As Long As You Love Me (By Backstreet Boys) (1996) (['Pop']), you might also like:
- Hold On Tight (By Electric Light Orchestra) (1973) (['Pop'])
- Corridor Of Dreams (By The Cleaners From Venus) (1982) (['Pop'])
- Sometime World (By Wishbone Ash) (1972) (['Pop'])
- I Dreamed I Dream (By Sonic Youth) (1981) (['Pop'])
- Hope (By Shaggy) (2000) (['Pop'])
- 1985 (By Bowling For Soup) (2004) (['Pop'])
- Summertime Sadness (Lana Del Rey Vs. Cedric Gervais) (By Lana Del Rey) (2013) (['Pop'])
- Love Is Alright Tonite (By Rick Springfield) (1981) (['Pop'])
- Alone With You (By Tevin Campbell) (1991) (['Pop'])
- Devil Inside (By Inxs) (1987) (['Pop'])


In [18]:
# KNN (ratings-only) recommendations for user 1424, for comparison with the hybrid output above.
user_id = 1424  #Replace with the desired user ID
recommend_music_for_user(user_id, X, user_mapper, music_mapper, music_inv_mapper, k=10)

Since you listened to As Long As You Love Me (By Backstreet Boys) (1996) (['Pop']), you might also like:
- Mail Order Mystics (By John Mayall) (1993) (['Blues'])
- In Your Letter (By Reo Speedwagon) (1980) (['Rock'])
- So High (Yeti Beats Remix) (By Rebelution) (2011) (['Reggae'])
- Want (By Jawbreaker) (1990) (['Rock'])
- Sleepless Night (By The Judds) (1989) (['Country'])
- Sun God (By The Delta Saints) (2017) (['Blues'])
- Lean Wit It, Rock Wit It (By Dem Franchize Boyz) (2006) (['Pop'])
- Jah Love Me (By Israel Vibration) (1981) (['Reggae'])
- I'M So Tired (By Fugazi) (1999) (['Rock'])
- Empty Apartment (By Yellowcard) (2003) (['Pop'])
