# Music Recommendation System
This project uses a dataset of user listening history to recommend songs using:

- Popularity-based recommendations
- Item similarity-based recommendations

Built as a Machine Learning major project.

Import libraries

In [None]:
import pandas as pd
import numpy as np

Load and prepare data

In [None]:
# Listening history: one row per (user_id, song_id) pair with a listen_count.
triplet_df = pd.read_csv('/content/triplets_file.csv')
# Per-song metadata, joined to the listening history in the merge step.
song_data = pd.read_csv('/content/song_data.csv')

# Report (rows, columns) for both tables, one per line, then preview the
# listening history.
print(triplet_df.shape, song_data.shape, sep='\n')
triplet_df.head()

(365829, 3)
(294496, 5)


Unnamed: 0,user_id,song_id,listen_count
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1.0
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2.0
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1.0
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1.0
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1.0


Merge dataset

In [None]:
# Keep a single metadata row per song_id before joining.
song_data = song_data.drop_duplicates(subset=['song_id'])

# Left-join the metadata onto every listening record, then discard the
# records whose title or artist is missing.
song_df = (
    triplet_df.merge(song_data, how='left', on='song_id')
    .dropna(subset=['title', 'artist_name'])
    .copy()
)

# Readable "<title> - <artist>" label; this is the item key used by the
# grouping and recommender cells below.
song_df['song'] = song_df['title'] + ' - ' + song_df['artist_name']

Subset and reset index for performance

In [None]:
# Work on the first 50,000 rows only (for performance) and renumber them
# 0..n-1 so that later label lookups such as song_df['user_id'][5] do not
# raise KeyError.
song_df = song_df.iloc[:50000].reset_index(drop=True)

In [None]:
# Re-apply the clean-up from the merge cell to the truncated frame: keep only
# rows where both title and artist_name are present.
has_title_and_artist = song_df[['title', 'artist_name']].notna().all(axis=1)
song_df = song_df[has_title_and_artist].copy()

# Rebuild the combined "<title> - <artist>" song field.
song_df = song_df.assign(song=song_df['title'] + ' - ' + song_df['artist_name'])

Show most popular songs (grouping)

In [None]:
# Popularity here is the number of listening records per song: rows with a
# non-null listen_count are counted, the listen_count values are not summed.
song_grouped = song_df.groupby('song')['listen_count'].count().reset_index()

# Each song's share of all counted records, as a percentage.
grouped_sum = song_grouped['listen_count'].sum()
song_grouped['percentage'] = song_grouped['listen_count'].div(grouped_sum).mul(100)

# Top 10: highest count first, ties broken alphabetically by song.
song_grouped.sort_values(by=['listen_count', 'song'], ascending=[False, True]).head(10)

Unnamed: 0,song,listen_count,percentage
621,Dog Days Are Over (Radio Edit) - Florence + Th...,593,1.186
2699,Undo - Björk,590,1.18
412,Canada - Five Iron Frenzy,292,0.584
2017,Représente - Alliance Ethnik,265,0.53
2197,Sincerité Et Jalousie - Alliance Ethnik,257,0.514
1314,Just Dance - Lady GaGa / Colby O'Donis,254,0.508
377,Bulletproof - La Roux,247,0.494
80,Ain't Misbehavin - Sam Cooke,244,0.488
1495,Lucky (Album Version) - Jason Mraz & Colbie Ca...,233,0.466
2242,Somebody To Love - Justin Bieber,232,0.464


Add recommender code

Create Recommenders.py

In [None]:

# Source of the Recommenders module.  It is kept as a string and written to
# Recommenders.py below so that later cells can `import Recommenders`.
code = '''
"""Popularity-based and item-similarity-based recommenders."""
import numpy as np
import pandas as pd


class popularity_recommender_py():
    """Recommend the same most-popular items to every user."""

    def __init__(self):
        self.train_data = None
        self.user_id = None
        self.item_id = None
        self.popularity_recommendations = None

    def create(self, train_data, user_id, item_id):
        """Rank all items in ``train_data`` by popularity.

        ``user_id`` and ``item_id`` are the names of the user and item
        columns.  The ranked table is stored on
        ``self.popularity_recommendations``.
        """
        self.train_data = train_data
        self.user_id = user_id
        self.item_id = item_id

        # Score of an item = number of non-null user entries recorded for it.
        train_data_grouped = train_data.groupby([self.item_id]).agg({self.user_id: 'count'}).reset_index()
        train_data_grouped.rename(columns={self.user_id: 'score'}, inplace=True)

        # Highest score first; equal scores are ordered by item so that the
        # positional ranks assigned below are deterministic.
        train_data_sort = train_data_grouped.sort_values(['score', self.item_id], ascending=[False, True])
        train_data_sort['Rank'] = train_data_sort['score'].rank(ascending=False, method='first')

        self.popularity_recommendations = train_data_sort

    def recommend(self, user_id):
        """Return the top 10 ranked items, labelled with ``user_id``.

        The ranking does not depend on the user; ``user_id`` only fills the
        ``user_id`` column of the returned DataFrame.
        """
        user_recommendations = self.popularity_recommendations.copy()
        user_recommendations['user_id'] = user_id
        columns = ['user_id', self.item_id, 'score', 'Rank']
        return user_recommendations[columns].head(10)


class item_similarity_recommender_py():
    """Recommend items whose users overlap with the users of seed items."""

    def __init__(self):
        self.train_data = None
        self.user_id = None
        self.item_id = None
        self.cooccurence_matrix = None

    def get_user_items(self, user):
        """Return the list of unique items recorded for ``user``."""
        user_data = self.train_data[self.train_data[self.user_id] == user]
        return list(user_data[self.item_id].unique())

    def get_item_users(self, item):
        """Return the set of unique users recorded for ``item``."""
        item_data = self.train_data[self.train_data[self.item_id] == item]
        return set(item_data[self.user_id].unique())

    def get_all_items_train_data(self):
        """Return the list of unique items in the training data."""
        return list(self.train_data[self.item_id].unique())

    def construct_cooccurence_matrix(self, user_items, all_items):
        """Return the Jaccard similarity of each seed item to each item.

        The result is a float ndarray of shape
        ``(len(user_items), len(all_items))``.  Entry ``[i, j]`` is the
        number of users shared by ``user_items[i]`` and ``all_items[j]``
        divided by the number of users who have either; it is 0 when the
        two items share no user (including seed items that do not occur in
        the training data).
        """
        # Collect every item's user set in a single pass over the data
        # instead of re-filtering the whole frame for each item pair.
        grouped_users = self.train_data.groupby(self.item_id, sort=False)[self.user_id].unique()
        users_by_item = {item: set(users) for item, users in grouped_users.items()}
        no_users = set()
        candidate_users = [users_by_item.get(item, no_users) for item in all_items]

        cooccurence_matrix = np.zeros((len(user_items), len(all_items)), dtype=float)
        for i, seed_item in enumerate(user_items):
            users_i = users_by_item.get(seed_item, no_users)
            if not users_i:
                # Nothing can overlap with an empty set; the row stays 0.
                continue
            for j, users_j in enumerate(candidate_users):
                shared = len(users_i & users_j)
                if shared:
                    cooccurence_matrix[i, j] = shared / (len(users_i) + len(users_j) - shared)
        return cooccurence_matrix

    def generate_top_recommendations(self, user, cooccurence_matrix, all_items, user_items):
        """Return up to 10 best-scoring items that are not in ``user_items``.

        An item's score is its column mean in ``cooccurence_matrix``.  The
        result is a DataFrame with columns ``user_id``, ``song``, ``score``
        and ``rank`` (1 = best); it is empty when there are no seed rows.
        """
        columns = ['user_id', 'song', 'score', 'rank']
        # asarray yields a plain 2-D array for both ndarray and np.matrix input.
        scores_by_seed = np.asarray(cooccurence_matrix, dtype=float)
        if scores_by_seed.shape[0] == 0:
            # No seed items: averaging over zero rows would divide by zero.
            return pd.DataFrame([], columns=columns)

        user_sim_scores = (scores_by_seed.sum(axis=0) / scores_by_seed.shape[0]).tolist()
        # Highest score first; equal scores put the larger item index first.
        sort_index = sorted(((e, i) for i, e in enumerate(user_sim_scores)), reverse=True)

        recommendations = []
        for score, i in sort_index:
            if len(recommendations) == 10:
                break
            if np.isnan(score) or all_items[i] in user_items:
                continue
            recommendations.append([user, all_items[i], score, len(recommendations) + 1])
        return pd.DataFrame(recommendations, columns=columns)

    def create(self, train_data, user_id, item_id):
        """Store the training frame and the user/item column names."""
        self.train_data = train_data
        self.user_id = user_id
        self.item_id = item_id

    def recommend(self, user):
        """Return recommendations for ``user`` based on the items they have."""
        user_items = self.get_user_items(user)
        all_items = self.get_all_items_train_data()
        cooccurence_matrix = self.construct_cooccurence_matrix(user_items, all_items)
        return self.generate_top_recommendations(user, cooccurence_matrix, all_items, user_items)

    def get_similar_items(self, item_list):
        """Return items similar to ``item_list``; ``user_id`` is left empty."""
        all_items = self.get_all_items_train_data()
        cooccurence_matrix = self.construct_cooccurence_matrix(item_list, all_items)
        return self.generate_top_recommendations("", cooccurence_matrix, all_items, item_list)
'''

# Write the module next to the notebook so the following cells can import it.
with open("Recommenders.py", "w", encoding="utf-8") as f:
    f.write(code)

Import and run popularity recommender

In [None]:
import Recommenders

# Select any user; here, the one in the sixth row.  .iloc is positional, so
# the lookup does not depend on the index still being a gap-free 0..n-1 range.
user_id = song_df['user_id'].iloc[5]

# Rank songs by how many listening records mention them and show the top 10;
# the popularity model returns the same ranking for every user.
pr = Recommenders.popularity_recommender_py()
pr.create(song_df, 'user_id', 'song')
pr.recommend(user_id)

Unnamed: 0,user_id,song,score,Rank
621,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Dog Days Are Over (Radio Edit) - Florence + Th...,593,1.0
2699,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Undo - Björk,590,2.0
412,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Canada - Five Iron Frenzy,292,3.0
2017,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Représente - Alliance Ethnik,265,4.0
2197,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Sincerité Et Jalousie - Alliance Ethnik,257,5.0
1314,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Just Dance - Lady GaGa / Colby O'Donis,254,6.0
377,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Bulletproof - La Roux,247,7.0
80,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Ain't Misbehavin - Sam Cooke,244,8.0
1495,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Lucky (Album Version) - Jason Mraz & Colbie Ca...,233,9.0
2242,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Somebody To Love - Justin Bieber,232,10.0


Run item similarity recommender

In [None]:
ir = Recommenders.item_similarity_recommender_py()
ir.create(song_df, 'user_id', 'song')

# User's song history
user_items = ir.get_user_items(user_id)
for song in user_items:
    print(song)

# Recommendations based on history.  Only the final expression of a cell is
# rendered automatically, so this mid-cell result has to be printed or it is
# computed and then thrown away.
print(ir.recommend(user_id))

# Similar songs
ir.get_similar_items(['Use Somebody - Kings Of Leon'])

Entre Dos Aguas - Paco De Lucia
Stronger - Kanye West
Paper Gangsta - Lady GaGa
Heaven's gonna burn your eyes - Thievery Corporation feat. Emiliana Torrini
Let It Be Sung - Jack Johnson / Matt Costa / Zach Gill / Dan Lebowitz / Steve Adams
I'll Be Missing You (Featuring Faith Evans & 112)(Album Version) - Puff Daddy
Love Shack - The B-52's
Behind The Sea [Live In Chicago] - Panic At The Disco
Country Road - Jack Johnson / Paula Fuga
Oh No - Andrew Bird
Love Song For No One - John Mayer
The Middle - Jimmy Eat World
The Christmas Song  (LP Version) - King Curtis
Drive - Incubus
Right Back - Sublime


Unnamed: 0,user_id,song,score,rank
0,,Me Da Igual - Camila,0.0,1
1,,Crying Like A Church On Monday - New Radicals,0.0,2
2,,Coming Of Age - Jay-Z featuring Memphis Bleek,0.0,3
3,,Artifacts of the black rain - In Flames,0.0,4
4,,My Favorite Mistake - Sheryl Crow,0.0,5
5,,Drown - Three Days Grace,0.0,6
6,,Unloveable - The Smiths,0.0,7
7,,Didgeridoo - Aphex Twin,0.0,8
8,,Full Moon - The Black Ghosts,0.0,9
9,,Fortress Around Your Heart - Sting,0.0,10


Provide recommendations based on selected songs

In [None]:
# Items most similar to a hand-picked pair of seed songs.
# NOTE(review): the recorded output shows a score of 0.0 for every row, which
# suggests these seeds share no listeners with any song in the 50,000-row
# subset (or are absent from it) - confirm before relying on the result.
ir.get_similar_items([
    'Oliver James - Fleet Foxes',
    'The End - Pearl Jam',
])

Unnamed: 0,user_id,song,score,rank
0,,Me Da Igual - Camila,0.0,1
1,,Crying Like A Church On Monday - New Radicals,0.0,2
2,,Coming Of Age - Jay-Z featuring Memphis Bleek,0.0,3
3,,Artifacts of the black rain - In Flames,0.0,4
4,,My Favorite Mistake - Sheryl Crow,0.0,5
5,,Drown - Three Days Grace,0.0,6
6,,Unloveable - The Smiths,0.0,7
7,,Didgeridoo - Aphex Twin,0.0,8
8,,Full Moon - The Black Ghosts,0.0,9
9,,Fortress Around Your Heart - Sting,0.0,10


Recommendation for another song

In [None]:
# Items most similar to a single seed song.
# NOTE(review): the recorded output shows a score of 0.0 for every row, which
# suggests this seed shares no listeners with any song in the 50,000-row
# subset (or is absent from it) - confirm before relying on the result.
ir.get_similar_items(
    ['Use Somebody - Kings Of Leon']
)

Unnamed: 0,user_id,song,score,rank
0,,Me Da Igual - Camila,0.0,1
1,,Crying Like A Church On Monday - New Radicals,0.0,2
2,,Coming Of Age - Jay-Z featuring Memphis Bleek,0.0,3
3,,Artifacts of the black rain - In Flames,0.0,4
4,,My Favorite Mistake - Sheryl Crow,0.0,5
5,,Drown - Three Days Grace,0.0,6
6,,Unloveable - The Smiths,0.0,7
7,,Didgeridoo - Aphex Twin,0.0,8
8,,Full Moon - The Black Ghosts,0.0,9
9,,Fortress Around Your Heart - Sting,0.0,10


## Conclusion
- Popularity model gives general trending songs.
- Item similarity model gives personalized recommendations based on user listening history.