In [656]:
import pandas as pd
import numpy as np
from typing import List, Dict

In [657]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


### Loading Dataset


In [658]:
song_df_1 = pd.read_csv('triplets_file.csv')
song_df_1.head()


Unnamed: 0,user_id,song_id,listen_count
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1


In [659]:
song_df_2 = pd.read_csv('song_data.csv')
song_df_2.head()


Unnamed: 0,song_id,title,release,artist_name,year
0,SOQMMHC12AB0180CB8,Silent Night,Monster Ballads X-Mas,Faster Pussy cat,2003
1,SOVFVAK12A8C1350D9,Tanssi vaan,Karkuteillä,Karkkiautomaatti,1995
2,SOGTUKN12AB017F4F1,No One Could Ever,Butter,Hudson Mohawke,2006
3,SOBNYVR12A8C13558C,Si Vos Querés,De Culo,Yerba Brava,2003
4,SOHSBXH12A8C13B0DF,Tangle Of Aspens,Rene Ablaze Presents Winter Sessions,Der Mystic,0


In [660]:
song_df = pd.merge(song_df_1, song_df_2.drop_duplicates(
    ['song_id']), on='song_id', how='left')
song_df.head()


Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,The Cove,Thicker Than Water,Jack Johnson,0
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Flamenco Para Niños,Paco De Lucia,1976
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1,Stronger,Graduation,Kanye West,2007
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1,Constellations,In Between Dreams,Jack Johnson,2005
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1,Learn To Fly,There Is Nothing Left To Lose,Foo Fighters,1999


In [661]:
print(len(song_df_1), len(song_df_2), len(song_df))


2000000 1000000 2000000


DATA PREPROCESSING


In [662]:
# for item similaarity
song_df['tags'] = song_df['title'] + ' - ' + song_df['artist_name'] + \
    ' - ' + song_df['release'] + ' - ' + str(song_df['year'])
song_df.head()


Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year,tags
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,The Cove,Thicker Than Water,Jack Johnson,0,The Cove - Jack Johnson - Thicker Than Water -...
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Flamenco Para Niños,Paco De Lucia,1976,Entre Dos Aguas - Paco De Lucia - Flamenco Par...
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1,Stronger,Graduation,Kanye West,2007,Stronger - Kanye West - Graduation - 0 ...
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1,Constellations,In Between Dreams,Jack Johnson,2005,Constellations - Jack Johnson - In Between Dre...
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1,Learn To Fly,There Is Nothing Left To Lose,Foo Fighters,1999,Learn To Fly - Foo Fighters - There Is Nothing...


FINDING POPULAR SONGS


first display caluculating rank(by percentage)


In [663]:
# Cumuilative sum of listen count of the song
song_grouped = song_df.groupby(['tags']).agg(
    {'listen_count': 'count'}).reset_index()
song_grouped.head()


Unnamed: 0,tags,listen_count
0,#!*@ You Tonight [Featuring R. Kelly] (Explici...,78
1,#40 - DAVE MATTHEWS BAND - Listener Supported ...,338
2,& Down - Boys Noize - Oi Oi oi REMIXED - 0 ...,373
3,' Cello Song - Nick Drake - Way To Blue - An I...,103
4,'97 Bonnie & Clyde - Eminem - The Slim Shady L...,93


In [664]:
grouped_sum = song_grouped['listen_count'].sum()
song_grouped['Percentage'] = (
    song_grouped['listen_count'] / grouped_sum) * 100  # adding percentage column
song_grouped.sort_values(['listen_count', 'tags'], ascending=[0, 1])


Unnamed: 0,tags,listen_count,Percentage
7154,Sehr kosmisch - Harmonia - Musik von Harmonia ...,8277,0.41385
9113,Undo - Björk - Vespertine Live - 0 ...,7032,0.35160
2069,Dog Days Are Over (Radio Edit) - Florence + Th...,6949,0.34745
9912,You're The One - Dwight Yoakam - If There Was ...,6412,0.32060
6801,Revelry - Kings Of Leon - Only By The Night - ...,6145,0.30725
...,...,...,...
3536,Historia Del Portero - Ricardo Arjona - Histor...,51,0.00255
7098,Scared - Three Days Grace - Three Days Grace (...,51,0.00255
2148,Don´t Leave Me Now - Amparanoia - Enchilao - 0...,50,0.00250
2997,Ghosts (Toxic Avenger Mix) - Ladytron - Ghosts...,48,0.00240


POPULARITY RECOMMENDATION ENGINE


In [665]:
# real code/engine
# calculate using rank(without percentage)


In [666]:
class popularity_recommender_py():
    def __init__(self):
        self.train_data = None  # song_df
        self.user_id = None
        self.item_id = None
        self.popularity_recommendations = None

    def create(self, train_data, user_id, item_id):
        self.train_data = train_data
        self.user_id = user_id
        self.item_id = item_id

        train_data_grouped = train_data.groupby([self.item_id]).agg(
            {self.user_id: 'count'}).reset_index()
        train_data_grouped.rename(columns={'user_id': 'score'}, inplace=True)

        train_data_sort = train_data_grouped.sort_values(
            ['score', self.item_id], ascending=[0, 1])

        train_data_sort['Rank'] = train_data_sort['score'].rank(
            ascending=0, method='first')

        self.popularity_recommendations = train_data_sort.head(10)

    def recommend(self, user_id):
        user_recommendations = self.popularity_recommendations

        user_recommendations['user_id'] = user_id

        cols = user_recommendations.columns.tolist()
        cols = cols[-1:] + cols[:-1]
        user_recommendations = user_recommendations[cols]

        return user_recommendations


In [667]:
pr = popularity_recommender_py()


In [668]:
pr.create(song_df, 'user_id', 'title')


In [669]:
pr.recommend(song_df['user_id'][12])


Unnamed: 0,user_id,title,score,Rank
6836,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Sehr kosmisch,8277,1.0
8725,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Undo,7032,2.0
1964,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Dog Days Are Over (Radio Edit),6949,3.0
9496,b80344d063b5ccb3212f76538f3d9e43d87dca9e,You're The One,6729,4.0
6498,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Revelry,6145,5.0
6825,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Secrets,5841,6.0
3437,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Horn Concerto No. 4 in E flat K495: II. Romanc...,5385,7.0
2595,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Fireflies,4795,8.0
3322,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Hey_ Soul Sister,4758,9.0
8494,b80344d063b5ccb3212f76538f3d9e43d87dca9e,Tive Sim,4548,10.0


In [670]:
pr.recommend(song_df['user_id'][123])


Unnamed: 0,user_id,title,score,Rank
6836,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Sehr kosmisch,8277,1.0
8725,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Undo,7032,2.0
1964,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Dog Days Are Over (Radio Edit),6949,3.0
9496,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,You're The One,6729,4.0
6498,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Revelry,6145,5.0
6825,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Secrets,5841,6.0
3437,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Horn Concerto No. 4 in E flat K495: II. Romanc...,5385,7.0
2595,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Fireflies,4795,8.0
3322,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Hey_ Soul Sister,4758,9.0
8494,9d6f0ead607ac2a6c2460e4d14fb439a146b7dec,Tive Sim,4548,10.0


ITEM SIMILARITY


In [671]:
song_df_3 = song_df.head(10000)
song_df_3.head()


Unnamed: 0,user_id,song_id,listen_count,title,release,artist_name,year,tags
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,The Cove,Thicker Than Water,Jack Johnson,0,The Cove - Jack Johnson - Thicker Than Water -...
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2,Entre Dos Aguas,Flamenco Para Niños,Paco De Lucia,1976,Entre Dos Aguas - Paco De Lucia - Flamenco Par...
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBXHDL12A81C204C0,1,Stronger,Graduation,Kanye West,2007,Stronger - Kanye West - Graduation - 0 ...
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBYHAJ12A6701BF1D,1,Constellations,In Between Dreams,Jack Johnson,2005,Constellations - Jack Johnson - In Between Dre...
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SODACBL12A8C13C273,1,Learn To Fly,There Is Nothing Left To Lose,Foo Fighters,1999,Learn To Fly - Foo Fighters - There Is Nothing...


In [672]:
tfidf = TfidfVectorizer(analyzer='word', stop_words='english')


In [673]:
tags_matrix = tfidf.fit_transform(song_df_3['tags'])


In [674]:
cosine_similarities = cosine_similarity(tags_matrix)


In [675]:
similarities = {}


In [676]:
for i in range(len(cosine_similarities)):
    # Now we'll sort each element in cosine_similarities and get the indexes of the songs.
    similar_indices = cosine_similarities[i].argsort()[:-50:-1]
    # After that, we'll store in similarities each name of the 50 most similar songs.
    # Except the first one that is the same song.
    similarities[song_df_3['title'].iloc[i]] = [
        (cosine_similarities[i][x], song_df_3['title'][x], song_df_3['artist_name'][x]) for x in similar_indices][1:]


In [719]:
class ContentBasedRecommender:
    def __init__(self, matrix):
        self.matrix_similar = matrix

    def _print_message(self, song, recom_song):
        rec_items = len(recom_song)

        print(f'The {rec_items} recommended songs for {song} are:')
        dat = []
        for i in range(rec_items):
            print(f"Number {i+1}:")
            print(f"{recom_song[i][1]} by {recom_song[i][2]} with {round(recom_song[i][0], 3)} similarity score")
            print("--------------------")
            dat.append(f"{recom_song[i][1]} by {recom_song[i][2]} with {round(recom_song[i][0], 3)} similarity score")
        # print recommended data
        list_box_1.delete(0, END)
        for i in dat:
            list_box_1.insert(END, i)

    def recommend(self, recommendation):
        # Get song to find recommendations for
        song = recommendation['song']
        # Get number of songs to recommend
        number_songs = recommendation['number_songs']
        # Get the number of songs most similars from matrix similarities
        recom_song = self.matrix_similar[song][:number_songs]
        # print each item
        self._print_message(song=song, recom_song=recom_song)


In [720]:
recommedations = ContentBasedRecommender(similarities)


In [721]:
# recommendation_1 = {
# "song": song_df_3['title'].iloc[100],
# "number_songs": 5
# }


In [722]:
# recommedations.recommend(recommendation_1)


In [723]:
# recommendation_2 = {
# "song": song_df_3['title'].iloc[2000],
# "number_songs": 5
# }


In [724]:
# recommedations.recommend(recommendation_2)


CREATING GUI


In [725]:
import tkinter as tki
from tkinter import BOTH, END, LEFT, ACTIVE

In [726]:
window = tki.Tk()
window.title("Music Recommendation System")
window.geometry("800x600")


''

In [727]:
# update the list box
def update(item):
    # clear the list box
    list_box.delete(0, END)
    # add items
    for i in item:
        list_box.insert(END, i)


In [728]:
# update entry box with listbox event
def fillout(event):
    # clear entry box
    entry_box.delete(0, END)
    # add clicked list item to entry box
    entry_box.insert(0, list_box.get(ACTIVE))


In [729]:
# create function to check entry vs list
def check(event):
    # grab what was typed
    typed = entry_box.get()

    if typed == '':
        item_1 = item_list
    else:
        item_1 = []
        for i in item_list:
            if typed.lower() in i.lower():
                item_1.append(i)
    # update our list box with selected item
    update(item_1)


In [730]:
# function to display recommended items
def start_recommend():
    rec = entry_box.get()
    idx = pd.Index(item_list)
    ind = idx.get_loc(rec)
    recommendation_1 = {
        "song": song_df_3['title'].iloc[ind],
        "number_songs": 5
    }
    recommedations.recommend(recommendation_1)

In [731]:
label = tki.Label(window, text="Music Recommendation System",
                  font=(20), fg="black").pack(pady=10)


In [732]:
label_search = tki.Label(window, text="Start Typing.....", font=(
    "Helvetica", 14), fg="grey").pack(pady=5)

In [733]:
entry_box = tki.Entry(window, font=("Helvetica", 15))
entry_box.pack()


In [734]:
list_box = tki.Listbox(window, width=35)
list_box.pack(pady=2)
item_list = song_df_3['title'].values
update(item_list)

In [735]:
# bindings
# create a binding on the listbox on click
list_box.bind("<<ListboxSelect>>", fillout)

# create a binding on the entry box
entry_box.bind("<KeyRelease>", check)


'2965933680896check'

In [736]:
b = tki.Button(window, text="Recommend",  command=start_recommend,width=(10))
b.pack(pady=10)

In [737]:
label_search = tki.Label(window, text="Recommended songs.....", font=(
    "Helvetica", 14), fg="grey").pack(pady=5)

In [738]:
list_box_1 = tki.Listbox(window, width=60)
list_box_1.pack(pady=2)

In [739]:
window.mainloop()


Exception in Tkinter callback
Traceback (most recent call last):
  File "c:\Users\Chinmay shakya\AppData\Local\Programs\Python\Python39\lib\tkinter\__init__.py", line 1892, in __call__
    return self.func(*args)
  File "C:\Users\Chinmay shakya\AppData\Local\Temp\ipykernel_18052\2032040502.py", line 10, in start_recommend
    recommedations.recommend(recommendation_1)
  File "C:\Users\Chinmay shakya\AppData\Local\Temp\ipykernel_18052\1511391862.py", line 26, in recommend
    recom_song = self.matrix_similar[song][:number_songs]
  File "c:\Users\Chinmay shakya\AppData\Local\Programs\Python\Python39\lib\site-packages\pandas\core\generic.py", line 1668, in __hash__
    raise TypeError(
TypeError: 'Series' objects are mutable, thus they cannot be hashed


The 5 recommended songs for The Cove are:
Number 1:
Moonshine by Jack Johnson with 0.721 similarity score
--------------------
Number 2:
Moonshine by Jack Johnson with 0.721 similarity score
--------------------
Number 3:
Holes To Heaven by Jack Johnson with 0.68 similarity score
--------------------
Number 4:
Taylor by Jack Johnson with 0.466 similarity score
--------------------
Number 5:
If I Could by Jack Johnson with 0.46 similarity score
--------------------
