In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Location of the CBF.csv dataset used throughout this notebook.
data_path = "/content/drive/MyDrive/Dicoding/Capstone/CBF.csv"
# Read the CSV into the working DataFrame.
df = pd.read_csv(filepath_or_buffer=data_path)

In [None]:
# Preview the loaded DataFrame (the notebook renders a truncated view).
df

Unnamed: 0,artist_name,music_name,music_id,moods,gabungan
0,Adele,Easy On Me,0gplL1WMoJ6iYaPgMCL0gX,Happy,Happy Adele
1,Rosalía,LA FAMA (with The Weeknd),6dmXZ9B5HdFAyzHeTneYBK,Relaxed,Relaxed Rosalía
2,DojaCat,Woman,6Uj1ctrBOjOas8xZXGqKk4,Sad,Sad DojaCat
3,Adele,Oh My God,3Kkjo3cT83cw09VJyrLNwX,Anxious,Anxious Adele
4,TravisScott,ESCAPE PLAN,4R67rQNSbbsR4TdUVOIdez,Happy,Happy TravisScott
...,...,...,...,...,...
26710,Legado7 JuniorH,Ojos De Maniaco,52Cpyvd2dKb6XRn313nH87,Relaxed,Relaxed Legado7 JuniorH
26711,LeonBridges TerraceMartin,Sweeter (feat. Terrace Martin),1ehhGlTvjtHo2e4xJFB0SZ,Happy,Happy LeonBridges TerraceMartin
26712,Kygo OhWonder,How Would I Know,52eycxprLhK3lPcRLbQiVk,Happy,Happy Kygo OhWonder
26713,CashCash AndyGrammer,I Found You,3wYOGJYD31sLRmBgCvWxa4,Anxious,Anxious CashCash AndyGrammer


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Initialise the TF-IDF vectorizer.
tf = TfidfVectorizer()

# Learn the vocabulary and IDF weights from the mood labels.
tf.fit(df['moods'])

# List the learned feature names (feature index -> term).
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() is its replacement (requires scikit-learn >= 1.0).
# .tolist() keeps the plain-list display of the returned array.
tf.get_feature_names_out().tolist()



['anxious', 'happy', 'relaxed', 'sad']

In [None]:
# Fit the vectorizer on the mood labels and transform them into a TF-IDF matrix.
tfidf_matrix = tf.fit_transform(raw_documents=df['moods'])

# Inspect the matrix dimensions (rows, features).
tfidf_matrix.shape

(26715, 4)

In [None]:
# Convert the sparse TF-IDF result to a dense matrix with todense() for display.
tfidf_matrix.todense()

matrix([[0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.],
        ...,
        [0., 1., 0., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.]])

In [None]:
# Build a DataFrame to inspect the TF-IDF matrix:
# columns are the mood terms, rows are labelled with the track titles.
# get_feature_names() was removed in scikit-learn 1.2, so the supported
# get_feature_names_out() is used for the column labels.

pd.DataFrame(
    tfidf_matrix.todense(),
    columns=tf.get_feature_names_out(),
    index=df['music_name'],
)



Unnamed: 0_level_0,anxious,happy,relaxed,sad
music_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Easy On Me,0.0,1.0,0.0,0.0
LA FAMA (with The Weeknd),0.0,0.0,1.0,0.0
Woman,0.0,0.0,0.0,1.0
Oh My God,1.0,0.0,0.0,0.0
ESCAPE PLAN,0.0,1.0,0.0,0.0
...,...,...,...,...
Ojos De Maniaco,0.0,0.0,1.0,0.0
Sweeter (feat. Terrace Martin),0.0,1.0,0.0,0.0
How Would I Know,0.0,1.0,0.0,0.0
I Found You,1.0,0.0,0.0,0.0


In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between every pair of rows of the TF-IDF matrix.
# NOTE(review): the result is a dense n x n array; with the 26,715 rows shown
# above that is presumably several GB in memory - confirm the runtime can hold it.
cosine_sim = cosine_similarity(X=tfidf_matrix)
cosine_sim

array([[1., 0., 0., ..., 1., 0., 1.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 1., 0., 1.],
       [0., 0., 0., ..., 0., 1., 0.],
       [1., 0., 0., ..., 1., 0., 1.]])

In [None]:
# Wrap the similarity array in a DataFrame whose rows and columns are both
# labelled with the track titles.
cosine_sim_df = pd.DataFrame(
    data=cosine_sim,
    index=df['music_name'],
    columns=df['music_name'],
)
print('Shape:', cosine_sim_df.shape)

# Peek at a random 10-row by 5-column slice of the similarity matrix.
cosine_sim_df.sample(n=5, axis=1).sample(n=10, axis=0)

Shape: (26715, 26715)


music_name,St. Elmos Fire (Man in Motion),Tears of Gold,Windowpane,Meeting in My Bedroom,Art Deco
music_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Shot Reverse Shot,0.0,0.0,0.0,0.0,0.0
Made Love First,0.0,1.0,0.0,0.0,0.0
Love Is Mystical,0.0,0.0,0.0,0.0,0.0
Country Sh*t (Remix),0.0,0.0,0.0,1.0,1.0
Everything (feat. John Legend),0.0,0.0,0.0,1.0,1.0
Givin the Dog a Bone,0.0,0.0,0.0,0.0,0.0
"Vuela, Vuela (Voyage, Voyage)",0.0,0.0,0.0,0.0,0.0
Give Me A Sign,1.0,0.0,1.0,0.0,0.0
Good Things Fall Apart (with Jon Bellion),1.0,0.0,1.0,0.0,0.0
Eye of the Tiger,0.0,1.0,0.0,0.0,0.0


In [None]:
def music_recommendations(music, similarity_data=None, items=None, k=10):
    """Return up to ``k`` tracks whose titles are most similar to ``music``.

    Parameters
    ----------
    music : str
        Title to look up among the column labels of ``similarity_data``.
    similarity_data : pandas.DataFrame, optional
        Similarity matrix labelled with titles on both axes (row ``i`` and
        column ``i`` are expected to carry the same title). When omitted, the
        notebook-level ``cosine_sim_df`` is used; it is looked up at call time
        so re-running the cell that builds it is picked up without having to
        re-run this definition.
    items : pandas.DataFrame, optional
        Metadata joined to the recommended titles on the column(s) it shares
        with them. When omitted, the ``music_name``, ``artist_name``,
        ``music_id`` and ``moods`` columns of the notebook-level ``df`` are
        used (also resolved at call time).
    k : int, default 10
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        The title column followed by the remaining ``items`` columns, ordered
        from most to least similar, with at most ``k`` rows. Because titles are
        the join key, every ``items`` row sharing a recommended title is
        eligible to appear.

    Raises
    ------
    KeyError
        If ``music`` is not a column label of ``similarity_data``.
    """
    if similarity_data is None:
        similarity_data = cosine_sim_df
    if items is None:
        items = df[['music_name', 'artist_name', 'music_id', 'moods']]

    scores = similarity_data.loc[:, music]
    if isinstance(scores, pd.DataFrame):
        # The title occurs more than once, so .loc returned one column per
        # occurrence; rank against the first one instead of failing on 2-D data.
        scores = scores.iloc[:, 0]
    scores = scores.to_numpy(dtype=float)

    # Full descending ranking. A stable sort keeps tied scores in row order,
    # and works for any k, including k larger than the number of rows.
    order = (-scores).argsort(kind='stable')

    # Remove the queried title and repeated titles BEFORE truncating, so they
    # neither use up result slots nor multiply rows in the merge below.
    ranked = similarity_data.columns[order]
    ranked = ranked.drop(music, errors='ignore').drop_duplicates()
    closest = ranked[:max(k, 0)]

    return pd.DataFrame(closest).merge(items).head(k)

In [None]:
# Look up the row(s) for the track that will be used as the query.
df.loc[df['music_name'] == 'Wanna Be Close']

Unnamed: 0,artist_name,music_name,music_id,moods,gabungan
22641,Avant,Wanna Be Close,3TfBp81XBxnU5RLDKgD8nR,Relaxed,Relaxed Avant


In [None]:
# Request recommendations for the query track using the default settings.
music_recommendations(music='Wanna Be Close')

Unnamed: 0,music_name,artist_name,music_id,moods
0,Que Te Ruegue Quien Te Quiera,BandaElRecodo,4AooK5Hh8OZkZ1XjvVGuTY,Relaxed
1,Que Te Ruegue Quien Te Quiera,BandaElRecodo,6CMUbdue9apkMR5RvIuA3H,Relaxed
2,La Cumbia del Río,LosPikadientesDeCaborca,1Xrg7xC0pXOEErZ3NwMBia,Relaxed
3,Yo Soy Tu Maestro,LosTelez,23EzYG5WcweDqh5RX9nFaW,Relaxed
4,Yo Soy Tu Maestro,LosTelez,760Gu5DFBFjADZq5HvCHrR,Relaxed
5,Shower the People - 2019 Remaster,JamesTaylor,1Pwcxq35Hl2kqwUVd5XCRg,Relaxed
6,Ebrio de Amor,ValentínElizalde,2h9eJrxNG298kW8ar3Oci9,Relaxed
7,De vuelta y vuelta,JarabeDePalo,1laHpjcuexHOKLjjIBbvnP,Relaxed
8,Spill The Wine,EricBurdon War,2nluoJKvE7rTBN7kve8dnr,Relaxed
9,Amor de Estudiante,RobertoJordan,0nwKAvbDJeBs5dPH8sdeRO,Relaxed
