# Load Data

## Import Library

In [4]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

Mengimpor library yang akan digunakan dalam proyek ini.

## Import Data

In [5]:
# Colab-only step: files.upload() opens a file picker and the chosen file is
# saved into the runtime's working directory; `uploaded` keeps its return value.
# NOTE(review): the recorded output shows the upload being saved as
# "SpotifyFeatures (1).csv" while the next cell reads "SpotifyFeatures.csv" --
# confirm the loader is not picking up an older copy of the file.
from google.colab import files
uploaded = files.upload()

Saving SpotifyFeatures.csv to SpotifyFeatures (1).csv


In [6]:
# Load the Spotify track-features CSV from the working directory.
df = pd.read_csv('SpotifyFeatures.csv')

# First rows, to eyeball the columns and typical values.
print("\nContoh Data : ")
print(df.head())

print("\nInfo Data :")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would append a stray "None" line to the output.
df.info()

print("\nJumlah baris & kolom data :")
print(df.shape)

# Summary statistics of the numeric columns.
print("\nDeskripsi Data : ")
print(df.describe())

print("\nTipe Data : ")
print(df.dtypes)


Contoh Data : 
   genre        artist_name                        track_name  \
0  Movie     Henri Salvador       C'est beau de faire un Show   
1  Movie  Martin & les fées  Perdu d'avance (par Gad Elmaleh)   
2  Movie    Joseph Williams    Don't Let Me Be Lonely Tonight   
3  Movie     Henri Salvador    Dis-moi Monsieur Gordon Cooper   
4  Movie       Fabien Nataf                         Ouverture   

                 track_id  popularity  acousticness  danceability  \
0  0BRjO6ga9RKCKjfDqeFgWV           0         0.611         0.389   
1  0BjC1NfoEOOusryehmNudP           1         0.246         0.590   
2  0CoSDzoNIKCRs124s9uTVy           3         0.952         0.663   
3  0Gc6TVm52BwZD07Ki6tIvf           0         0.703         0.240   
4  0IuslXpMROHdEPvSl1fTQK           4         0.950         0.331   

   duration_ms  energy  instrumentalness key  liveness  loudness   mode  \
0        99373   0.910             0.000  C#    0.3460    -1.828  Major   
1       137373   0.737      

Menampilkan 5 baris pertama sebagai contoh data, beserta info data, jumlah baris & kolom, statistik deskriptif, dan tipe data setiap kolom.

# Persiapan Data yang Akan Digunakan


In [7]:
# Keep only the identifying columns (title, artist, genre) plus the audio
# features; .copy() gives an independent frame so later cleaning does not
# touch the raw `df`.
df_data = df.loc[:, [
    'track_name', 'artist_name', 'genre',
    'acousticness', 'danceability', 'energy',
    'instrumentalness', 'liveness', 'loudness',
    'speechiness', 'tempo', 'valence',
]].copy()

Memilih kolom-kolom yang akan digunakan dan menyimpannya ke dalam df_data.

In [8]:
# Drop rows that are exact duplicates across all selected columns.
# Reassigning (rather than inplace=True) keeps the step explicit and chainable.
df_data = df_data.drop_duplicates()

Menghapus data duplikat.

In [9]:
# Count missing values per column (isna is the canonical name of isnull).
df_data.isna().sum()

Unnamed: 0,0
track_name,1
artist_name,0
genre,0
acousticness,0
danceability,0
energy,0
instrumentalness,0
liveness,0
loudness,0
speechiness,0


Melihat missing value data, dan ditemukan 1 missing value dalam kolom track_name

In [10]:
# Remove rows without a track title: the recommender looks songs up by title.
# Reassigning (rather than inplace=True) keeps the step explicit and chainable.
df_data = df_data.dropna(subset=['track_name'])

Menghapus baris yang memiliki missing value pada kolom track_name.

In [11]:
# Re-check the per-column missing-value counts after the cleanup.
df_data.isna().sum()

Unnamed: 0,0
track_name,0
artist_name,0
genre,0
acousticness,0
danceability,0
energy,0
instrumentalness,0
liveness,0
loudness,0
speechiness,0


Menampilkan hasil pengecekan ulang: setelah data dibersihkan, tidak ditemukan lagi adanya missing value.

In [12]:
# Draw a reproducible (fixed-seed) random subset of 1000 rows, then reset the
# index so row labels run 0..999 and line up with positions in the
# similarity matrix built from this sample.
df_sample = (
    df_data
    .sample(n=1000, random_state=42)
    .reset_index(drop=True)
)

Mengambil 1000 sampel acak untuk mempercepat perhitungan cosine similarity.

In [13]:
# Numeric audio descriptors that drive the similarity computation.
audio_features = [
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'liveness',
    'loudness',
    'speechiness',
    'tempo',
    'valence',
]

# Standardize every feature column so that features measured on different
# scales (e.g. tempo vs. the 0..1 descriptors) contribute comparably.
scaler = StandardScaler()
scaled_sample = scaler.fit_transform(df_sample.loc[:, audio_features])
df_scaled_sample = pd.DataFrame(data=scaled_sample, columns=audio_features)

Melakukan standardisasi pada fitur audio agar setiap fitur memiliki skala yang sebanding.

## Cosine

In [14]:
# Pairwise cosine similarity between the rows of the scaled feature table.
# recommend_song reads row i of this matrix as the scores of sample i against
# every sample, so row order must stay aligned with df_sample.
similarity_matrix = cosine_similarity(df_scaled_sample)

Menghitung matriks kemiripan (cosine similarity) antar lagu berdasarkan fitur audio yang telah distandardisasi.

In [15]:
# Recommend songs based on the title of an input song.
def recommend_song(song_title, df, similarity_matrix, top_n=5):
    """Recommend the tracks most similar to the one titled ``song_title``.

    Parameters
    ----------
    song_title : str
        Title to look up; compared case-insensitively against
        ``df['track_name']``. If several rows share the title, the first
        one (by row position) is used as the query.
    df : pandas.DataFrame
        Track table with at least ``track_name``, ``artist_name`` and
        ``genre`` columns. Row *i* (by position) must correspond to row and
        column *i* of ``similarity_matrix``.
    similarity_matrix : 2-D array-like
        Square matrix of pairwise similarity scores; higher means more
        similar.
    top_n : int, default 5
        Maximum number of recommendations to return. Values below zero are
        treated as zero.

    Returns
    -------
    pandas.DataFrame or None
        Up to ``top_n`` rows (``track_name``, ``artist_name``, ``genre``)
        ordered from most to least similar, with a fresh 0..k-1 index.
        ``None`` is returned (after printing a message) when no track has
        the requested title.
    """
    # Locate the query by row *position*, not by index label: the matrix is
    # addressed positionally, so this stays correct even when `df` does not
    # carry a default 0..n-1 index.
    title_matches = df['track_name'].str.lower() == song_title.lower()
    positions = title_matches.fillna(False).to_numpy(dtype=bool).nonzero()[0]

    # Nothing matched: keep the original contract (message + None).
    if positions.size == 0:
        print(f"No song found with title '{song_title}'.")
        return None

    song_pos = int(positions[0])

    # Exclude the query track explicitly instead of assuming it sorts first.
    # When another track ties with it (identical features, float rounding),
    # dropping "the first result" could discard a real recommendation and
    # return the query song itself.
    candidates = [
        (pos, score)
        for pos, score in enumerate(similarity_matrix[song_pos])
        if pos != song_pos
    ]
    # Stable sort: tracks with equal scores keep their original row order.
    candidates.sort(key=lambda item: item[1], reverse=True)
    top_positions = [pos for pos, _ in candidates[:max(top_n, 0)]]

    # Fetch the display columns of the selected tracks.
    recommended = df.iloc[top_positions][['track_name', 'artist_name', 'genre']]

    return recommended.reset_index(drop=True)

Fungsi rekomendasi lagu berdasarkan judul lagu yang diinput oleh pengguna; output akan menampilkan 5 rekomendasi (nilai default) yang paling mirip berdasarkan fitur audio.

In [16]:
# Show 10 reproducibly chosen titles from the sample to use as test inputs.
df_sample.loc[:, 'track_name'].sample(n=10, random_state=0)

Unnamed: 0,track_name
993,Flying
859,Crave It Still
298,Hear Me Now (feat. DIAMANTE)[Acoustic]
553,Hand It Over
672,Dicen (feat. Kendo)
971,Define Dancing
27,I Knew You Were Trouble
231,Tuba Sonata (Concerto): I. Allegro moderato
306,Ach Basiu
706,Express Yourself


Contoh sample data yang bisa digunakan untuk uji coba jalannya program

In [17]:
# Ask for the default number of recommendations for one title from the sample.
recommend_song(
    song_title="Hand It Over",
    df=df_sample,
    similarity_matrix=similarity_matrix,
)

Unnamed: 0,track_name,artist_name,genre
0,Land Of The Free,The Killers,Rock
1,Safe and Sound,Christian Leave,Indie
2,somethingfartoogoodtofeel,The Japanese House,R&B
3,Have A Little Faith In Me,John Hiatt,Folk
4,Asshole,Denis Leary,Comedy


Memanggil fungsi dan menampilkan 5 lagu rekomendasi.