In [1]:
import pandas as pd
from difflib import get_close_matches

In [2]:
# Load the cleaned songs dataset from its relative path.
df = pd.read_csv("../dataset/songsCleaned.csv")

# Overview: dimensions, column names, and the first three rows.
shape, columns, sample = df.shape, df.columns.tolist(), df.head(3)

# Bare tuple as the last expression so the notebook displays all three.
(shape, columns, sample)

((26230, 23),
 ['track_id',
  'track_name',
  'track_artist',
  'track_popularity',
  'track_album_id',
  'track_album_name',
  'track_album_release_date',
  'playlist_name',
  'playlist_id',
  'playlist_genre',
  'playlist_subgenre',
  'danceability',
  'energy',
  'key',
  'loudness',
  'mode',
  'speechiness',
  'acousticness',
  'instrumentalness',
  'liveness',
  'valence',
  'tempo',
  'duration_ms'],
                  track_id                                         track_name  \
 0  6f807x0ima9a1j3VPbc7VN  I Don't Care (with Justin Bieber) - Loud Luxur...   
 1  0r7CVbZTWZgbTCYdfa2P31                    Memories - Dillon Francis Remix   
 2  1z1Hg7Vb0AhHDiEmnDE79l                    All the Time - Don Diablo Remix   
 
    track_artist  track_popularity          track_album_id  \
 0    Ed Sheeran                66  2oCs0DGTsRO98Gh5ZSl2Cx   
 1      Maroon 5                67  63rPSO264uRjW1X5E6cWv6   
 2  Zara Larsson                70  1HoSmj2eLcsrR0vE9gThr4   
 
             

In [3]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Numeric audio features (the relevant ones) used to compare songs.
features = [
    "danceability",
    "energy",
    "speechiness",
    "acousticness",
    "instrumentalness",
    "liveness",
    "valence",
    "tempo",
    "loudness",
]

# Work on a copy of just the feature columns.
X = df.loc[:, features].copy()

# Standardize every column so that differently scaled features
# (e.g. tempo vs. danceability) are not lopsided against each other.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Scaled features as a DataFrame that shares df's index and the feature names.
df_features = pd.DataFrame(data=X_scaled, columns=features, index=df.index)


In [None]:
def _most_similar_positions(similarities, query_pos, n):
    """Return row positions of the ``n`` highest scores, excluding ``query_pos``."""
    # A stable sort on the negated scores yields a descending ranking in which
    # ties are broken deterministically by row position.
    order = np.argsort(-np.asarray(similarities), kind="stable")
    # Remove the query row explicitly instead of assuming it is ranked first:
    # rows with identical feature vectors tie with it, so blindly dropping the
    # top entry could keep the query song and discard a genuine neighbour.
    order = order[order != query_pos]
    return order[:max(n, 0)]


def recommender(n=5):
    """Interactively recommend songs that sound similar to a given title.

    Repeatedly prompts for a song title until ``exit`` is entered. The title
    is matched case-insensitively against ``df["track_name"]``; when several
    rows match, the first one is used. Recommendations are the ``n`` rows of
    ``df_features`` with the highest cosine similarity to the chosen row,
    never including the chosen row itself. If nothing matches exactly, up to
    three close titles are suggested instead.

    Relies on the notebook-level ``df`` (song metadata) and ``df_features``
    (scaled features, row-aligned with ``df``).

    Parameters
    ----------
    n : int, default 5
        Number of recommendations printed per query.

    Returns
    -------
    None
        Everything is written to stdout.
    """
    # Build the title lookup once; it does not change between prompts.
    # Keys are lowercased so fuzzy suggestions are case-insensitive, just like
    # the exact match; values keep the original spelling for display.
    title_by_lower = {}
    for title in df["track_name"].dropna().astype(str).unique():
        title_by_lower.setdefault(title.lower(), title)
    lowered_titles = list(title_by_lower)

    while True:
        song_name = input("\nInput song title (or type 'exit' to stop): ").strip()
        query = song_name.lower()
        if query == "exit":
            print("stoped")
            break

        # Exact (case-insensitive) match; rows with a missing title never match.
        is_match = (
            df["track_name"].str.lower().eq(query).fillna(False).to_numpy(dtype=bool)
        )
        hit_positions = np.flatnonzero(is_match)

        if hit_positions.size == 0:
            # No exact hit: offer the closest titles, if any.
            close = [
                title_by_lower[key]
                for key in get_close_matches(query, lowered_titles, n=3, cutoff=0.5)
            ]
            if not close:
                print(f"couldnt find this song: '{song_name}'")
            else:
                print(f"'{song_name}' not found. Do you mean: {close}?")
            continue

        # Use the first matching row. This is a *position*, so it is valid for
        # .iloc regardless of what index labels df carries.
        pos = int(hit_positions[0])
        chosen = df.iloc[pos]
        print(f"\nchosen song: '{chosen.track_name}' - {chosen.track_artist}")

        # Cosine similarity between the chosen song and every song.
        song_vector = df_features.iloc[pos].to_numpy().reshape(1, -1)
        similarities = cosine_similarity(song_vector, df_features)[0]

        similar_pos = _most_similar_positions(similarities, pos, n)
        picks = df.iloc[similar_pos]

        print("\nsimilary song recommendations:")
        for title, artist, score in zip(
            picks["track_name"], picks["track_artist"], similarities[similar_pos]
        ):
            print(f"- {title} — {artist} (sim index={score:.3f})")


In [5]:
# Start the interactive loop with the default of 5 recommendations per song;
# it keeps prompting until 'exit' is entered.
recommender()


chosen song: 'Young And Beautiful' - Lana Del Rey

similary song recommendations:
- Apricot Princess — Rex Orange County (sim index=0.994)
- Come On — Debra Killings (sim index=0.982)
- Feeling Good — Muse (sim index=0.982)
- Oceans — Seafret (sim index=0.981)
- Nowhere to Run — Brennan Savage (sim index=0.977)
'how do i make you love me' not found. Do you mean: ['Songs To Make You Move', 'take you home', 'I Wonder if I Take You Home']?

chosen song: 'Lose My Mind' - Pacific Air

similary song recommendations:
- Stuck — The Aces (sim index=0.941)
- Part Of Me (Club Remix) (feat. Kareem) — LittleKings (sim index=0.936)
- High — Sir Sly (sim index=0.915)
- Paradise — The Sunnefield (sim index=0.910)
- Alright - 7" House Mix With Rap — Janet Jackson (sim index=0.910)

chosen song: 'lovely (with Khalid)' - Billie Eilish

similary song recommendations:
- Summer Lover — Cub Sport (sim index=0.995)
- Video Games - Remastered — Lana Del Rey (sim index=0.992)
- Hold Me Down — Daniel Caesar (si