In [1]:
import pandas as pd
import numpy as np

from utils.pca_scrath import pca

In [2]:
# Load the preprocessed track table. The path is relative, so the notebook
# must be run from the directory that contains `Data/`.
df = pd.read_csv('Data/Processed/spotify_clean.csv')

In [3]:
# Numeric columns that are fed into PCA.
features = ['danceability', 'energy', 'valence', 'tempo', 'loudness', 'popularity']
# Drop rows missing any of those columns, then renumber the index 0..n-1 so
# that row labels coincide with row positions in arrays derived from `df`.
df = df.dropna(subset=features).reset_index(drop=True)

In [7]:
# Feature matrix as a float NumPy array: one row per row of `df`,
# one column per entry of `features` (same order).
X = df[features].values.astype(float)

In [5]:
def standardize(X):
    """Z-score the columns of ``X`` (zero mean, unit population std).

    Parameters
    ----------
    X : array-like
        Data with samples along axis 0.

    Returns
    -------
    tuple
        ``(X_std, mean, std)`` where ``X_std`` is the centred and scaled
        data and ``mean`` / ``std`` are the per-column statistics of the
        input. ``std`` is returned unmodified, so a constant column is
        reported with a standard deviation of 0.
    """
    mean = np.mean(X, axis=0)
    std = np.std(X, axis=0)
    # A constant column has zero spread; dividing by it would turn the whole
    # column into NaN (0 / 0). Dividing by 1 instead leaves the centred
    # values, which are all 0 for such a column.
    divisor = np.where(std == 0, 1.0, std)
    return (X - mean) / divisor, mean, std

In [None]:
def pca(X, n_components=None):
    """Principal component analysis via eigendecomposition of the covariance.

    The input is first passed through ``standardize``; the covariance of the
    standardized columns is then decomposed and the data projected onto the
    eigenvectors, ordered by decreasing eigenvalue.

    Parameters
    ----------
    X : array-like
        Data with samples in rows and features in columns.
    n_components : int or None
        Number of leading eigenvectors to keep. ``None`` keeps all of them.

    Returns
    -------
    tuple
        ``(X_pca, eigenvalues, eigenvectors)``. ``eigenvalues`` always
        contains every eigenvalue (sorted descending); only ``eigenvectors``
        and the projection ``X_pca`` are truncated to ``n_components``.
    """
    standardized, _, _ = standardize(X)

    # Columns are variables, rows are observations.
    covariance = np.cov(standardized, rowvar=False)
    values, vectors = np.linalg.eigh(covariance)

    # Reorder so the largest eigenvalue (and its eigenvector) comes first.
    descending = np.argsort(values)[::-1]
    values, vectors = values[descending], vectors[:, descending]

    components = vectors if n_components is None else vectors[:, :n_components]

    projected = np.dot(standardized, components)
    return projected, values, components

In [8]:
# Project the feature matrix onto 4 components.
# NOTE(review): `pca` is both imported from utils.pca_scrath (top cell) and
# defined in this notebook; the notebook definition's cell shows `In [None]`,
# so which implementation produced the saved output depends on execution
# order. Confirm the intended one and drop the other.
X_pca, eigenvalues, eigenvectors = pca(X, n_components=4)

In [10]:
# Store each retained component as its own column (PC1, PC2, ...).
# Taking a column slice avoids a Python-level pass over every row, and the
# loop bound follows the number of components actually present in X_pca
# instead of a hard-coded 4.
for i in range(X_pca.shape[1]):
    df[f'PC{i+1}'] = X_pca[:, i]

In [11]:
# Re-standardize the component scores so every PC column has zero mean and
# unit standard deviation before similarities are computed on them; the
# returned mean/std are not needed here.
X_pca_norm, _, _ = standardize(df[['PC1', 'PC2', 'PC3', 'PC4']].values)

In [12]:
def recommend_similar(song_index, top_k=5, data=None, embedding=None):
    """Return the ``top_k`` tracks most cosine-similar to a given track.

    Parameters
    ----------
    song_index : int
        Row position of the query track in ``embedding`` / ``data``.
    top_k : int
        Maximum number of recommendations. Values <= 0 give an empty result.
    data : pandas.DataFrame, optional
        Track table with ``track_name``, ``artist`` and ``genre`` columns,
        row-aligned with ``embedding``. Defaults to the notebook-level ``df``.
    embedding : numpy.ndarray, optional
        2-D array with one vector per track. Defaults to the notebook-level
        ``X_pca_norm``.

    Returns
    -------
    pandas.DataFrame
        The ``track_name``, ``artist`` and ``genre`` columns of the selected
        rows, most similar first. Each (track_name, artist) pair appears at
        most once and the query track's own pair is never returned.
    """
    data = df if data is None else data
    embedding = X_pca_norm if embedding is None else embedding

    target = embedding[song_index]
    denom = np.linalg.norm(embedding, axis=1) * np.linalg.norm(target)

    # Cosine similarity. A zero-length vector has no direction, so instead of
    # dividing 0 by 0 those rows keep -inf and are ranked last.
    similarities = np.full(len(embedding), -np.inf)
    np.divide(np.dot(embedding, target), denom, out=similarities, where=denom > 0)

    # All lookups are positional so they stay aligned with `embedding`
    # regardless of what the DataFrame's index labels are.
    names = data['track_name'].to_numpy()
    artists = data['artist'].to_numpy()

    # Seed with the query's own key: other rows holding the same song
    # (same title and artist) must not be recommended back.
    seen = {(names[song_index], artists[song_index])}
    picked = []
    for pos in np.argsort(-similarities):
        # Checked first so that top_k <= 0 yields nothing.
        if len(picked) >= top_k:
            break
        if pos == song_index:
            continue
        key = (names[pos], artists[pos])
        if key in seen:
            continue
        seen.add(key)
        picked.append(pos)

    cols = ['track_name', 'artist', 'genre']
    return data.iloc[picked][cols]

In [13]:
# Demo: show the query track at row 1000, then its five nearest tracks.
# `idx` is used both as a label (df.loc) and as a row position inside
# recommend_similar; the two agree because the index was reset to 0..n-1.
idx = 1000
print("Gợi ý cho bài hát:")
print(f"→ {df.loc[idx, 'track_name']} - {df.loc[idx, 'artist']}\n")

print("Các bài hát tương tự:")
print(recommend_similar(idx, top_k=5))

Gợi ý cho bài hát:
→ Can't Stay Mad - Danielle Bradbery

Các bài hát tương tự:
                     track_name       artist  genre
4834    Because He Lives (Amen)   Matt Maher      1
103381             Pure Morning      Placebo      5
101959                 It's On!         Korn      5
78595              Stomp Me Out    Bryce Fox      5
24131              Another Life  Yellow Claw     10
