In [55]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_similarity

In [56]:
# Load the track table (one row per track/genre entry).
df = pd.read_csv("SpotifyFeatures.csv")   # update name if needed

# Drop rows missing any identifying field, then renumber rows 0..n-1.
# Without reset_index, dropna leaves gaps in the index, so index labels would no
# longer equal row positions in the NumPy feature matrices built from df later on.
df = df.dropna(subset=['track_name', 'artist_name', 'genre']).reset_index(drop=True)

In [57]:
def find_features(df):
    """Rank the audio features by their importance for predicting genre.

    Fits a random-forest classifier that predicts the label-encoded
    ``df['genre']`` from nine audio feature columns, prints the resulting
    importance table, and returns it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``genre`` column and the nine audio feature columns
        listed in ``features`` below.

    Returns
    -------
    pandas.DataFrame
        Columns ``feature`` and ``importance``, sorted by ``importance`` in
        descending order (original positional labels are kept as the index).
    """
    features = [
        'acousticness', 'danceability', 'energy',
        'instrumentalness', 'liveness', 'loudness',
        'speechiness', 'tempo', 'valence',
    ]

    # Genre strings become integer class ids for the classifier.
    genre_codes = LabelEncoder().fit_transform(df['genre'])

    # Fixed random_state keeps the importance ranking repeatable between runs.
    forest = RandomForestClassifier(n_estimators=50, max_depth=10, random_state=42)
    forest.fit(df[features], genre_codes)

    # One row per feature, most important first.
    table = pd.DataFrame({
        'feature': features,
        'importance': forest.feature_importances_,
    })
    importance = table.sort_values('importance', ascending=False)

    print("\nFeature Importance:")
    print(importance)

    return importance

In [58]:
# Fit the genre classifier on the loaded frame; the call prints the ranked
# importance table and the returned DataFrame is kept for later inspection.
importance = find_features(df)


Feature Importance:
            feature  importance
6       speechiness    0.241814
3  instrumentalness    0.143141
0      acousticness    0.135836
1      danceability    0.119356
2            energy    0.104978
5          loudness    0.098660
8           valence    0.076600
4          liveness    0.055643
7             tempo    0.023972


In [59]:
# Audio columns that feed the similarity model. The order is significant:
# the per-feature weight arrays defined further down are applied positionally.
feat_cols = [
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'tempo',
    'valence',
    'speechiness',
    'loudness',
]

In [60]:
# Standardize every audio column so that no single
# feature dominates the similarity computation purely because of its scale.
scaler = StandardScaler()
scaler.fit(df[feat_cols])
X_audio = scaler.transform(df[feat_cols])

In [86]:
# Per-feature weights, positionally aligned with feat_cols:
#   acousticness, danceability, energy, instrumentalness,
#   tempo, valence, speechiness, loudness
# RF_feature mirrors the printed random-forest importances (scaled by ~10 and rounded).
RF_feature = np.array([1.36, 1.19, 1.05, 1.43, 0.24, 0.77, 2.42, 0.99])
# Manual_feature: presumably hand-chosen weights — TODO document the rationale.
Manual_feature = np.array([0.7, 1.3, 1.5, 0.8, 1.8, 1.2, 1.0, 1.0])

# Blend the two weightings: 30% random-forest, 70% manual.
feature_weights = 0.3 * RF_feature + 0.7 * Manual_feature

# Broadcast the 8 weights across every row of the standardized audio matrix.
X_audio_weighted = feature_weights * X_audio

In [87]:
# Encode each row's genre label as a TF-IDF bag of words.
# NOTE(review): TfidfVectorizer's default token_pattern keeps only tokens of two or
# more word characters, so a label made solely of one-character tokens (e.g. "R&B")
# would get an all-zero vector — verify against the genre values in the dataset.
tfidf = TfidfVectorizer()
genre_tfidf = tfidf.fit_transform(df["genre"])

# Reduce genre dimensions
# TruncatedSVD's default solver is randomized; pinning random_state makes the
# embedding (and every recommendation derived from it) reproducible across runs.
svd = TruncatedSVD(n_components=5, random_state=42)
genre_emb = svd.fit_transform(genre_tfidf)

# Apply genre weight
# Down-weights the genre embedding by a constant factor before it is used downstream.
genre_emb = genre_emb * 0.3

In [88]:
# Concatenate column-wise: weighted audio features first, then the genre embedding.
# Both inputs must have one row per track, in the same row order.
X_combined = np.hstack((X_audio_weighted, genre_emb))

In [89]:
#pca = PCA(n_components=10)
#X_pca = pca.fit_transform(X_combined)

In [90]:
# No dimensionality reduction is applied (the PCA step is commented out),
# so the recommender works directly on the combined matrix.
X_final = X_combined

In [109]:
def recommend(song_name, df, features_matrix, k=10):
    """Return up to ``k`` tracks most similar to ``song_name``.

    Similarity is the cosine similarity between rows of ``features_matrix``.
    The first row whose track name matches ``song_name`` (case-insensitively)
    is used as the query vector.

    Parameters
    ----------
    song_name : str
        Track name to look up; compared case-insensitively with ``df['track_name']``.
    df : pandas.DataFrame
        Track metadata with ``track_name``, ``artist_name`` and ``genre`` columns.
        Row ``i`` of ``df`` (by position) must describe row ``i`` of ``features_matrix``.
    features_matrix : numpy.ndarray
        2-D feature matrix with one row per row of ``df``.
    k : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    list of dict
        Dicts with keys ``track_name``, ``artist_name``, ``genre`` and
        ``similarity``, ordered from most to least similar. Each track name
        appears at most once, and tracks matching the query name are never
        returned. Fewer than ``k`` entries are returned if the dataset runs out.

    Raises
    ------
    ValueError
        If ``song_name`` is not present in ``df``, or if ``df`` and
        ``features_matrix`` do not have the same number of rows.
    """
    if features_matrix.shape[0] != len(df):
        raise ValueError(
            f"features_matrix has {features_matrix.shape[0]} rows but df has {len(df)}."
        )

    # Work with row *positions*, not index labels: after dropna/filtering the
    # DataFrame index is not guaranteed to be 0..n-1, whereas the feature matrix
    # and the similarity vector are plain positional arrays.
    is_query = (
        (df['track_name'].str.lower() == song_name.lower())
        .fillna(False)
        .to_numpy(dtype=bool)
    )
    query_positions = np.flatnonzero(is_query)
    if query_positions.size == 0:
        raise ValueError(f"Song '{song_name}' not found in dataset.")

    query_vector = features_matrix[query_positions[0]].reshape(1, -1)
    similarities = cosine_similarity(query_vector, features_matrix).flatten()

    # Rank by similarity, most similar first.
    ranked_positions = np.argsort(similarities)[::-1]

    results = []
    seen = set()

    for pos in ranked_positions:
        if len(results) >= k:
            break

        # Never recommend the query track itself (or same-named duplicates of it),
        # even when k exceeds the number of other tracks.
        if is_query[pos]:
            continue

        name = df["track_name"].iloc[pos]
        if name in seen:
            continue
        seen.add(name)

        results.append({
            "track_name": name,
            "artist_name": df["artist_name"].iloc[pos],
            "genre": df["genre"].iloc[pos],
            "similarity": float(similarities[pos]),
        })

    return results

In [110]:
# Query track; recommend() matches it case-insensitively against df['track_name'].
song = "SLOW DANCING IN THE DARK - Loud Luxury Remix"

# Up to 10 nearest tracks by cosine similarity in the combined feature space.
recommendations = recommend(song, df, X_final, k=10)

# One dict per line: track_name, artist_name, genre, similarity.
for r in recommendations:
  print(r)

{'track_name': 'NBD (Phlegmatic Dogs Remix)', 'artist_name': 'Boombox Cartel', 'genre': 'Electronic', 'similarity': 0.9922138306649337}
{'track_name': 'Go', 'artist_name': 'The Chemical Brothers', 'genre': 'Alternative', 'similarity': 0.9894916882595175}
{'track_name': 'I Need A Painkiller', 'artist_name': 'Armand Van Helden', 'genre': 'Electronic', 'similarity': 0.9880155233308786}
{'track_name': 'Days Are Gone', 'artist_name': 'HAIM', 'genre': 'Folk', 'similarity': 0.9879846455946131}
{'track_name': 'Noche Y De Dia', 'artist_name': 'Enrique Iglesias', 'genre': 'Dance', 'similarity': 0.9872440091412832}
{'track_name': 'Right In Front Of Me feat. Kaleena Zanders', 'artist_name': 'Win and Woo', 'genre': 'Soul', 'similarity': 0.987157136295088}
{'track_name': 'Fire Inside feat. RIOT - Funky VIP', 'artist_name': 'Apashe', 'genre': 'Electronic', 'similarity': 0.9868367997999684}
{'track_name': 'Kanzenni manzokushita man (Perfect Satisfied Man)', 'artist_name': 'Nanahira', 'genre': 'Anime',