In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import euclidean_distances
import numpy as np
import requests
from bs4 import BeautifulSoup
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt


In [None]:
# Audio features (numeric columns) that drive the similarity analysis
features = [
    "energy",
    "danceability",
    "valence",
    "acousticness",
    "speechiness",
    "instrumentalness",
    "tempo",
    "loudness",
]

# Read the raw dataset from disk
spotify_df = pd.read_csv("high_popularity_spotify_data.csv")

# Keep only the rows in which every selected feature is present
spotify_df_clean = spotify_df.dropna(subset=features)


In [None]:
# Positions (in the cleaned, re-indexed frame) of the 3 songs used as queries
query_indices = [1, 3, 5]

# Min-Max scale the selected feature columns
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(spotify_df_clean[features])

# Wrap the scaled matrix in a DataFrame whose columns are "<feature>_scaled"
scaled_df = pd.DataFrame(
    scaled_features,
    columns=[f"{col}_scaled" for col in features],
)

# Put the scaled columns next to the originals; the cleaned frame's index is
# reset first so both frames line up on the same 0..n-1 index
spotify_df_scaled = pd.concat(
    [spotify_df_clean.reset_index(drop=True), scaled_df],
    axis="columns",
)

# Rows of the combined frame that correspond to the query songs
queries = spotify_df_scaled.iloc[query_indices]

In [7]:
# Compute pairwise Euclidean distances between all tracks in scaled feature space
dist_matrix = euclidean_distances(scaled_features)

# Columns shown for every recommended track, and how many neighbours to keep
display_columns = ["track_artist", "energy", "danceability", "valence", "loudness"]
top_k = 10

# Find the top 10 most similar tracks for each query (excluding the query itself)
similar_tracks = {}
for idx in query_indices:
    distances = dist_matrix[idx]
    # A stable sort keeps tied distances in row order, so results are reproducible.
    ranked_indices = np.argsort(distances, kind="stable")
    # Exclude the query by its position instead of assuming it sorts first:
    # when another row has identical features (distance 0), the tie order is
    # not guaranteed and slicing [1:11] could return the query itself.
    similar_indices = ranked_indices[ranked_indices != idx][:top_k]
    similar_tracks[idx] = spotify_df_scaled.iloc[similar_indices][display_columns]

# Package the results for display.
# NOTE(review): keys are the query's "track_artist" value, so two queries by
# the same artist would overwrite each other in this dict.
query_results = {
    spotify_df_scaled.iloc[idx]["track_artist"]: similar_tracks[idx].reset_index(drop=True)
    for idx in query_indices
}

# Print one plain-text table per query
for query_idx in query_indices:
    query_artist = spotify_df_scaled.iloc[query_idx]["track_artist"]
    print(f"\nTop 10 Similar Songs to: {query_artist}")
    # similar_tracks already holds only the display columns
    print(similar_tracks[query_idx].to_string(index=False))




Top 10 Similar Songs to: Billie Eilish
               track_artist  energy  danceability  valence  loudness
              Billie Eilish   0.507         0.747    0.438   -10.171
              Billie Eilish   0.507         0.747    0.438   -10.171
    FloyyMenor, Lewis Somes   0.496         0.696    0.476    -7.532
                      Drake   0.463         0.809    0.364   -11.377
              Bryson Tiller   0.512         0.803    0.435    -6.871
Tiakola, Genezio, Prototype   0.569         0.772    0.525    -7.006
                    YEONJUN   0.557         0.815    0.482    -7.166
           P!nk, Nate Ruess   0.547         0.778    0.442    -7.273
                     Indila   0.449         0.669    0.327    -7.658
                  Lil Tecca   0.588         0.712    0.471    -5.332

Top 10 Similar Songs to: Sabrina Carpenter
                         track_artist  energy  danceability  valence  loudness
                    IVE, David Guetta   0.886         0.655    0.778    -5.227