In [50]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import NearestNeighbors

import spotipy
import time

import warnings
warnings.filterwarnings('ignore')

In [51]:
def find_song(artist_name: str, track_name: str, df: pd.DataFrame) -> pd.DataFrame:
    # Search for the song in the dataset
    song = df[(df['artist_name'] == artist_name) & (df['track_name'] == track_name)]
    if not song.empty:
        return song
    else:
        return "Song not found in the dataset."    

def find_similar_songs(input_song: pd.DataFrame, df, knn, scaler):
    print(input_song)
    print(type(input_song))
    input_song_scaled = scaler.transform(input_song)

    distances, indices = knn.kneighbors(input_song_scaled)
    nearest_neighbors = df.iloc[indices[0]].copy()

    return [indices, nearest_neighbors]

In [52]:
def print_knn_recommendations(recommendations: list):
    if recommendations == None:
        print("No recommendations found :(")
        return
    print("Here's a few songs I recommend you listen to!")
    for track in recommendations[1:]:
        print(f"'{track['track_name']}', by {track['artist_name']}")

def get_track_id(sp: spotipy.Spotify, track_name: str, artist_name: str):
    query = f"track:{track_name} artist:{artist_name}"
    result = sp.search(q=query, type='track', limit=1)
    time.sleep(1) # Avoid ratelimit
    tracks = result['tracks']['items']
    if tracks:
        track_id = tracks[0]['id']
        return track_id
    else:
        return None

def knn_recommend(sp: spotipy.Spotify, track_dataset: pd.DataFrame, X: pd.DataFrame, knn: NearestNeighbors, scaler: StandardScaler, features: list, track_name: str, artist_name: str) -> list:
    # Fetch Song Information
    found_song = find_song(
        artist_name.strip().lower(),
        track_name.strip().lower(),
        track_dataset
    )
    # Get Audio Features
    if isinstance(found_song, str): # Get information from Spotify
        found_song = get_track_id(sp, track_name, artist_name)[features]
        if found_song == None:
            return None
        found_song = sp.audio_features(found_song)
        found_song = pd.DataFrame(found_song)
    else: # Found in Million Track Dataset
        found_song = found_song[features]

    # Find similar songs based on KNN
    indices, similar_songs = find_similar_songs(found_song, X, knn, scaler)
    nearest_neighbors = track_dataset.iloc[indices[0]].copy()
    out = []
    for index, row in nearest_neighbors.iterrows():
        out.append(row)
    return out

In [53]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import os
from dotenv import load_dotenv

# Fetch environment variables
load_dotenv()
CLIENT_ID = os.getenv('CLIENT_ID')
CLIENT_SECRET = os.getenv('CLIENT_SECRET')

# Set up Spotify API credentials
client_credentials_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [54]:
# Main
track_dataset = pd.read_csv('../spotify_data.csv')

track_dataset['artist_name'] = track_dataset['artist_name'].str.strip().str.lower()
track_dataset['track_name'] = track_dataset['track_name'].str.strip().str.lower()

track_dataset.drop_duplicates()
track_dataset.dropna()

# Select features for k-NN

features = ['danceability','energy','key','loudness','mode','speechiness','acousticness','instrumentalness','liveness','valence','tempo']
X = track_dataset[features]

# Keep the additional information
# track_info = df[['track_id', 'track_name', 'artist_name']]

# peek = track_dataset.iloc[0]
# print(peek)

# One-Hot Encoding Approach (TOO MEMORY COSTLY)
# tracks = pd.get_dummies(tracks, columns=['artist_name', 'track_name'])

# One-Hot Encoding Genres
# track_dataset = pd.get_dummies(tracks, columns=['genre'])

# Label Encoding
# le_artist = LabelEncoder()
# le_track = LabelEncoder()

# # Arist and Track Name Encoding 
# track_dataset['artist_name'] = le_artist.fit_transform(track_dataset['artist_name'])
# track_dataset['track_name'] = le_track.fit_transform(track_dataset['track_name'])


# Drop Indexing and Track ID columns
# track_dataset = track_dataset.drop(columns=['Unnamed: 0','track_id','genre','popularity','track_name','artist_name'])

scaler = StandardScaler()
scaled_features = scaler.fit_transform(X)
scaled_tracks = pd.DataFrame(scaled_features, columns=X.columns)

k = 6
knn = NearestNeighbors(n_neighbors=k, algorithm='auto')
knn.fit(X)

In [55]:
# in_track_name = input("Enter a song name: \n")
# in_artist_name = input("Who is it by?\n")
in_track_name = "butter"
in_artist_name = "bts"
# recommendations = knn_recommend(sp, track_dataset, knn, le_artist, le_track, in_track_name, in_artist_name)
recommendations = knn_recommend(sp, track_dataset, X, knn, scaler, features, in_track_name, in_artist_name)
print_knn_recommendations(recommendations)

        danceability  energy  key  loudness  mode  speechiness  acousticness  \
510764         0.759   0.459    8    -5.187     1       0.0948       0.00323   

        instrumentalness  liveness  valence    tempo  
510764               0.0    0.0906    0.695  109.997  
<class 'pandas.core.frame.DataFrame'>
Here's a few songs I recommend you listen to!
'doubled over', by agoraphobic nosebleed
'baby mill pt. 1 (born and sold into child slavery)', by agoraphobic nosebleed
'accepted foulness amongst the humane', by last days of humanity
'apocalypse as mescaline experience', by agoraphobic nosebleed
'apatia', by plague rages


In [56]:
print(len(recommendations))

6


In [57]:
in_track_name = "bubble tea"
in_artist_name = "dark cat"
recommendations = knn_recommend(sp, track_dataset, X, knn, scaler, features, in_track_name, in_artist_name)
print_knn_recommendations(recommendations)

TypeError: string indices must be integers, not 'list'