In [2]:
import pandas as pd
import tkinter as tk
from tkinter import messagebox, ttk
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors

# Spotify API credentials (client-credentials flow); left blank in this file.
client_id = ''
client_secret = ''

# Build the Spotify client that the lookup helpers below use via `sp`.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
)

# Load the track dataset, retrying with a second encoding if decoding fails.
# NOTE(review): ISO-8859-1 assigns a character to every byte value, so the
# Windows-1252 fallback looks unlikely to ever trigger -- confirm the
# intended source encoding of dataset.csv.
try:
    df = pd.read_csv('dataset.csv', encoding='ISO-8859-1')
except UnicodeDecodeError:
    print("Failed with ISO-8859-1 encoding, trying with Windows-1252")
    df = pd.read_csv('dataset.csv', encoding='Windows-1252')
else:
    print("Successfully read the file with ISO-8859-1 encoding")

# Debug aid: list every genre label present in the dataset.
print("Unique genres in the dataset:", df['track_genre'].unique())

# Audio-feature columns fed to the scaler / KNN model, plus the column used
# for the popularity constraints.
features = [
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
]
popularity_feature = 'popularity'

# Only fit the model when every required column is present in the dataset.
if not {*features, popularity_feature}.issubset(df.columns):
    print("Error: Not all specified features exist in the dataset.")
else:
    X = df[features]

    # Standardize the features so no single column dominates the distance.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Fit the KNN model on the standardized full dataset.
    knn = NearestNeighbors(n_neighbors=5, algorithm='auto')
    knn.fit(X_scaled)

    print("KNN model fitted successfully with the following features:")
    print(features)

# Function to get artist genres from Spotify
def get_artist_genres(artist_name):
    """Return the genre list of the first Spotify artist matching *artist_name*.

    Runs an artist search limited to one result through the module-level
    ``sp`` client. Returns an empty list when the search has no hits or when
    any exception is raised during the lookup (the error is printed).
    """
    try:
        response = sp.search(q=f'artist:{artist_name}', type='artist', limit=1)
        matches = response['artists']['items']
        if not matches:
            print(f"Artist not found: {artist_name}")
            return []
        # Only the top search hit is considered.
        return matches[0]['genres']
    except Exception as e:
        print(f"Error fetching artist genres: {e}")
        return []

# Function to find nearest neighbors within a genre and popularity constraints
def find_nearest_neighbors_within_genre(track_features, genre, target_popularity, role, n_neighbors=5):
    """Return the dataset tracks closest to *track_features* for a genre and role.

    The dataset is first narrowed to rows whose ``track_genre`` equals
    *genre*, then to rows satisfying the popularity constraint of *role*:

    * ``"Headliner"``      -- popularity strictly above *target_popularity*
    * ``"Supporting Act"`` -- popularity strictly below *target_popularity*
      and strictly above the genre's minimum popularity
    * ``"Opening Act"``    -- popularity strictly below *target_popularity*

    A nearest-neighbour search is then run on that filtered subset only, so
    the returned indices always refer to rows of the subset.

    Args:
        track_features: Values for the columns named in ``features`` (for
            example a dict keyed by feature name).
        genre: Genre label to match against ``track_genre``.
        target_popularity: Popularity threshold used by the role filter.
        role: One of ``"Headliner"``, ``"Supporting Act"``, ``"Opening Act"``.
        n_neighbors: Maximum number of neighbours to return.

    Returns:
        A DataFrame with up to *n_neighbors* rows of the dataset, or an empty
        DataFrame when nothing matches, the role is invalid, or the
        neighbour search fails.
    """
    genre_df = df[df['track_genre'] == genre]

    if genre_df.empty:
        print(f"No tracks found for genre: {genre}")
        return pd.DataFrame()  # Return an empty DataFrame if no tracks are found

    popularity = genre_df[popularity_feature]
    if role == "Headliner":
        genre_df = genre_df[popularity > target_popularity]
    elif role == "Supporting Act":
        genre_df = genre_df[(popularity < target_popularity) &
                            (popularity > popularity.min())]
    elif role == "Opening Act":
        genre_df = genre_df[popularity < target_popularity]
    else:
        print(f"Invalid role: {role}")
        return pd.DataFrame()

    if genre_df.empty:
        print(f"No tracks found for role {role} within popularity constraints.")
        return pd.DataFrame()

    print(f"Filtered genre_df for role {role}:", genre_df)

    # Scale the candidates and the query with the scaler fitted on the full
    # dataset so both live in the same feature space.
    genre_X_scaled = scaler.transform(genre_df[features])
    track_features_df = pd.DataFrame([track_features], columns=features)
    track_features_scaled = scaler.transform(track_features_df)

    try:
        # Fit a model on the filtered subset: the indices it returns are then
        # positions within genre_df. (Querying the globally fitted model
        # would yield positions in the full dataset, which do not correspond
        # to rows of the filtered frame.) Never ask for more neighbours than
        # there are candidates.
        k = min(n_neighbors, len(genre_df))
        subset_knn = NearestNeighbors(n_neighbors=k, algorithm='auto').fit(genre_X_scaled)
        _, indices = subset_knn.kneighbors(track_features_scaled)
    except ValueError as e:
        print(f"Error finding neighbors: {e}")
        return pd.DataFrame()

    neighbors = genre_df.iloc[indices[0]]
    print("Found neighbors:", neighbors)
    return neighbors

# Function to handle the button click
def on_submit():
    """Handle the main window's submit button.

    Reads the artist name and role from the form, validates that both were
    provided, looks up the artist's genres on Spotify and, if any are found,
    opens the genre selection dialog. Shows an error dialog otherwise.
    """
    # Ignore surrounding whitespace so a blank-looking entry is rejected
    # before any API call is made.
    artist_name = artist_entry.get().strip()
    role = role_var.get()

    if not artist_name:
        messagebox.showerror("Error", "Please enter an artist name")
        return
    if not role:
        messagebox.showerror("Error", "Please select a role")
        return

    genres = get_artist_genres(artist_name)
    if genres:
        genre_selection_window(genres, artist_name, role)
    else:
        messagebox.showerror("Error", "Artist not found")

# Function to handle genre selection and display nearest neighbors
def genre_selection_window(genres, artist_name, role):
    """Open a dialog to pick one of *genres* and show matching recommendations.

    Args:
        genres: Genre names offered in the dialog's combobox.
        artist_name: Artist whose track features and dataset popularity are
            used as the reference point.
        role: The artist's own role (``"Headliner"``, ``"Supporting Act"`` or
            ``"Opening Act"``); recommendations are produced for the other
            roles.
    """
    def on_genre_select():
        """Compute and display recommendations for the chosen genre."""
        selected_genre = genre_var.get()
        if not selected_genre:
            # Keep the dialog open so the user can make a choice.
            messagebox.showerror("Error", "Please select a genre")
            return

        track_features = get_track_features(artist_name, selected_genre)
        if not track_features:
            messagebox.showerror("Error", f"Track features could not be retrieved for artist: {artist_name} in genre: {selected_genre}")
            genre_win.destroy()
            return

        # Literal (non-regex) match: artist names may contain characters such
        # as "$", "(" or "." that would otherwise be read as a pattern.
        artist_rows = df[df['artists'].str.contains(artist_name, case=False, na=False, regex=False)]
        if artist_rows.empty:
            # Without a dataset row there is no popularity to compare against.
            messagebox.showerror("Error", f"Artist not found in the dataset: {artist_name}")
            genre_win.destroy()
            return

        # The first matching row supplies the reference popularity.
        target_popularity = artist_rows[popularity_feature].values[0]
        print(f"Target popularity for {artist_name} is {target_popularity}")

        if role == "Headliner":
            # Find neighbors for supporting act
            supporting_neighbors = find_nearest_neighbors_within_genre(track_features, selected_genre, target_popularity, "Supporting Act")
            if not supporting_neighbors.empty:
                supporting_popularities = supporting_neighbors[popularity_feature].values
                print(f"Supporting act popularities: {supporting_popularities}")

                # Find neighbors for opening act based on supporting act popularity
                for popularity in supporting_popularities:
                    opening_neighbors = find_nearest_neighbors_within_genre(track_features, selected_genre, popularity, "Opening Act")
                    if not opening_neighbors.empty:
                        display_neighbors(opening_neighbors, "Opening Act")

                display_neighbors(supporting_neighbors, "Supporting Act")
            else:
                messagebox.showerror("Error", f"No supporting act neighbors found for genre: {selected_genre}")
        elif role == "Supporting Act":
            headliner_neighbors = find_nearest_neighbors_within_genre(track_features, selected_genre, target_popularity, "Headliner")
            opening_neighbors = find_nearest_neighbors_within_genre(track_features, selected_genre, target_popularity, "Opening Act")
            if not headliner_neighbors.empty:
                display_neighbors(headliner_neighbors, "Headliner")
            if not opening_neighbors.empty:
                display_neighbors(opening_neighbors, "Opening Act")
        elif role == "Opening Act":
            supporting_neighbors = find_nearest_neighbors_within_genre(track_features, selected_genre, target_popularity, "Supporting Act")
            if not supporting_neighbors.empty:
                display_neighbors(supporting_neighbors, "Supporting Act")
        else:
            messagebox.showerror("Error", "Invalid role selected")

        genre_win.destroy()

    genre_win = tk.Toplevel(root)
    genre_win.title("Select Genre")

    tk.Label(genre_win, text="Select Genre").grid(row=0, column=0, padx=10, pady=10)
    genre_var = tk.StringVar(genre_win)
    genre_menu = ttk.Combobox(genre_win, textvariable=genre_var, values=genres)
    genre_menu.grid(row=1, column=0, padx=10, pady=10)

    select_button = tk.Button(genre_win, text="Select", command=on_genre_select)
    select_button.grid(row=2, column=0, pady=10)

# Function to get track features using artist and genre
def get_track_features(artist_name, genre):
    """Fetch the audio features of one Spotify track for *artist_name* and *genre*.

    Searches for a single track using an ``artist:`` / ``genre:`` query,
    requests its audio features, and keeps only the entries named in the
    module-level ``features`` list.

    Returns:
        A dict mapping each name in ``features`` to its value, or ``None``
        when the search has no hits or any exception is raised (the error is
        printed).
    """
    try:
        response = sp.search(q=f'artist:{artist_name} genre:{genre}', type='track', limit=1)
        hits = response['tracks']['items']
        if not hits:
            print(f"No tracks found for artist: {artist_name} in genre: {genre}")
            return None

        # Only the top search hit is used.
        track_id = hits[0]['id']
        audio = sp.audio_features(track_id)[0]
        # Keep only relevant features
        return {name: audio[name] for name in features}
    except Exception as e:
        print(f"Error fetching track features: {e}")
        return None

# Function to display nearest neighbors
def display_neighbors(neighbors, role):
    """Append a recommendation listing for *role* to the results text widget.

    Writes a heading followed by the track name, artist and album of every
    row in *neighbors*, each entry terminated by a 50-dash separator line.
    """
    def emit(text):
        # All output is appended at the end of the module-level result_text.
        result_text.insert(tk.END, text)

    emit(f"{role} Recommendations:\n")
    for _, track in neighbors.iterrows():
        emit(f"Track: {track['track_name']}\n")
        emit(f"Artist: {track['artists']}\n")
        emit(f"Album: {track['album_name']}\n")
        emit("-"*50 + "\n")

# Create the GUI
root = tk.Tk()
root.title("Spotify Nearest Neighbors")

# Create and place the components
tk.Label(root, text="Artist Name").grid(row=0, column=0, padx=10, pady=10)
tk.Label(root, text="Role").grid(row=1, column=0, padx=10, pady=10)
artist_entry = tk.Entry(root)
artist_entry.grid(row=0, column=1, padx=10, pady=10)

# The role strings must match the ones the recommendation logic compares
# against ("Opening Act", "Supporting Act", "Headliner"). The combobox is
# read-only so no other value can be typed in.
role_var = tk.StringVar(root)
role_menu = ttk.Combobox(
    root,
    textvariable=role_var,
    values=["Opening Act", "Supporting Act", "Headliner"],
    state="readonly",
)
role_menu.grid(row=1, column=1, padx=10, pady=10)

submit_button = tk.Button(root, text="Find Nearest Neighbors", command=on_submit)
submit_button.grid(row=2, columnspan=2, pady=10)

# Text area that display_neighbors() appends recommendations to.
result_text = tk.Text(root, height=20, width=80, wrap='word', padx=10, pady=10)
result_text.grid(row=3, columnspan=2, padx=10, pady=10)

# Run the GUI
root.mainloop()


Successfully read the file with ISO-8859-1 encoding
Unique genres in the dataset: ['acoustic' 'afrobeat' 'alt-rock' 'alternative' 'ambient' 'anime'
 'black-metal' 'bluegrass' 'blues' 'breakbeat' 'british' 'chicago-house'
 'chill' 'classical' 'club' 'comedy' 'country' 'dance' 'dancehall'
 'death-metal' 'deep-house' 'detroit-techno' 'disco' 'drum-and-bass' 'dub'
 'dubstep' 'edm' 'electro' 'electronic' 'emo' 'folk' 'forro' 'funk'
 'garage' 'goth' 'grindcore' 'groove' 'grunge' 'guitar' 'happy'
 'hard-rock' 'hardcore' 'hardstyle' 'heavy-metal' 'hip-hop' 'honky-tonk'
 'house' 'idm' 'indie' 'industrial' 'j-dance' 'j-pop' 'j-rock' 'jazz'
 'malay' 'mandopop' 'metal' 'metalcore' 'minimal-techno' 'mpb' 'new-age'
 'party' 'piano' 'pop-film' 'pop' 'power-pop' 'progressive-house'
 'psych-rock' 'punk-rock' 'punk' 'r-n-b' 'reggae' 'reggaeton'
 'rock-n-roll' 'rock' 'rockabilly' 'romance' 'sad' 'show-tunes'
 'singer-songwriter' 'ska' 'sleep' 'soul' 'study' 'synth-pop' 'tango'
 'techno' 'trance' 'trip-ho