In [4]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors

# Load your dataset
data = pd.read_csv('Spotify_Tracks.csv')

# Remove rows with null values
data = data.dropna()

# Assuming your dataset has columns: 'name', 'artists', 'danceability', 'energy', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'year'

# Define the features you want to use for recommendation
features = ['danceability', 'energy', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']

# Function to recommend songs
def recommend_songs(song, primary_feature, primary_weightage, num_recommendations, total_recommended):
    # Convert input song name to lowercase
    song = song.lower()

    # Selecting the relevant features for recommendation
    X = data[features]

    # Apply primary weightage
    X[primary_feature] *= primary_weightage
    remaining_weightage = (1 - primary_weightage) / (len(features) - 1)
    for feature in features:
        if feature != primary_feature:
            X[feature] *= remaining_weightage

    # Fit KNN model
    model = NearestNeighbors(n_neighbors=len(data), algorithm='auto')  # Neighbors equal to the total number of songs
    model.fit(X)

    # Transform the input song features
    song_features = data[data['name'].str.lower() == song][features]

    # Apply primary weightage to input song features
    song_features[primary_feature] *= primary_weightage
    for feature in features:
        if feature != primary_feature:
            song_features[feature] *= remaining_weightage

    # Find all nearest neighbors
    distances, indices = model.kneighbors(song_features)

    # Retrieve recommended songs, excluding the input song and songs that have been recommended before
    recommended_indices = indices[0][distances[0] > 0]  # Exclude the input song
    recommended_indices = recommended_indices[~np.isin(recommended_indices, total_recommended)]  # Convert to Pandas Series to use isin
    recommended_songs = data.iloc[recommended_indices][['name', 'artists', 'release_date']]

    if len(recommended_songs) == 0:
        print("No more results")
        return None, total_recommended

    # Calculate similarity scores based on distances for the recommended songs only
    similarity_scores = (1 - abs(distances[0][distances[0] > 0])) * 100

    # Add rounded similarity scores to recommended songs dataframe
    recommended_songs['Similarity Score %'] = similarity_scores.round(2)

    # Keep track of total recommended songs
    total_recommended += recommended_indices.tolist()

    return recommended_songs[:num_recommendations], total_recommended

# Initial user input
song = input("Enter the name of the song: ")
primary_feature = input("Enter the primary feature (e.g., 'energy'): ")
primary_weightage = float(input("Enter the primary weightage (between 0 and 1): "))
num_recommendations = int(input("Enter the number of recommendations: "))

# Initialize total recommended songs list
total_recommended = []

# Generate recommendations until reaching a total of 40
while len(total_recommended) < 40:
    recommended_songs, total_recommended = recommend_songs(song, primary_feature, primary_weightage, num_recommendations, total_recommended)
    if recommended_songs is not None:
        print(recommended_songs)
        print("Total recommended:", len(total_recommended))
        if len(total_recommended) >= 40:
            print("No more results")
            break
        next_option = input("Would you like to see more recommendations? (yes/no): ")
        if next_option.lower() != 'yes':
            break


Enter the name of the song: candle
Enter the primary feature (e.g., 'energy'): energy
Enter the primary weightage (between 0 and 1): 0.6
Enter the number of recommendations: 8


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[primary_feature] *= primary_weightage
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[feature] *= remaining_weightage


                                        name  \
283937                              Tomorrow   
108911                              Tomorrow   
86996   Ocarina (Brennan Heart Remix) - Edit   
379303                           Oh Darkness   
551433                        DREAM ON 抱きしめて   
533319                    Jako Tele Na Vrata   
173995                          Come Clarity   
57690        Ride The Lightning - Remastered   

                                                  artists release_date  \
283937                                          ['James']   1998-01-01   
108911                                          ['James']         1997   
86996   ['Dimitri Vegas & Like Mike', 'Wolfpack', 'Bre...   2014-12-12   
379303                                ['Admiral Freebee']   2005-01-01   
551433                                       ['LINDBERG']   1993-06-05   
533319                                        ['Olympic']         1986   
173995                                      ['In 