In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler

# Load the dataset
df = pd.read_csv('Music_Info.csv')  # Replace with your actual dataset file path

# Step 1: Inspect the data
print("Initial Dataset Info:")
# DataFrame.info() writes its report directly and returns None, so it must
# not be wrapped in print() (that would emit a stray "None" line).
df.info()
print("\nFirst Few Rows of the Dataset:")
print(df.head())

# Step 2: Handle Missing Values
missing_values = df.isnull().sum()
print("\nMissing Values in Each Column:")
print(missing_values)

# Fill missing values with the column mean, for numerical columns only.
# numeric_only=True is required: the frame also holds text columns, and
# averaging those is deprecated / raises TypeError on recent pandas versions.
df.fillna(df.mean(numeric_only=True), inplace=True)

# Step 3: Encoding Categorical Features
# One-hot encoding for categorical columns like 'genre' if necessary.
# NOTE: with drop_first=True, rows whose genre is missing get all-zero dummy
# columns, the same encoding as the dropped first category. The genre dummies
# are not part of `features` below, so this does not affect the model.
if 'genre' in df.columns:
    df = pd.get_dummies(df, columns=['genre'], drop_first=True)

# Step 4: Specify the features to use for recommendations
features = [
    'danceability', 'duration_ms', 'energy', 'key', 'loudness',
    'mode', 'speechiness', 'acousticness', 'instrumentalness',
    'liveness', 'valence', 'tempo', 'time_signature',
]

# Step 5: Normalize the data for features used
# Cast to float first so the integer columns can hold the scaled values.
df[features] = df[features].astype(float)
scaler = StandardScaler()
df[features] = scaler.fit_transform(df[features])

# Build a Nearest Neighbors model on the standardized feature columns
model_knn = NearestNeighbors(metric='euclidean', algorithm='brute', n_neighbors=6)
model_knn.fit(df[features])

# Function to search for a song by name and artist
def search_song(df, song_name=None, artist_name=None):
    """Find songs whose name and/or artist contain the given text.

    Matching is a case-insensitive *literal* substring test, so characters
    such as ``(``, ``+`` or ``.`` in the search text are matched as typed
    rather than being interpreted as regular-expression syntax.

    Args:
        df: DataFrame with at least 'track_id', 'name' and 'artist' columns.
        song_name: Text to look for in the 'name' column (optional).
        artist_name: Text to look for in the 'artist' column (optional).
            When both are given, a row must match both.

    Returns:
        The matching rows of ``df`` (possibly empty), or ``None`` when
        neither ``song_name`` nor ``artist_name`` was provided.
    """
    if not song_name and not artist_name:
        print("Please provide either a song name or an artist name to search.")
        return None

    # Collect one boolean mask per supplied criterion, then AND them together.
    criteria = []
    if song_name:
        criteria.append(
            df['name'].str.contains(song_name, case=False, na=False, regex=False)
        )
    if artist_name:
        criteria.append(
            df['artist'].str.contains(artist_name, case=False, na=False, regex=False)
        )

    mask = criteria[0]
    for extra in criteria[1:]:
        mask = mask & extra
    results = df[mask]

    if results.empty:
        print("No songs found for your search.")
    else:
        print(f"Found {len(results)} songs. Showing top 5 results:")
        print(results[['track_id', 'name', 'artist']].head())

    return results

# Function to recommend similar songs based on a song's track_id
def recommend_songs_by_id(df, model_knn, track_id, n_recommendations=5,
                          feature_columns=None):
    """Return the songs closest to the song identified by ``track_id``.

    Args:
        df: DataFrame containing a 'track_id' column and the feature columns
            the model was fitted on, in the same row order as the fit data.
        model_knn: Fitted neighbours model exposing
            ``kneighbors(X, n_neighbors=...)`` and returning
            ``(distances, indices)`` arrays with one row per query.
        track_id: Identifier of the query song.
        n_recommendations: Number of songs to recommend.
        feature_columns: Columns used as the query vector. Defaults to the
            module-level ``features`` list.

    Returns:
        A tuple ``(recommendations, distances)``. ``recommendations`` holds
        the rows of ``df`` for the recommended songs, nearest first.
        ``distances`` is a 1-D array whose first element is the distance of
        the query song to itself and whose remaining elements are the
        distances of the recommendations, in the same order.

    Raises:
        IndexError: If ``track_id`` does not occur in ``df``.
    """
    if feature_columns is None:
        feature_columns = features

    # Work with the row *position*: kneighbors reports positions, and an
    # index label only coincides with a position for a default RangeIndex.
    matches = np.flatnonzero((df['track_id'] == track_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"track_id {track_id!r} not found in the dataset")
    position = int(matches[0])

    # Keep the query as a one-row DataFrame so dtypes and column names are
    # preserved (a single row taken as a Series would become object dtype).
    query = df.iloc[[position]][feature_columns]
    distances, indices = model_knn.kneighbors(
        query, n_neighbors=n_recommendations + 1
    )
    distances = np.asarray(distances[0])
    indices = np.asarray(indices[0])

    # Drop the query song itself. It is usually the first neighbour, but with
    # duplicate feature vectors another song can tie at the same distance and
    # be listed first, so locate it explicitly.
    is_query = indices == position
    if is_query.any():
        drop = int(np.argmax(is_query))
        self_distance = distances[drop]
    else:
        # The query was pushed out by ties: discard the farthest neighbour.
        # Assumes the metric gives distance 0 from a point to itself.
        drop = len(indices) - 1
        self_distance = 0.0
    keep = np.arange(len(indices)) != drop

    recommendations = df.iloc[indices[keep]]
    return recommendations, np.concatenate(([self_distance], distances[keep]))

# Function to visualize recommendations
def visualize_recommendations(recommendations, distances, selected_song):
    """Show a horizontal bar chart of similarity scores for recommended songs.

    Args:
        recommendations: DataFrame of recommended songs with a 'name' column.
        distances: 1-D sequence of distances whose first element belongs to
            the selected song itself; elements from index 1 onward correspond
            to the rows of ``recommendations``, in order.
        selected_song: Mapping/Series with 'name' and 'artist' entries, used
            for the chart title.
    """
    distances = np.asarray(distances, dtype=float)
    # Distances are unbounded, so "1 - distance" would go negative for any
    # neighbour farther than 1. 1 / (1 + d) maps d = 0 to a score of 1 and
    # decreases toward 0 as the distance grows.
    similarity = 1.0 / (1.0 + distances[1:])

    # Plot against numeric positions and label the ticks afterwards, so two
    # recommendations sharing the same name still get separate bars.
    positions = np.arange(len(recommendations))

    plt.figure(figsize=(10, 6))
    plt.barh(positions, similarity)
    plt.yticks(positions, recommendations['name'])
    plt.xlabel('Similarity Score')
    plt.ylabel('Recommended Songs')
    plt.title(f"Top Recommended Songs for '{selected_song['name']}' by {selected_song['artist']}")
    plt.show()


if __name__ == "__main__":
    # Take user input for the song name and artist name; surrounding
    # whitespace is stripped so a blank entry counts as "not provided".
    song_name = input("Enter the song name you want recommendations for: ").strip()
    artist_name = input("Enter the artist name: ").strip()

    # Search for the specified song
    search_results = search_song(df, song_name=song_name, artist_name=artist_name)

    # Get recommendations for the selected song.
    # search_song returns None when neither a song nor an artist was given,
    # so check for that before looking at .empty.
    if search_results is not None and not search_results.empty:
        selected_song = search_results.iloc[0]  # Use the first search result
        track_id = selected_song['track_id']
        recommendations, distances = recommend_songs_by_id(df, model_knn, track_id)

        # Display the recommendations
        print("\nRecommended Songs:")
        print(recommendations[['track_id', 'name', 'artist']])

        # Visualize recommendations
        visualize_recommendations(recommendations, distances, selected_song)


Initial Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50683 entries, 0 to 50682
Data columns (total 21 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   track_id             50683 non-null  object 
 1   name                 50683 non-null  object 
 2   artist               50683 non-null  object 
 3   spotify_preview_url  50683 non-null  object 
 4   spotify_id           50683 non-null  object 
 5   tags                 49556 non-null  object 
 6   genre                22348 non-null  object 
 7   year                 50683 non-null  int64  
 8   duration_ms          50683 non-null  int64  
 9   danceability         50683 non-null  float64
 10  energy               50683 non-null  float64
 11  key                  50683 non-null  int64  
 12  loudness             50683 non-null  float64
 13  mode                 50683 non-null  int64  
 14  speechiness          50683 non-null  float64
 15  acousticness  

  df.fillna(df.mean(), inplace=True)
