In [None]:
# Step 1: Install required libraries
!pip install gradio scikit-learn pandas

# Step 2: Import libraries
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
import gradio as gr

# Step 3: Load dataset from a public URL
url = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-01-21/spotify_songs.csv"
df = pd.read_csv(url)

# Step 4: Clean the dataset
# Drop rows that lack a title, an artist, or any audio feature used for similarity.
df = df.dropna(subset=['track_name', 'track_artist', 'danceability', 'energy', 'loudness', 'valence', 'tempo'])
# Keep one row per title so a song name selects exactly one row.
df = df.drop_duplicates(subset='track_name')
# The filtering above leaves gaps in the index labels. Renumber to 0..n-1 so
# that a row's label equals its position in the scaled feature matrix built
# from this frame; otherwise label-based lookups address the wrong row.
df = df.reset_index(drop=True)
df['track_name'] = df['track_name'].astype(str)

# Step 5: Feature selection and normalization
# Audio attributes that span the similarity space.
features = ['danceability', 'energy', 'loudness', 'valence', 'tempo']
# Standardize each column so that attributes on large scales do not dominate
# the Euclidean distance used below.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df.loc[:, features])

# Step 6: Train KNN model
# fit() returns the estimator itself, so construction and fitting can be chained.
knn = NearestNeighbors(metric='euclidean', n_neighbors=6).fit(X_scaled)

# Step 7: Recommendation function
def recommend(song_name: str) -> str:
    """Return the songs most similar to *song_name*, one per line.

    The song is located by exact title match in ``df``; its scaled feature
    vector is queried against the fitted ``knn`` model, and every neighbour
    other than the song itself is formatted as ``"<title> by <artist>"``.

    Args:
        song_name: Title to look up in ``df['track_name']``.

    Returns:
        Newline-separated recommendations, or an apology message when the
        title is not present in the dataset.
    """
    # Resolve the song to its row *position*, not its index label: X_scaled is
    # a plain array and kneighbors() returns positions, so labels (which have
    # gaps after rows were dropped during cleaning) must not be mixed in.
    matches = (df['track_name'] == song_name).to_numpy().nonzero()[0]
    if matches.size == 0:
        return "Sorry, song not found in the dataset."
    position = matches[0]

    _, indices = knn.kneighbors([X_scaled[position]])

    # The query song is normally its own nearest neighbour; leave it out.
    neighbours = df.iloc[[i for i in indices[0] if i != position]]
    return "\n".join(
        f"{title} by {artist}"
        for title, artist in zip(neighbours['track_name'], neighbours['track_artist'])
    )

# Step 8: Build Gradio app
# Input: a dropdown listing every distinct title in alphabetical order.
song_picker = gr.Dropdown(
    choices=sorted(df['track_name'].unique()),
    label="Choose a Song",
)
# Output: a text box showing the newline-separated recommendations.
result_box = gr.Textbox(label="Recommended Songs 🎶")

demo = gr.Interface(
    fn=recommend,
    inputs=song_picker,
    outputs=result_box,
    title="🎵 Public Spotify Recommender",
    description="Pick a song and get similar recommendations using K-Nearest Neighbors.",
)

# Start the web UI.
demo.launch()
