# In [3]:
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder


# Load and preprocess dataset
@st.cache_data
def load_data():
    """Load the Spotify dataset, filter it, and assign each song to a cluster.

    Reads ``spotify_dataset.csv``, keeps a reproducible random sample of at
    most 5000 rows, drops songs whose ``Genre`` mentions one of the regional
    film-industry genres, label-encodes and standardises the descriptive
    columns, and runs KMeans on the result.

    Returns:
        tuple: ``(df, df_scaled)`` where ``df`` is the filtered sample with an
        added integer ``"Cluster"`` column and ``df_scaled`` holds the
        standardised features used for clustering. Both share the same
        0..n-1 index, so row ``i`` of one corresponds to row ``i`` of the
        other.
    """
    df = pd.read_csv("spotify_dataset.csv")
    # sample() raises if n exceeds the number of rows, so cap it.
    df = df.sample(n=min(5000, len(df)), random_state=42)

    # Remove regional genres (single case-insensitive regex pass).
    regional_genres = ['kollywood', 'tollywood', 'mollywood', 'sandalwood']
    is_regional = df['Genre'].str.contains(
        "|".join(regional_genres), case=False, na=False
    )
    # Reset the index *after* filtering so df lines up with df_scaled, and so
    # the "Cluster" assignment below targets a fresh frame, not a slice.
    df = df[~is_regional].reset_index(drop=True)

    # NOTE(review): clustering uses only these label-encoded descriptive
    # columns; any numeric columns present in the CSV are not part of the
    # feature set. Confirm this is intended.
    feature_cols = ['Track Name', 'Artist(s)', 'Album', 'Release Date', 'Genre']
    encoded = pd.DataFrame(
        {
            col: LabelEncoder().fit_transform(df[col].astype(str))
            for col in feature_cols
        },
        index=df.index,
    )

    scaled_values = StandardScaler().fit_transform(encoded[feature_cols])
    df_scaled = pd.DataFrame(scaled_values, columns=feature_cols, index=df.index)

    # KMeans clustering. n_init is set explicitly because its default differs
    # between scikit-learn releases.
    optimal_k = 5
    kmeans = KMeans(n_clusters=optimal_k, n_init=10, random_state=42)
    df["Cluster"] = kmeans.fit_predict(df_scaled)

    return df, df_scaled

# Song recommendation logic
def recommend_songs(song_name, df, num_recommendations=5):
    """Return the songs most similar to ``song_name`` within its cluster.

    Similarity is the cosine similarity between the numeric columns of the
    query song and those of every other song in the same cluster (the
    ``"Cluster"`` label itself is not used as a feature).

    Args:
        song_name: Value to look up in ``df["Track Name"]``. If several rows
            match, the first one is used as the query.
        df: DataFrame with at least the columns ``"Track Name"``, ``"Genre"``,
            ``"Artist(s)"`` and ``"Cluster"``.
        num_recommendations: Maximum number of songs to return.

    Returns:
        DataFrame with columns ``"Track Name"``, ``"Genre"``, ``"Artist(s)"``,
        ordered from most to least similar. It is empty when the song is not
        in ``df``, when ``num_recommendations`` is not positive, or when an
        error occurred (in which case the error is also shown via
        ``st.error``).
    """
    result_columns = ["Track Name", "Genre", "Artist(s)"]
    no_results = pd.DataFrame(columns=result_columns)

    if num_recommendations <= 0:
        return no_results

    try:
        # Work with row *positions* throughout: df's index labels are not
        # guaranteed to be 0..n-1, and the cluster subset never is.
        match_positions = np.flatnonzero((df["Track Name"] == song_name).to_numpy())
        if match_positions.size == 0:
            return no_results
        query_row = int(match_positions[0])

        in_cluster = (df["Cluster"] == df["Cluster"].iloc[query_row]).to_numpy()
        same_cluster_songs = df[in_cluster]
        # Position of the query inside the cluster subset = number of cluster
        # members that precede it in df.
        query_pos = int(in_cluster[:query_row].sum())

        cluster_features = same_cluster_songs.select_dtypes(include="number").drop(
            columns=["Cluster"], errors="ignore"
        )
        # Only the query's row of the similarity matrix is needed.
        similarity = cosine_similarity(
            cluster_features.iloc[[query_pos]], cluster_features
        )[0]

        # Rank by descending similarity, then drop the query itself by
        # position rather than assuming it is the single top match.
        ranked = np.argsort(-similarity, kind="stable")
        ranked = ranked[ranked != query_pos][:num_recommendations]
        return same_cluster_songs.iloc[ranked][result_columns]
    except Exception as e:
        # UI boundary: report the problem instead of crashing the app.
        st.error(f"Error: {e}")
        return no_results
# Streamlit UI
def main():
    """Render the recommender page.

    Shows a track selector and a slider for the number of recommendations;
    when the "Recommend" button is pressed, displays the songs returned by
    ``recommend_songs`` or a warning if there are none.
    """
    st.title("🎧 Spotify Song Recommender")
    st.write("Get song recommendations based on your favorite track!")

    # Only the annotated song table is used here; the scaled features are not.
    df, _ = load_data()
    # dropna(): sorted() cannot order missing values against strings.
    unique_tracks = sorted(df["Track Name"].dropna().unique())
    if not unique_tracks:
        st.warning("No songs available in the dataset.")
        return

    selected_song = st.selectbox("Select a song:", unique_tracks)
    num_rec = st.slider("Number of recommendations:", 1, 10, 5)

    if not st.button("Recommend"):
        # The selectbox always holds a selection, so the only missing step
        # at this point is pressing the button.
        st.info("Select a song and click Recommend.")
        return

    recommendations = recommend_songs(selected_song, df, num_recommendations=num_rec)
    if recommendations.empty:
        st.warning("No recommendations found.")
        return

    st.subheader(f"Songs similar to: {selected_song}")
    st.dataframe(recommendations)

