In [17]:
dt=pd.read_csv('./songs_dataset.csv')

In [28]:
def categorize_feelings(feeling):
    feeling_map = {
        "Happy": 1,
        "Sad": 2,
        "Neutral": 3,
        "Excited": 4
    }
    return feeling_map.get(feeling, 0)  # Return 0 or handle unknown feelings as needed


In [29]:
def categorize_topic(topic):
    topic_map = {
        "Party": 1,
        "Hip Hop": 2,
        "Rock": 3,
        "Jazz": 4,
        "Electronic": 5,
        # Add more topics as needed
    }
    return topic_map.get(topic, 0)  # Return 0 or handle unknown topics as needed


In [36]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Load dataset
df = pd.read_csv('./Songs_Dataset.csv')

# Convert 'topic' column to numerical values
df['topic_code'] = pd.Categorical(df['topic']).codes

# Define numerical features for clustering
features = ['danceability', 'loudness', 'acousticness', 'instrumentalness', 'valence', 'energy', 'feelings', 'topic_code']

# Remove unnecessary columns and handle missing data
X = df[features].dropna()

# Standardize numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply KMeans clustering
kmeans = KMeans(n_clusters=5, random_state=42)
kmeans.fit(X_scaled)

# Get cluster labels and silhouette score
labels = kmeans.labels_
silhouette_avg = silhouette_score(X_scaled, labels)

# Add cluster labels to DataFrame
df['Cluster'] = labels

# Function to recommend songs based on user inputs
def recommend_songs(user_feelings, favorite_topic, num_recommendations=5):
    user_input = [user_feelings, favorite_topic]
    user_input_scaled = scaler.transform([user_input + [0] * (len(features) - len(user_input))])[0]
    predicted_cluster = kmeans.predict([user_input_scaled])[0]
    
    cluster_df = df[df['Cluster'] == predicted_cluster]
    
    if not cluster_df.empty:
        cluster_df = cluster_df.sort_values(by=features[0], ascending=False)  # Sorting by 'danceability' as an example
        recommendations = cluster_df.head(num_recommendations)
        return recommendations[['track_name', 'artist_name', 'genre']]
    else:
        return pd.DataFrame(columns=['track_name', 'artist_name', 'genre'])

# Function to get user input
def get_user_input():
    while True:
        try:
            user_feelings = float(input("How are you feeling today? (Enter a number between 0.0 and 1.0): "))
            if not (0.0 <= user_feelings <= 1.0):
                raise ValueError("Feeling score should be between 0.0 and 1.0")
            
            print("Available topics and their codes:")
            for code, topic in zip(df['topic_code'].unique(), df['topic'].unique()):
                print(f"Topic: {topic}, Code: {code}")
            
            favorite_topic = int(input("What is your favorite topic? (Enter the topic code): "))
            
            return user_feelings, favorite_topic
        
        except ValueError as e:
            print(f"Error: {e}. Please enter valid input.")

# Example: Get user input and recommend songs
user_feelings, favorite_topic = get_user_input()
recommended_songs = recommend_songs(user_feelings, favorite_topic)

print("\nTop Recommendations:")
print(recommended_songs)


  super()._check_params_vs_input(X, default_n_init=10)


Available topics and their codes:
Topic: sadness, Code: 5
Topic: world/life, Code: 7
Topic: music, Code: 1
Topic: romantic, Code: 4
Topic: violence, Code: 6
Topic: obscene, Code: 3
Topic: night/time, Code: 2
Topic: feelings, Code: 0

Top Recommendations:
                               track_name        artist_name    genre
27612  ice ice baby (re-recorded version)        vanilla ice  hip hop
18883                      black is black    jungle brothers     jazz
5619                        give it to me          timbaland      pop
5477           sexyback (feat. timbaland)  justin timberlake      pop
6845                             bad liar       selena gomez      pop


