In [19]:
import pandas as pd
import numpy as np

# Load the two CSV files used by the cells below (paths are relative to the
# current working directory).

# Passed below to the recommenders that read the 'cluster' column.
merged_data = pd.read_csv('merged_data.csv')
# Passed below to the recommenders that read the 'LouvainCluster' column.
updated_merged_data = pd.read_csv('updated_merged_data.csv')


In [20]:
from sklearn.metrics.pairwise import cosine_similarity

def format_recommendations(recommendations, reason):
    """
    Render recommended songs as a plain-text listing.

    Parameters:
    - recommendations: DataFrame with 'track_name', 'artist', 'track_genre'
      and 'cluster_label' columns; one row per recommended song.
    - reason: String appended to every entry explaining the recommendation.

    Returns:
    A single string with one five-line entry per row, entries separated by a
    blank line. An empty DataFrame yields an empty string.
    """
    entries = [
        (
            f"Track Name: {song['track_name']}\n"
            f"Artist: {song['artist']}\n"
            f"Genre: {song['track_genre']}\n"
            f"Cluster: {song['cluster_label']}\n"
            f"Reason: {reason}\n"
        )
        for _, song in recommendations.iterrows()
    ]
    # Each entry already ends with a newline, so joining on "\n" leaves one
    # blank line between consecutive entries.
    return "\n".join(entries)


def recommend_same_cluster(track_id, df, num_recommendations=5, raw=False, random_state=None):
    """
    Randomly pick tracks that share the seed track's 'cluster' value.

    Parameters:
    - track_id: ID of the seed track; must be present in df['track_id'].
    - df: DataFrame with at least 'track_id' and 'cluster' columns (plus the
      columns read by format_recommendations when raw is False).
    - num_recommendations: Maximum number of tracks to return. Fewer are
      returned when the cluster does not contain enough other tracks.
    - raw: If True, return the sampled DataFrame instead of formatted text.
    - random_state: Optional seed forwarded to DataFrame.sample so that the
      draw can be reproduced. None (the default) keeps the draw unseeded.

    Returns:
    The sampled rows as a DataFrame when raw is True, otherwise the string
    produced by format_recommendations.

    Raises:
    ValueError if track_id is not present in df['track_id'].
    """
    if track_id not in df['track_id'].values:
        raise ValueError(f"Track ID '{track_id}' not found in the dataset.")

    # If the ID occurs on several rows, the first match decides the cluster.
    cluster = df.loc[df['track_id'] == track_id, 'cluster'].values[0]
    candidates = df[(df['cluster'] == cluster) & (df['track_id'] != track_id)]

    # Cap by the size of the pool that is actually sampled (seed excluded).
    # Capping by the full cluster size would request one row too many for
    # small clusters and make sample() raise.
    sample_size = min(num_recommendations, len(candidates))
    recommendations = candidates.sample(n=sample_size, random_state=random_state)

    if raw:
        return recommendations  # Return raw DataFrame for calculations
    return format_recommendations(recommendations, "From the same cluster as the selected track.")


def recommend_similar_songs(track_id, df, num_recommendations=5, features=None, raw=False):
    """
    Recommend the tracks whose feature vectors are most cosine-similar to the seed.

    The whole DataFrame is searched, not only the seed track's cluster.

    Parameters:
    - track_id: ID of the seed track; must be present in df['track_id'].
    - df: DataFrame with a 'track_id' column and every column in `features`
      (plus the columns read by format_recommendations when raw is False).
      It is not modified.
    - num_recommendations: Maximum number of tracks to return.
    - features: Column names used to build the feature vectors. Defaults to
      the eight columns listed below.
    - raw: If True, return the DataFrame instead of formatted text.

    Returns:
    When raw is True, a DataFrame of the top matches sorted by descending
    similarity, with an added 'similarity' column. Otherwise the string
    produced by format_recommendations.

    Raises:
    ValueError if track_id is not present in df['track_id'].
    """
    if features is None:
        features = ['danceability_x', 'energy_x', 'valence_x', 'tempo_x',
                    'acousticness_x', 'speechiness_log', 'instrumentalness_log', 'liveness_log']
    if track_id not in df['track_id'].values:
        raise ValueError(f"Track ID '{track_id}' not found in the dataset.")

    # Use the first matching row only. Flattening every match would produce an
    # over-long vector (and a shape error) when the ID occurs on several rows.
    seed_vector = df.loc[df['track_id'] == track_id, features].iloc[[0]].to_numpy()
    similarity_scores = cosine_similarity(seed_vector, df[features])[0]

    # Attach the scores to a copy so the caller's DataFrame is left untouched.
    scored = df.assign(similarity=similarity_scores)

    recommendations = (
        scored[scored['track_id'] != track_id]
        .sort_values(by='similarity', ascending=False)
        .head(num_recommendations)
    )

    if raw:
        return recommendations  # Return raw DataFrame for calculations
    return format_recommendations(recommendations, "Based on similar features to the selected track.")


def recommend_from_neighbors(track_id, df, num_recommendations=5, raw=False):
    """
    Randomly pick tracks from the clusters whose PCA centroids best match the seed's cluster.

    Cluster centroids are the per-cluster means of 'pca1' and 'pca2'. Other
    clusters are ranked by the cosine similarity between their centroid and
    the seed cluster's centroid. Whole clusters are pooled in that order until
    the pool holds at least num_recommendations tracks, and the result is
    sampled from that pool.

    NOTE(review): cosine similarity compares centroid direction only, not
    distance. Confirm this is the intended meaning of "neighboring".

    Parameters:
    - track_id: ID of the seed track; must be present in df['track_id'].
    - df: DataFrame with 'track_id', 'cluster', 'pca1' and 'pca2' columns
      (plus the columns read by format_recommendations when raw is False).
    - num_recommendations: Maximum number of tracks to return.
    - raw: If True, return a DataFrame instead of formatted text.

    Returns:
    When raw is True, a DataFrame of sampled rows (empty, with df's columns,
    when there is no other cluster). Otherwise the formatted string, or
    "No recommendations found in neighboring clusters." when there is no
    other cluster.

    Raises:
    ValueError if track_id is not present in df['track_id'].
    """
    if track_id not in df['track_id'].values:
        raise ValueError(f"Track ID '{track_id}' not found in the dataset.")

    cluster = df.loc[df['track_id'] == track_id, 'cluster'].values[0]
    pca_cluster_centers = df.groupby('cluster')[['pca1', 'pca2']].mean()
    selected_cluster_pca = pca_cluster_centers.loc[cluster].values.reshape(1, -1)
    similarities = cosine_similarity(selected_cluster_pca, pca_cluster_centers)[0]
    # Negate so argsort yields the most similar clusters first.
    ranked_clusters = pca_cluster_centers.index[np.argsort(-similarities)]

    pooled_frames = []
    pooled_count = 0
    for candidate_cluster in ranked_clusters:
        if candidate_cluster == cluster:
            continue
        cluster_tracks = df[df['cluster'] == candidate_cluster]
        pooled_frames.append(cluster_tracks)
        # Track the running size instead of re-concatenating on every pass.
        pooled_count += len(cluster_tracks)
        if pooled_count >= num_recommendations:
            break

    if pooled_frames:
        pool = pd.concat(pooled_frames)
        recommendations_df = pool.sample(n=min(num_recommendations, len(pool)))
        if raw:
            return recommendations_df  # Return raw DataFrame for calculations
        return format_recommendations(recommendations_df, "From clusters neighboring the selected track's cluster.")

    if raw:
        # Keep the raw return type a DataFrame so downstream calculations
        # do not receive a message string.
        return df.iloc[0:0]
    return "No recommendations found in neighboring clusters."




# Example song for recommendation (A valid track_id from dataset)
track_id = '1iJBSr7s7jYXzM8EGcbK5b'

# 1. Recommend songs within the same cluster
print("Recommendations from the same cluster:")
print(recommend_same_cluster(track_id, merged_data))

# 2. Recommend the most feature-similar songs. The whole DataFrame is searched,
#    not only the selected track's cluster.
print("Recommendations based on similar features:")
print(recommend_similar_songs(track_id, merged_data))

# 3. Recommend songs from neighboring clusters
print("Recommendations from neighboring clusters:")
print(recommend_from_neighbors(track_id, merged_data))


def calculate_diversity(recommendations, feature_columns):
    """
    Score diversity as the mean of the per-feature variances.

    Parameters:
    - recommendations: DataFrame of recommended songs. The type is not
      checked here, so pass the raw DataFrame rather than formatted text.
    - feature_columns: Column names whose variances are averaged.

    Returns:
    The mean, across feature_columns, of each column's variance as computed
    by DataFrame.var().
    """
    selected_features = recommendations[feature_columns]
    per_feature_variance = selected_features.var()
    return per_feature_variance.mean()

# Example track ID. This rebinds track_id, replacing the value used for the
# recommendation printouts above; the later Louvain calls read this value.
track_id = '1EzrEOXmMH3G43AXT1y7pA'


# Columns whose variances calculate_diversity averages.
feature_columns = ['danceability_x', 'valence_x', 'tempo_x']

# Get same-cluster recommendations as a raw DataFrame (raw=True skips formatting).
# Presumably the 'cluster' column holds KMeans labels - TODO confirm.
# The rows are drawn by an unseeded sample(), so the score below varies per run.
kmeans_recommendations = recommend_same_cluster(track_id, merged_data, raw=True)

# Calculate diversity for KMeans recommendations
kmeans_diversity = calculate_diversity(kmeans_recommendations, feature_columns=feature_columns)

print(f"KMeans Recommendation Diversity: {kmeans_diversity}")



Recommendations from the same cluster:
Track Name: Lights Out
Artist: Village
Genre: french
Cluster: Dark & Moody Alternative - Tracks with moderate energy and a darker mood. Perfect for fans of alternative music, electronic beats, or introspective moments.
Reason: From the same cluster as the selected track.

Track Name: Treat You Better
Artist: RÜFÜS DU SOL
Genre: electro
Cluster: Dark & Moody Alternative - Tracks with moderate energy and a darker mood. Perfect for fans of alternative music, electronic beats, or introspective moments.
Reason: From the same cluster as the selected track.

Track Name: Screams Turn to Silence
Artist: The Agony Scene
Genre: death-metal
Cluster: Dark & Moody Alternative - Tracks with moderate energy and a darker mood. Perfect for fans of alternative music, electronic beats, or introspective moments.
Reason: From the same cluster as the selected track.

Track Name: 我們的愛
Artist: F.I.R.
Genre: mandopop
Cluster: Dark & Moody Alternative - Tracks with moderate

In [21]:

def recommend_same_graph_cluster(track_id, df, num_recommendations=5, raw=False, random_state=None):
    """
    Randomly pick tracks that share the seed track's 'LouvainCluster' value.

    Parameters:
    - track_id: ID of the seed track; must be present in df['track_id'].
    - df: DataFrame with at least 'track_id' and 'LouvainCluster' columns
      (plus the columns read by format_recommendations when raw is False).
    - num_recommendations: Maximum number of tracks to return. Fewer are
      returned when the cluster does not contain enough other tracks.
    - raw: If True, return the sampled DataFrame instead of formatted text.
    - random_state: Optional seed forwarded to DataFrame.sample so that the
      draw can be reproduced. None (the default) keeps the draw unseeded.

    Returns:
    The sampled rows as a DataFrame when raw is True, otherwise the string
    produced by format_recommendations.

    Raises:
    ValueError if track_id is not present in df['track_id'].
    """
    if track_id not in df['track_id'].values:
        raise ValueError(f"Track ID '{track_id}' not found in the dataset.")

    # If the ID occurs on several rows, the first match decides the cluster.
    graph_cluster = df.loc[df['track_id'] == track_id, 'LouvainCluster'].values[0]
    candidates = df[(df['LouvainCluster'] == graph_cluster) & (df['track_id'] != track_id)]

    # Cap by the size of the pool that is actually sampled (seed excluded).
    # Capping by the full cluster size would request one row too many for
    # small clusters and make sample() raise.
    sample_size = min(num_recommendations, len(candidates))
    recommendations = candidates.sample(n=sample_size, random_state=random_state)

    if raw:
        return recommendations  # Return raw DataFrame for calculations
    return format_recommendations(recommendations, "From the same Louvain cluster as the selected track.")


def recommend_from_graph_neighbors(track_id, df, num_recommendations=5, raw=False):
    """
    Randomly pick tracks from Louvain clusters other than the seed track's.

    Other clusters are visited from largest to smallest (by row count in df).
    Whole clusters are pooled in that order until the pool holds at least
    num_recommendations tracks, and the result is sampled from that pool.

    NOTE(review): no graph structure is consulted here, so "neighboring"
    means "largest other clusters". Confirm whether a real adjacency or
    distance measure between communities was intended.

    Parameters:
    - track_id: ID of the seed track; must be present in df['track_id'].
    - df: DataFrame with 'track_id' and 'LouvainCluster' columns (plus the
      columns read by format_recommendations when raw is False).
    - num_recommendations: Maximum number of tracks to return.
    - raw: If True, return a DataFrame instead of formatted text.

    Returns:
    When raw is True, a DataFrame of sampled rows (empty, with df's columns,
    when there is no other cluster). Otherwise the formatted string, or
    "No recommendations found in neighboring clusters." when there is no
    other cluster.

    Raises:
    ValueError if track_id is not present in df['track_id'].
    """
    if track_id not in df['track_id'].values:
        raise ValueError(f"Track ID '{track_id}' not found in the dataset.")

    # Identify the current cluster
    graph_cluster = df.loc[df['track_id'] == track_id, 'LouvainCluster'].values[0]

    # Cluster labels ordered by descending size; this is the only ranking used.
    cluster_neighbors = df['LouvainCluster'].value_counts().index

    pooled_frames = []
    pooled_count = 0
    for neighbor_cluster in cluster_neighbors:
        if neighbor_cluster == graph_cluster:
            continue
        neighbor_tracks = df[df['LouvainCluster'] == neighbor_cluster]
        pooled_frames.append(neighbor_tracks)
        # Track the running size instead of re-concatenating on every pass.
        pooled_count += len(neighbor_tracks)
        if pooled_count >= num_recommendations:
            break

    if pooled_frames:
        pool = pd.concat(pooled_frames)
        recommendations_df = pool.sample(n=min(num_recommendations, len(pool)))
        if raw:
            return recommendations_df  # Return raw DataFrame for calculations
        return format_recommendations(recommendations_df, "From Louvain clusters neighboring the selected track's cluster.")

    if raw:
        # Keep the raw return type a DataFrame so downstream calculations
        # do not receive a message string.
        return df.iloc[0:0]
    return "No recommendations found in neighboring clusters."


# NOTE: this cell reads track_id, feature_columns, calculate_diversity and
# format_recommendations from the previous cell, so that cell must run first.

# Recommendations from the same Louvain cluster
print("Recommendations from the same Louvain cluster:")
print(recommend_same_graph_cluster(track_id, updated_merged_data))

# Recommendations from other Louvain clusters (the function visits them in
# descending order of size)
print("Recommendations from neighboring Louvain clusters:")
print(recommend_from_graph_neighbors(track_id, updated_merged_data))


# Example: Diversity of Graph-Based (Louvain) recommendations.
# This is a fresh random draw, not the rows printed above.
graph_recommendations = recommend_same_graph_cluster(track_id, updated_merged_data, raw=True)  # Get raw recommendations
graph_diversity = calculate_diversity(graph_recommendations, feature_columns=feature_columns)

print(f"Louvain Recommendation Diversity: {graph_diversity}")



Recommendations from the same Louvain cluster:
Track Name: Theme from "The Addams Family"
Artist: The Countdown Kids
Genre: children
Cluster: Chilled Acoustic & Folk Vibes - Mellow tracks with moderate positivity and acoustic instrumentation. Suitable for casual settings, relaxing evenings, or cozy vibes.
Reason: From the same Louvain cluster as the selected track.

Track Name: Build Me Up From Bones
Artist: Sarah Jarosz
Genre: bluegrass
Cluster: Mellow Acoustic & Relaxing Tunes - Slow-paced tracks with low energy and high acousticness. Ideal for calm environments like meditation, study sessions, or unwinding.
Reason: From the same Louvain cluster as the selected track.

Track Name: Our Day Will Come
Artist: Amy Winehouse
Genre: british
Cluster: Upbeat Dance & Party Hits - Tracks with high danceability and positivity. These are fun and lively, perfect for celebrations, parties, or uplifting moods.
Reason: From the same Louvain cluster as the selected track.

Track Name: god save me, bu