# Song Recommendation System

In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.append('../src')
from preprocessing import load_data, clean_data
from recommender import MusicRecommender

In [2]:
# Load the raw CSV and pass it through clean_data; `df` is the frame the
# later cells build on.
df_raw = load_data('../data/spotify_2000.csv')
df = clean_data(df_raw)

# Report the row count, then preview the first rows of the cleaned frame.
preview = df.head()
print(f'Dataset loaded: {df.shape[0]} songs', preview, sep='\n')

Dataset loaded: 2000 songs
  primary_genre  popularity  danceability  energy    tempo
0           pop        77.0         0.751   0.834   95.053
1          rock        79.0         0.434   0.897  148.726
2           pop        66.0         0.529   0.496  136.859
3          rock        78.0         0.551   0.913  119.992
4           pop        65.0         0.614   0.928  172.656


In [3]:
# Build the recommender from the cleaned frame; every recommendation cell below reuses this instance.
recommender = MusicRecommender(df)

In [4]:
# Positional (iloc) index of the seed song; the recommendation cells below
# pass this same `song_idx` to the recommender.
song_idx = 10

# NOTE(review): the row is read from `df`, while recommendations come from
# `recommender` — this assumes MusicRecommender keeps the same row order; confirm.
selected_song = df.iloc[song_idx]

# Print the full row first, then call out genre and popularity separately.
print('SELECTED SONG:', selected_song, sep='\n')
print(
    f'\nGenre: {selected_song["primary_genre"]}',
    f'Popularity: {selected_song["popularity"]:.2f}',
    sep='\n',
)

SELECTED SONG:
primary_genre        pop
popularity           1.0
danceability       0.617
energy             0.728
tempo            139.066
Name: 10, dtype: object

Genre: pop
Popularity: 1.00


In [5]:
# Request five recommendations for the seed song through recommend_by_genre.
recommendations_genre = recommender.recommend_by_genre(
    song_idx,
    n_recommendations=5,
)

# Restrict the printout to the columns used to compare against the seed song.
genre_view = recommendations_genre[
    ['primary_genre', 'popularity', 'danceability', 'energy', 'tempo']
]
print('\nRECOMMENDATIONS BY GENRE:')
print(genre_view)


RECOMMENDATIONS BY GENRE:
     primary_genre  popularity  danceability  energy    tempo
1311           pop        88.0         0.445   0.537  122.769
1613           pop        86.0         0.764   0.705  101.003
1819           pop        86.0         0.351   0.296  115.284
1929           pop        86.0         0.351   0.296  115.284
1227           pop        85.0         0.726   0.698  143.994


In [6]:
# Request five recommendations for the seed song through recommend_by_similarity.
recommendations_similarity = recommender.recommend_by_similarity(
    song_idx,
    n_recommendations=5,
)

# Heading and table go out in a single print call, one per line.
print(
    '\nRECOMMENDATIONS BY SIMILARITY:',
    recommendations_similarity[['primary_genre', 'popularity', 'danceability', 'energy', 'tempo']],
    sep='\n',
)


RECOMMENDATIONS BY SIMILARITY:
          primary_genre  popularity  danceability  energy    tempo
1083  World/Traditional         0.0         0.550   0.753  139.048
835                 pop         0.0         0.692   0.911  138.975
1750            hip hop         0.0         0.636   0.517  139.928
1549            hip hop         0.0         0.819   0.768  139.982
698                 pop         0.0         0.688   0.734  140.004


In [7]:
# Per-feature weights handed to recommend_by_audio_features.
weights = dict(danceability=1.5, energy=1.2, tempo=1.0)

recommendations_weighted = recommender.recommend_by_audio_features(
    song_idx,
    n_recommendations=5,
    weights=weights,
)

# Echo the weights next to the results so the printout is self-describing.
weighted_view = recommendations_weighted[
    ['primary_genre', 'popularity', 'danceability', 'energy', 'tempo']
]
print('\nRECOMMENDATIONS BY WEIGHTED AUDIO FEATURES:')
print(f'Weights: {weights}', weighted_view, sep='\n')


RECOMMENDATIONS BY WEIGHTED AUDIO FEATURES:
Weights: {'danceability': 1.5, 'energy': 1.2, 'tempo': 1.0}
          primary_genre  popularity  danceability  energy    tempo
1083  World/Traditional         0.0         0.550   0.753  139.048
822                 pop        76.0         0.724   0.695  139.000
1598                pop        70.0         0.775   0.825  139.091
835                 pop         0.0         0.692   0.911  138.975
1775            hip hop        64.0         0.865   0.687  139.023


In [8]:
def test_recommendation_system(recommender, num_songs=5):
    np.random.seed(42)
    test_indices = np.random.choice(len(recommender.df), num_songs, replace=False)
    
    for idx in test_indices:
        song = recommender.df.iloc[idx]
        recs_genre = recommender.recommend_by_genre(idx, n_recommendations=3)
        recs_sim = recommender.recommend_by_similarity(idx, n_recommendations=3)
        
        print(f"\nSong Index: {idx} | Genre: {song['primary_genre']} | Popularity: {song['popularity']:.2f}")
        print(f"  Genre-based recommendations: {len(recs_genre)} found")
        print(f"  Similarity-based recommendations: {len(recs_sim)} found")

In [9]:
# Smoke-test the recommender on five sampled songs; the function prints per-song result counts.
test_recommendation_system(recommender, num_songs=5)


Song Index: 1860 | Genre: pop | Popularity: 72.00
  Genre-based recommendations: 3 found
  Similarity-based recommendations: 3 found

Song Index: 353 | Genre: hip hop | Popularity: 72.00
  Genre-based recommendations: 3 found
  Similarity-based recommendations: 3 found

Song Index: 1333 | Genre: hip hop | Popularity: 60.00
  Genre-based recommendations: 3 found
  Similarity-based recommendations: 3 found

Song Index: 905 | Genre: pop | Popularity: 74.00
  Genre-based recommendations: 3 found
  Similarity-based recommendations: 3 found

Song Index: 1289 | Genre: hip hop | Popularity: 74.00
  Genre-based recommendations: 3 found
  Similarity-based recommendations: 3 found


In [10]:
# Number of genres to list; the heading and the slice both read this value
# so they cannot drift apart.
TOP_N_GENRES = 10

# Count songs per primary genre on the recommender's frame
# (value_counts orders the result by count, largest first).
# NOTE(review): the saved output lists a 'set()' genre, which looks like a
# stringified empty set leaking out of the cleaning step — confirm in clean_data.
genre_dist = recommender.df['primary_genre'].value_counts()
print(f'Total genres: {len(genre_dist)}')
print(f'\nTop {TOP_N_GENRES} genres:')
print(genre_dist.head(TOP_N_GENRES))

Total genres: 12

Top 10 genres:
primary_genre
pop                  936
hip hop              776
rock                 162
Dance/Electronic      41
set()                 22
latin                 15
R&B                   13
country               11
World/Traditional     10
metal                  9
Name: count, dtype: int64


In [11]:
# Column-wise mean of the audio features plus popularity.
feature_cols = ['danceability', 'energy', 'tempo', 'popularity']
avg_features = recommender.df[feature_cols].mean()

# Rounding is applied only to the printed copy; `avg_features` keeps full precision.
print('\nAverage Audio Features:', avg_features.round(3), sep='\n')


Average Audio Features:
danceability      0.667
energy            0.720
tempo           120.123
popularity       59.872
dtype: float64
