# RecommenderDemo.ipynb

This notebook:

1. Installs required Python libraries if needed (in one cell).  
2. Imports those libraries.  
3. Loads a Spotify CSV dataset (e.g. `../Data/data.csv`).  
4. Selects features + splits into train/test.  
5. Builds a RandomForest pipeline and trains it.  
6. Plots a learning curve to show how performance changes with data size.  
7. Optionally tests on a small `playlist_test.csv`.


In [1]:
# %% [markdown]
# ## 1) Install Required Libraries

# %%capture
import sys

# We do a broad check for scikit-learn, pandas, matplotlib, etc.
# If they're not installed, pip install them quietly.
!{sys.executable} -m pip install --quiet --upgrade pip
!{sys.executable} -m pip install --quiet pandas numpy matplotlib scikit-learn


In [2]:
# %% [markdown]
# ## 2) Imports

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, learning_curve
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

import os

from sklearn.neighbors import NearestNeighbors



In [3]:
# %% [markdown]
# ## 3) Load the Data

# Adjust the path to your CSV. For example:
CSV_PATH = '../../Data/data.csv'

df = pd.read_csv(CSV_PATH)
print("Data shape:", df.shape)
print("Columns:", df.columns.tolist())

# Show the first few rows
df.head()


Data shape: (170653, 19)
Columns: ['valence', 'year', 'acousticness', 'artists', 'danceability', 'duration_ms', 'energy', 'explicit', 'id', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'name', 'popularity', 'release_date', 'speechiness', 'tempo']


Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo
0,0.0594,1921,0.982,"['Sergei Rachmaninoff', 'James Levine', 'Berli...",0.279,831667,0.211,0,4BJqT0PrAfrxzMOxytFOIz,0.878,10,0.665,-20.096,1,"Piano Concerto No. 3 in D Minor, Op. 30: III. ...",4,1921,0.0366,80.954
1,0.963,1921,0.732,['Dennis Day'],0.819,180533,0.341,0,7xPhfUan2yNtyFG0cUWkt8,0.0,7,0.16,-12.441,1,Clancy Lowered the Boom,5,1921,0.415,60.936
2,0.0394,1921,0.961,['KHP Kridhamardawa Karaton Ngayogyakarta Hadi...,0.328,500062,0.166,0,1o6I8BglA6ylDMrIELygv1,0.913,3,0.101,-14.85,1,Gati Bali,5,1921,0.0339,110.339
3,0.165,1921,0.967,['Frank Parker'],0.275,210000,0.309,0,3ftBPsC5vPBKxYSee08FDH,2.8e-05,5,0.381,-9.316,1,Danny Boy,3,1921,0.0354,100.109
4,0.253,1921,0.957,['Phil Regan'],0.418,166693,0.193,0,4d6HGyGT8e121BsdKmw9v6,2e-06,3,0.229,-10.096,1,When Irish Eyes Are Smiling,2,1921,0.038,101.665


In [4]:
# %% [markdown]
# ## 4) Select Features & Target

# We'll do a quick classification on 'mode' as an example.
target_col = 'mode'

# Example numeric columns you might have in your CSV:
feature_cols = [
    'acousticness', 'danceability', 'duration_ms', 'energy',
    'instrumentalness', 'liveness', 'loudness', 'speechiness',
    'tempo', 'valence'
]

# Drop rows missing any required columns
df = df.dropna(subset=feature_cols + [target_col])

X = df[feature_cols].copy()
y = df[target_col].copy()

print("After dropping missing data, shape:", X.shape)


After dropping missing data, shape: (170653, 10)


In [5]:
# %% [markdown]
# ## 5) Train/Test Split

# Let's do an 80/20 split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Train size:", X_train.shape, "Test size:", X_test.shape)


Train size: (136522, 10) Test size: (34131, 10)


In [6]:
# %% [markdown]
# ## 6) Build a Pipeline & Train the Model

pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('rf', RandomForestClassifier(n_estimators=100, random_state=42))
])

pipeline.fit(X_train, y_train)
test_score = pipeline.score(X_test, y_test)
print("Test Accuracy:", test_score)


Test Accuracy: 0.7239459728692391


In [8]:
# %% [markdown]
# ## 8) Example: Test on a "Playlist" DataFrame

# Instead of reading from a CSV, let's create a small DataFrame in-code.

playlist_data = [
    {
        'acousticness': 0.10,
        'danceability': 0.75,
        'duration_ms': 210000,
        'energy': 0.80,
        'instrumentalness': 0.0,
        'liveness': 0.15,
        'loudness': -5.0,
        'speechiness': 0.07,
        'tempo': 120.0,
        'valence': 0.65
    },
    {
        'acousticness': 0.30,
        'danceability': 0.62,
        'duration_ms': 185000,
        'energy': 0.55,
        'instrumentalness': 0.0,
        'liveness': 0.12,
        'loudness': -7.5,
        'speechiness': 0.04,
        'tempo': 130.0,
        'valence': 0.45
    },
    {
        'acousticness': 0.80,
        'danceability': 0.45,
        'duration_ms': 240000,
        'energy': 0.25,
        'instrumentalness': 0.1,
        'liveness': 0.20,
        'loudness': -10.0,
        'speechiness': 0.03,
        'tempo': 100.0,
        'valence': 0.30
    },
    {
        'acousticness': 0.15,
        'danceability': 0.68,
        'duration_ms': 200000,
        'energy': 0.75,
        'instrumentalness': 0.0,
        'liveness': 0.25,
        'loudness': -4.0,
        'speechiness': 0.09,
        'tempo': 140.0,
        'valence': 0.70
    },
    {
        'acousticness': 0.55,
        'danceability': 0.50,
        'duration_ms': 220000,
        'energy': 0.45,
        'instrumentalness': 0.0,
        'liveness': 0.10,
        'loudness': -8.0,
        'speechiness': 0.06,
        'tempo': 115.0,
        'valence': 0.55
    }
]

playlist_df = pd.DataFrame(playlist_data)
print("Playlist DataFrame shape:", playlist_df.shape)

# Make sure it has the same feature columns we used
print("Playlist columns:", playlist_df.columns.tolist())

# Predict the 'mode' using our pipeline
playlist_preds = pipeline.predict(playlist_df[feature_cols])
playlist_df['predicted_mode'] = playlist_preds

# Show the results
playlist_df


Playlist DataFrame shape: (5, 10)
Playlist columns: ['acousticness', 'danceability', 'duration_ms', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'valence']


Unnamed: 0,acousticness,danceability,duration_ms,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,predicted_mode
0,0.1,0.75,210000,0.8,0.0,0.15,-5.0,0.07,120.0,0.65,0
1,0.3,0.62,185000,0.55,0.0,0.12,-7.5,0.04,130.0,0.45,1
2,0.8,0.45,240000,0.25,0.1,0.2,-10.0,0.03,100.0,0.3,1
3,0.15,0.68,200000,0.75,0.0,0.25,-4.0,0.09,140.0,0.7,0
4,0.55,0.5,220000,0.45,0.0,0.1,-8.0,0.06,115.0,0.55,1


In [9]:
# %% [markdown]
# ## 9) Recommend Some New Songs via KNN
# We'll do a simple content-based approach:
# 1) Scale numeric features in the main dataset
# 2) Fit a NearestNeighbors model
# 3) For each playlist song, find top 5 neighbors in df

# 1) We'll use the same df & feature_cols from above
#    But let's create separate scaled data specifically for KNN
X_main = df[feature_cols].copy()
scaler_knn = StandardScaler()
X_main_scaled = scaler_knn.fit_transform(X_main)

# 2) Fit the KNN model
knn_model = NearestNeighbors(n_neighbors=6, algorithm='auto')  
# n_neighbors=6 so we get 1 (the song itself) + 5 "similar"
knn_model.fit(X_main_scaled)

# 3) For each playlist track, scale & find neighbors
def recommend_songs_for_track(track_features, n=5):
    # track_features: a 1D array of numeric features (already in the same order as feature_cols)
    # Scale it with the same scaler_knn
    track_scaled = scaler_knn.transform([track_features])

    # kneighbors gives distances + indices from X_main_scaled
    distances, indices = knn_model.kneighbors(track_scaled, n_neighbors=n+1)
    # indices is shape (1, n+1)

    # The first neighbor is often the track itself if it exists in df
    # We'll skip index 0 & return the next n
    rec_indices = indices[0][1:]
    return df.iloc[rec_indices].copy()  # recommended subset from df

all_recommendations = []

for i, row in playlist_df.iterrows():
    print(f"\n=== Playlist Song {i+1} ===")
    # Let's just show the row's numeric features + predicted_mode
    display(row[feature_cols + ['predicted_mode']] if 'predicted_mode' in row else row[feature_cols])

    track_array = row[feature_cols].values
    rec_songs = recommend_songs_for_track(track_array, n=5)
    print("Recommended 5 similar songs (by content-based features):")
    display(rec_songs.head())  # show them

    all_recommendations.append(rec_songs)



=== Playlist Song 1 ===


acousticness             0.10
danceability             0.75
duration_ms         210000.00
energy                   0.80
instrumentalness         0.00
liveness                 0.15
loudness                -5.00
speechiness              0.07
tempo                  120.00
valence                  0.65
predicted_mode           0.00
Name: 0, dtype: float64

Recommended 5 similar songs (by content-based features):




Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo
17657,0.653,2010,0.00987,['Kesha'],0.736,204760,0.817,0,3LUWWox8YYykohBbHUrrxd,0.00167,8,0.117,-4.9,1,We R Who We R,72,2010-11-19,0.0407,119.95
88219,0.692,2000,0.182,['Aaron Tippin'],0.746,191800,0.826,0,1vEpm2t75VL7Xl5h59q0L4,0.0,7,0.136,-5.144,1,Big Boy Toys,41,2000-01-01,0.0342,117.956
18191,0.605,2012,0.0132,['Katy Perry'],0.719,227760,0.804,0,55qBw1900pZKfXJ6Q9A2Lc,3e-06,10,0.139,-4.581,1,Teenage Dream,69,2012-03-12,0.0355,119.999
122901,0.591,2010,0.0162,['Katy Perry'],0.719,227741,0.798,0,6AOdKVvWB8Ulb3lGCnyPBY,2e-06,10,0.134,-4.582,1,Teenage Dream,43,2010-01-01,0.0361,120.011
17681,0.591,2010,0.0162,['Katy Perry'],0.719,227741,0.798,0,5jzKL4BDMClWqRguW5qZvh,2e-06,10,0.134,-4.582,1,Teenage Dream,66,2010-01-01,0.0361,120.011



=== Playlist Song 2 ===


acousticness             0.30
danceability             0.62
duration_ms         185000.00
energy                   0.55
instrumentalness         0.00
liveness                 0.12
loudness                -7.50
speechiness              0.04
tempo                  130.00
valence                  0.45
predicted_mode           1.00
Name: 1, dtype: float64

Recommended 5 similar songs (by content-based features):




Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo
74618,0.434,2017,0.393,['Chris Stapleton'],0.657,191480,0.577,0,7dUdMZqfGSIt0ZkmTOgRLA,0.0291,2,0.105,-8.586,0,I Was Wrong,62,2017-05-05,0.0341,135.558
170217,0.424,2018,0.336,['Mat Kearney'],0.621,189987,0.61,0,0vy1K9FhCK8woHW7MKEcBG,0.0,5,0.157,-8.455,1,Kings & Queens,56,2018-05-04,0.0454,139.001
32582,0.421,1990,0.267,['Paul Young'],0.57,216373,0.575,0,0YqhqbLNpfQetrwuLaVTiK,0.0,9,0.0823,-7.032,1,Oh Girl,55,1990,0.0318,135.903
168168,0.472,2008,0.275,['Colt Ford'],0.623,205107,0.553,0,4oo8YQ9XlHlhutUNyEno9E,0.0,0,0.124,-9.546,1,No Trash in My Trailer,40,2008-07-04,0.0369,119.975
85431,0.382,1985,0.266,['Brenda K. Starr'],0.574,200640,0.473,0,6cc7q8BUVEfzzUPGt8aYlB,0.0,1,0.147,-7.738,1,Love Me Like the First Time,43,1985-01-01,0.0275,132.956



=== Playlist Song 3 ===


acousticness             0.80
danceability             0.45
duration_ms         240000.00
energy                   0.25
instrumentalness         0.10
liveness                 0.20
loudness               -10.00
speechiness              0.03
tempo                  100.00
valence                  0.30
predicted_mode           1.00
Name: 2, dtype: float64

Recommended 5 similar songs (by content-based features):




Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo
82254,0.31,1969,0.77,"['Quincy Jones', 'Matt Monro']",0.472,220160,0.338,0,3Tz5fLDzaPYxvd5MY6gtS1,0.0,7,0.199,-10.747,0,"On Days Like These - From ""The Italian Job"" So...",39,1969-01-01,0.026,95.899
128992,0.295,1961,0.838,['Dinah Washington'],0.411,231000,0.242,0,3RBjN2cKKfMEgpsdTKQbxK,0.0172,5,0.252,-11.034,1,With A Song In My Heart,15,1961-01-01,0.0292,92.675
29692,0.256,1975,0.819,['Bruce Springsteen'],0.464,196707,0.29,0,22wGmrE8HQZHvHC44n7Htm,0.000136,8,0.175,-10.384,1,Meeting Across the River,43,1975-08-25,0.0279,104.659
96112,0.303,1957,0.768,['Nat King Cole'],0.414,228387,0.258,0,4ZYcM6lC5OJtDTVclTEXyn,0.0,8,0.198,-12.687,1,These Foolish Things (Remind Me Of You),15,1957,0.0487,101.285
130456,0.297,1968,0.706,['Townes Van Zandt'],0.459,233027,0.279,0,6hS8sjojXv7kEQa0fRLGJP,0.0,4,0.173,-12.339,1,Many a Fine Lady,20,1968,0.0292,94.93



=== Playlist Song 4 ===


acousticness             0.15
danceability             0.68
duration_ms         200000.00
energy                   0.75
instrumentalness         0.00
liveness                 0.25
loudness                -4.00
speechiness              0.09
tempo                  140.00
valence                  0.70
predicted_mode           0.00
Name: 3, dtype: float64

Recommended 5 similar songs (by content-based features):




Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo
90666,0.693,2012,0.0566,['Kreayshawn'],0.706,218013,0.748,1,5OVjZ7Cy7U2gVJX6eLqmFT,0.0,8,0.225,-4.759,1,Go Hard (La.La.La),55,2012-09-14,0.0752,151.988
123209,0.698,2012,0.00593,['Chris Brown'],0.631,200000,0.759,0,3CScJ0ttMJ687s3rlLdrnV,0.0,4,0.235,-4.622,1,Sweet Love,51,2012-07-03,0.048,139.901
140582,0.71,2019,0.0024,['EXO'],0.704,203520,0.729,0,7fK0csBoqbcgUuWGV0cpoD,0.0,7,0.218,-3.851,1,Obsession,70,2019-11-27,0.0624,129.992
168706,0.701,2011,0.186,"['Jason Aldean', 'Ludacris']",0.696,232720,0.776,0,4s3Z1svzMAu8OwmhWLUWHw,0.0,2,0.25,-4.641,1,Dirt Road Anthem (Remix) [feat. Ludacris],42,2011-06-09,0.0409,127.07
140137,0.653,2017,0.142,['BTS'],0.612,196776,0.844,0,3ryjLm3oupIjJFGc39naNi,0.0,5,0.287,-4.694,0,dimple,65,2017-09-18,0.0756,134.868



=== Playlist Song 5 ===


acousticness             0.55
danceability             0.50
duration_ms         220000.00
energy                   0.45
instrumentalness         0.00
liveness                 0.10
loudness                -8.00
speechiness              0.06
tempo                  115.00
valence                  0.55
predicted_mode           1.00
Name: 4, dtype: float64

Recommended 5 similar songs (by content-based features):




Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo
46994,0.507,1968,0.637,['Engelbert Humperdinck'],0.538,203133,0.467,0,0oUBuOO4g9P4lREqfqR5nq,0.0,2,0.115,-9.589,1,A Man Without Love,42,1968-08-03,0.0327,111.766
10090,0.55,1972,0.568,['Vicente Fernández'],0.55,182440,0.46,0,4va4REkaDpC31k6jcAbLGY,0.0,5,0.086,-6.244,1,Que Te Vaya Bonito,60,1972,0.0448,111.124
66730,0.584,1978,0.527,['Vicente Fernández'],0.495,159467,0.463,0,2zndZj2MFSNEqzp7C8uxGM,0.000183,7,0.093,-7.369,1,Que Te Vas Te Vas,38,1978,0.0508,120.233
72888,0.513,2009,0.624,['Michael Bublé'],0.458,254747,0.483,0,5i04Jy87RLxoZszJqY3QAN,6e-06,10,0.0754,-7.909,1,Cry Me a River,58,2009-10-06,0.0366,104.823
6687,0.54,1955,0.503,['Sammy Davis Jr.'],0.507,166720,0.366,0,4yKOoRdCpdWfflFZ8vGkGd,0.0,5,0.122,-8.605,1,Hey There - Single Version,26,1955-01-01,0.0407,119.4


In [11]:
# %% [markdown]
# ## 9) Recommend Songs Using Only 'danceability', 'energy', 'tempo'
# 
# We'll do a simple KNN-based content approach again,
# but only using 3 numeric columns. We'll display the recommended
# 'artists' and 'name' so it's more "music-like."

from sklearn.neighbors import NearestNeighbors

feature_cols_small = ['danceability', 'energy', 'tempo']

# Make sure those columns exist
for col in feature_cols_small:
    if col not in df.columns:
        raise ValueError(f"Missing column '{col}' in df. Please adjust feature_cols_small.")

# 1) Filter out rows missing these smaller features
df_small = df.dropna(subset=feature_cols_small)

# 2) Keep "artists" & "name" for display; they won't be used in distance
X_small = df_small[feature_cols_small].copy()

# Scale
scaler_small = StandardScaler()
X_small_scaled = scaler_small.fit_transform(X_small)

# Fit a NearestNeighbors model
knn_model_small = NearestNeighbors(n_neighbors=6)
knn_model_small.fit(X_small_scaled)

def recommend_songs_by_3features(row_data, n=5):
    # row_data: array/list for [danceability, energy, tempo]
    arr = scaler_small.transform([row_data])
    distances, indices = knn_model_small.kneighbors(arr, n_neighbors=n+1)
    # skip the first neighbor (song itself)
    rec_indices = indices[0][1:]
    return df_small.iloc[rec_indices].copy()

playlist_cols_small = ['danceability','energy','tempo']

print("\n==== Recommending songs using only danceability, energy, tempo ====")

for i, row in playlist_df.iterrows():
    row_data = row[playlist_cols_small].values
    rec_songs = recommend_songs_by_3features(row_data, n=5)
    
    print(f"\n--- Playlist Song {i+1} ---")
    # We assume your playlist_df also has 'artists' and 'name'
    print("Artist / Track:", 
          row.get('artists','?'), '/', 
          row.get('name','?'),
          f"(dance={row.get('danceability','?')}, energy={row.get('energy','?')}, tempo={row.get('tempo','?')})")
    
    print("Recommended 5 similar songs:")
    # Show the recommended songs with 'artists','name','danceability','energy','tempo'
    display(rec_songs[['artists','name','danceability','energy','tempo']])



==== Recommending songs using only danceability, energy, tempo ====

--- Playlist Song 1 ---
Artist / Track: ? / ? (dance=0.75, energy=0.8, tempo=120.0)
Recommended 5 similar songs:




Unnamed: 0,artists,name,danceability,energy,tempo
17619,['Taio Cruz'],Dynamite,0.754,0.804,119.968
153748,['Taio Cruz'],Dynamite,0.755,0.792,119.982
18422,['Maroon 5'],Sugar,0.748,0.788,120.076
54686,['Kaoma'],Lambada - Original Version 1989,0.756,0.8,118.921
55891,['Blood Orange'],You're Not Good Enough,0.74,0.798,119.964



--- Playlist Song 2 ---
Artist / Track: ? / ? (dance=0.62, energy=0.55, tempo=130.0)
Recommended 5 similar songs:




Unnamed: 0,artists,name,danceability,energy,tempo
10486,['Lynyrd Skynyrd'],Don't Ask Me No Questions,0.622,0.559,129.836
54816,['Rod Stewart'],Maggie May - Remastered Version,0.62,0.559,129.401
160644,"['Frank Zappa', 'The Mothers Of Invention']",WPLJ,0.622,0.542,130.737
47227,['Christie'],Yellow River,0.627,0.544,129.851
13206,['Fleetwood Mac'],Gypsy,0.622,0.54,130.879



--- Playlist Song 3 ---
Artist / Track: ? / ? (dance=0.45, energy=0.25, tempo=100.0)
Recommended 5 similar songs:




Unnamed: 0,artists,name,danceability,energy,tempo
22853,['ANTONIS NTALGKAS'],Elenitsa,0.452,0.241,99.803
60410,"['Antonio Vivaldi', 'Isaac Stern', 'Eugene Orm...",I. Allegro from Concerto in A minor for Two Vi...,0.444,0.257,100.944
109659,"['Sujan Majhi', 'Girin Chakraborty']",Sahite Parina,0.451,0.264,100.826
129717,['Jay & The Americans'],She Cried,0.457,0.237,99.931
96176,"['Johann Sebastian Bach', 'Jascha Heifetz', 'E...","Concerto in D Minor for Two Violins, BWV 1043:...",0.439,0.248,99.881



--- Playlist Song 4 ---
Artist / Track: ? / ? (dance=0.68, energy=0.75, tempo=140.0)
Recommended 5 similar songs:




Unnamed: 0,artists,name,danceability,energy,tempo
30074,['Talking Heads'],Pulled Up - 2005 Remaster,0.688,0.755,139.853
140839,['Pop Smoke'],Dior - Bonus,0.676,0.746,142.035
162560,['The Clash'],Career Opportunities - Remastered,0.686,0.738,141.392
16969,['P!nk'],Who Knew,0.688,0.734,140.004
155074,"['Rae Sremmurd', 'Swae Lee', 'Slim Jxmmi', 'Tr...",CLOSE (feat. Travis Scott) - From SR3MM,0.691,0.736,140.034



--- Playlist Song 5 ---
Artist / Track: ? / ? (dance=0.5, energy=0.45, tempo=115.0)
Recommended 5 similar songs:




Unnamed: 0,artists,name,danceability,energy,tempo
159578,['Sam Rivers'],Downstairs Blues Upstairs,0.503,0.448,114.295
66133,"['Lonnie Liston Smith', 'The Cosmic Echoes']",Sunset,0.497,0.459,114.861
83235,['Bill Finley'],Faust,0.494,0.45,115.644
155877,['Giorgos Papasideris'],Rina Katerina,0.498,0.439,115.324
28142,['The Byrds'],Goin' Back,0.496,0.449,116.181


In [15]:
# Define the features and filter the dataset
feature_cols_small = ['danceability', 'energy', 'tempo']
df_small = df.dropna(subset=feature_cols_small)
X_small = df_small[feature_cols_small].copy()

# Scale the features
scaler_small = StandardScaler()
X_small_scaled = scaler_small.fit_transform(X_small)

# Train the Nearest Neighbors model
knn_model_small = NearestNeighbors(n_neighbors=6)
knn_model_small.fit(X_small_scaled)

def recommend_songs_by_name(artist, song_name, n=5):
    # Find the matching song in the dataset
    matching_song = df_small[(df_small['artists'] == artist) & (df_small['name'] == song_name)]
    
    if matching_song.empty:
        print(f"Song '{song_name}' by '{artist}' not found in dataset.")
        return []

    # Get the features of the matched song
    row_data = matching_song.iloc[0][feature_cols_small].values
    
    # Find recommendations based on KNN
    arr = scaler_small.transform([row_data])
    distances, indices = knn_model_small.kneighbors(arr, n_neighbors=n+1)
    
    # Skip the first neighbor (the song itself) and return recommended songs
    rec_indices = indices[0][1:]
    return df_small.iloc[rec_indices][['artists', 'name']].values.tolist()

# Test the function with a sample playlist (artist, song_name)
playlist_songs = [
    ('Manfred Mann\'s Earth Band', 'Blinded By The Light'),
    ('Dua Lipa', 'Don\'t Start Now')
]

# Generate recommendations
for artist, song_name in playlist_songs:
    print(f"\n--- Recommendations for '{song_name}' by {artist} ---")
    recommendations = recommend_songs_by_name(artist, song_name, n=5)
    
    # Display the list of recommended songs
    for rec_artist, rec_song in recommendations:
        print(f"{rec_artist} - \"{rec_song}\"")


--- Recommendations for 'Blinded By The Light' by Bruce Spencer ---
Song 'Blinded By The Light' by 'Bruce Spencer' not found in dataset.

--- Recommendations for 'Don't Start Now' by Dua Lipa ---
Song 'Don't Start Now' by 'Dua Lipa' not found in dataset.
