In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
import warnings

# Settings the warnings to be ignored
warnings.filterwarnings('ignore')

In [11]:
# Load the track table; the file is decoded as ISO-8859-1 rather than the default encoding
spotify_data = pd.read_csv(
    filepath_or_buffer='spotify-2023.csv',
    encoding='ISO-8859-1',
)

In [12]:
# Preview the first five rows to confirm the file parsed into the expected columns
spotify_data.head(n=5)

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,Major,80,89,83,31,0,8,4
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,Major,71,61,74,7,0,10,4
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,Major,51,32,53,17,0,31,6
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,170,A,Major,55,58,72,11,0,11,15
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,144,A,Minor,65,23,80,14,63,11,6


In [13]:
# Count rows that repeat an earlier row in every column.
# NOTE(review): this does not detect tracks that merely share a title; the
# similarity matrix built later is labelled by 'track_name' alone.
spotify_data.duplicated(keep='first').sum()

0

In [14]:
# Audio-feature columns used to compare tracks in the recommender
recommendation_features = [
    'bpm',
    'danceability_%',
    'valence_%',
    'energy_%',
    'acousticness_%',
    'instrumentalness_%',
    'liveness_%',
    'speechiness_%',
]

In [15]:
# Preview only the columns that feed the recommender
spotify_data.loc[:, recommendation_features].head()

Unnamed: 0,bpm,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,125,80,89,83,31,0,8,4
1,92,71,61,74,7,0,10,4
2,138,51,32,53,17,0,31,6
3,170,55,58,72,11,0,11,15
4,144,65,23,80,14,63,11,6


In [16]:
# Rescale each audio feature with min-max scaling so that features measured
# on different scales contribute comparably to the similarity scores
feature_values = spotify_data[recommendation_features]
scaler = MinMaxScaler()
normalized_features = scaler.fit_transform(feature_values)

# Wrap the scaled array in a DataFrame that keeps the feature names as column labels
normalized_df = pd.DataFrame(data=normalized_features, columns=recommendation_features)
normalized_df.head()

Unnamed: 0,bpm,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,0.425532,0.780822,0.913978,0.840909,0.319588,0.0,0.053191,0.032258
1,0.191489,0.657534,0.612903,0.738636,0.072165,0.0,0.074468,0.032258
2,0.51773,0.383562,0.301075,0.5,0.175258,0.0,0.297872,0.064516
3,0.744681,0.438356,0.580645,0.715909,0.113402,0.0,0.085106,0.209677
4,0.560284,0.575342,0.204301,0.806818,0.14433,0.692308,0.085106,0.064516


### Content-Based Recommendation

#### Cosine Similarity

In [17]:
# Pairwise cosine similarity between the scaled feature vectors of all tracks
similarity_matrix = cosine_similarity(normalized_df)

# Label both axes with the track titles so scores can be looked up by name.
# NOTE(review): name-based lookups assume titles are unique - TODO confirm.
track_labels = spotify_data['track_name']
similarity_df = pd.DataFrame(data=similarity_matrix, index=track_labels, columns=track_labels)
similarity_df.head()

track_name,Seven (feat. Latto) (Explicit Ver.),LALA,vampire,Cruel Summer,WHERE SHE GOES,Sprinter,Ella Baila Sola,Columbia,fukumean,La Bebe - Remix,...,Privileged Rappers,The Astronaut,BackOutsideBoyz,Broke Boys,The Great War,My Mind & Me,Bigger Than The Whole Sky,A Veces (feat. Feid),En La De Ella,Alone
track_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Seven (feat. Latto) (Explicit Ver.),1.0,0.975647,0.880344,0.913981,0.755991,0.936269,0.979482,0.910542,0.85585,0.865533,...,0.921663,0.858991,0.796932,0.74103,0.981515,0.805591,0.528567,0.970908,0.974293,0.943181
LALA,0.975647,1.0,0.848438,0.876134,0.761163,0.916185,0.917155,0.90858,0.876697,0.819718,...,0.924463,0.890907,0.798221,0.775496,0.978372,0.707324,0.380635,0.984703,0.997566,0.970845
vampire,0.880344,0.848438,1.0,0.947706,0.808084,0.877307,0.905297,0.879835,0.919387,0.8806,...,0.881834,0.918433,0.875415,0.865885,0.873215,0.860803,0.685082,0.804157,0.850583,0.88173
Cruel Summer,0.913981,0.876134,0.947706,1.0,0.80057,0.910577,0.931923,0.837759,0.855452,0.922431,...,0.913622,0.914379,0.844334,0.835299,0.899534,0.808021,0.604323,0.850565,0.871277,0.87134
WHERE SHE GOES,0.755991,0.761163,0.808084,0.80057,1.0,0.75358,0.754172,0.806952,0.811049,0.73637,...,0.765912,0.847082,0.707696,0.764178,0.775037,0.717606,0.52949,0.702161,0.759296,0.812195


In [25]:
# Dimensions of the similarity matrix as (rows, columns)
similarity_df.shape

(953, 953)

#### Recommendations based on a single song

In [21]:
def get_song_recommendations(song_name, num_recommendations=5, similarity=None):
    """
    Recommend the tracks most similar to a given song.

    Parameters:
    - song_name: Track title to look up; it must match a label of the
      similarity matrix exactly.
    - num_recommendations: Maximum number of songs to return (default is 5).
      Zero or a negative value yields an empty list.
    - similarity: Optional square DataFrame of pairwise similarity scores
      whose rows and columns are both labelled by track name. When omitted,
      the notebook-level `similarity_df` is used.

    Returns:
    - List of track names ordered from most to least similar, or the
      "not found" message string when the song is not a label of the matrix.
    """
    matrix = similarity_df if similarity is None else similarity

    # Check if the song is in our dataset
    if song_name not in matrix.index or song_name not in matrix.columns:
        return "Sorry, the song was not found in the dataset."

    song_similarities = matrix[song_name]

    # A title that occurs more than once selects several columns at once
    # (a DataFrame instead of a Series); keep the first occurrence so the
    # scores can be ranked.
    if song_similarities.ndim == 2:
        song_similarities = song_similarities.iloc[:, 0]

    # Remove the query by label rather than assuming it sorts into first
    # place: a track with identical features ties with it at the top score
    # and could otherwise push the query itself into the results.
    candidates = song_similarities.drop(labels=song_name)

    # mergesort is a stable sort, which keeps the ranking reproducible
    ranked = candidates.sort_values(ascending=False, kind="mergesort")

    return ranked.index[:max(num_recommendations, 0)].tolist()

# Try the recommender on one track, asking for ten suggestions
sample_song = "I Wanna Be Yours"
test_recommendations = get_song_recommendations(sample_song, num_recommendations=10)
test_recommendations

['Mejor Que Yo',
 'No Se Va',
 'Ferxxo 100',
 'Grapejuice',
 'Volando - Remix',
 'Leave The Door Open',
 'Come Back Home - From "Purple Hearts"',
 "she's all i wanna be",
 'Di Que Si',
 'WANDA']

### Save the similarity matrix as a CSV file

In [24]:
# Write the full matrix to disk, keeping the track-name index as the first CSV column
similarity_df.to_csv(path_or_buf='similarity_df.csv', index=True)

In [23]:
# Display the similarity matrix; the rendered output abbreviates the middle rows and columns
similarity_df

track_name,Seven (feat. Latto) (Explicit Ver.),LALA,vampire,Cruel Summer,WHERE SHE GOES,Sprinter,Ella Baila Sola,Columbia,fukumean,La Bebe - Remix,...,Privileged Rappers,The Astronaut,BackOutsideBoyz,Broke Boys,The Great War,My Mind & Me,Bigger Than The Whole Sky,A Veces (feat. Feid),En La De Ella,Alone
track_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Seven (feat. Latto) (Explicit Ver.),1.000000,0.975647,0.880344,0.913981,0.755991,0.936269,0.979482,0.910542,0.855850,0.865533,...,0.921663,0.858991,0.796932,0.741030,0.981515,0.805591,0.528567,0.970908,0.974293,0.943181
LALA,0.975647,1.000000,0.848438,0.876134,0.761163,0.916185,0.917155,0.908580,0.876697,0.819718,...,0.924463,0.890907,0.798221,0.775496,0.978372,0.707324,0.380635,0.984703,0.997566,0.970845
vampire,0.880344,0.848438,1.000000,0.947706,0.808084,0.877307,0.905297,0.879835,0.919387,0.880600,...,0.881834,0.918433,0.875415,0.865885,0.873215,0.860803,0.685082,0.804157,0.850583,0.881730
Cruel Summer,0.913981,0.876134,0.947706,1.000000,0.800570,0.910577,0.931923,0.837759,0.855452,0.922431,...,0.913622,0.914379,0.844334,0.835299,0.899534,0.808021,0.604323,0.850565,0.871277,0.871340
WHERE SHE GOES,0.755991,0.761163,0.808084,0.800570,1.000000,0.753580,0.754172,0.806952,0.811049,0.736370,...,0.765912,0.847082,0.707696,0.764178,0.775037,0.717606,0.529490,0.702161,0.759296,0.812195
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
My Mind & Me,0.805591,0.707324,0.860803,0.808021,0.717606,0.820281,0.877326,0.872293,0.822830,0.836975,...,0.766308,0.734772,0.739995,0.708671,0.770482,1.000000,0.914068,0.673992,0.721053,0.775934
Bigger Than The Whole Sky,0.528567,0.380635,0.685082,0.604323,0.529490,0.558631,0.665652,0.645065,0.572939,0.642219,...,0.477775,0.485726,0.522500,0.505006,0.495605,0.914068,1.000000,0.337681,0.392244,0.488695
A Veces (feat. Feid),0.970908,0.984703,0.804157,0.850565,0.702161,0.928006,0.905283,0.847442,0.837234,0.830181,...,0.932713,0.815500,0.803470,0.730533,0.945626,0.673992,0.337681,1.000000,0.988365,0.920664
En La De Ella,0.974293,0.997566,0.850583,0.871277,0.759296,0.933421,0.913442,0.908928,0.894846,0.838514,...,0.941449,0.881411,0.824655,0.790442,0.967469,0.721053,0.392244,0.988365,1.000000,0.967555


In [26]:
# ========== Stream counts: derive a numeric "streams in millions" column ==========

In [27]:
# Look at the raw table again before working on the stream counts
spotify_data.head(n=5)

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,bpm,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,125,B,Major,80,89,83,31,0,8,4
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,92,C#,Major,71,61,74,7,0,10,4
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,138,F,Major,51,32,53,17,0,31,6
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,170,A,Major,55,58,72,11,0,11,15
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,144,A,Minor,65,23,80,14,63,11,6


In [28]:
# Inspect each column's dtype and non-null count
spotify_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 953 entries, 0 to 952
Data columns (total 24 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   track_name            953 non-null    object
 1   artist(s)_name        953 non-null    object
 2   artist_count          953 non-null    int64 
 3   released_year         953 non-null    int64 
 4   released_month        953 non-null    int64 
 5   released_day          953 non-null    int64 
 6   in_spotify_playlists  953 non-null    int64 
 7   in_spotify_charts     953 non-null    int64 
 8   streams               953 non-null    object
 9   in_apple_playlists    953 non-null    int64 
 10  in_apple_charts       953 non-null    int64 
 11  in_deezer_playlists   953 non-null    object
 12  in_deezer_charts      953 non-null    int64 
 13  in_shazam_charts      903 non-null    object
 14  bpm                   953 non-null    int64 
 15  key                   858 non-null    ob

In [29]:
# 'streams' has object dtype; parse it as numbers, turning unparseable entries into NaN
streams_numeric = pd.to_numeric(spotify_data['streams'], errors='coerce')
spotify_data['streams_M'] = streams_numeric

In [30]:
# Express stream counts in millions. The value is recomputed from the raw
# 'streams' column instead of dividing 'streams_M' in place, so re-running
# this cell cannot divide by a million a second time.
spotify_data['streams_M'] = pd.to_numeric(spotify_data['streams'], errors='coerce') / 1000000

In [31]:
# Keep three decimal places for the stream counts expressed in millions
rounded_streams = spotify_data['streams_M'].round(decimals=3)
spotify_data['streams_M'] = rounded_streams

In [32]:
# Confirm the new 'streams_M' column appears alongside the original columns
spotify_data.head(n=5)

Unnamed: 0,track_name,artist(s)_name,artist_count,released_year,released_month,released_day,in_spotify_playlists,in_spotify_charts,streams,in_apple_playlists,...,key,mode,danceability_%,valence_%,energy_%,acousticness_%,instrumentalness_%,liveness_%,speechiness_%,streams_M
0,Seven (feat. Latto) (Explicit Ver.),"Latto, Jung Kook",2,2023,7,14,553,147,141381703,43,...,B,Major,80,89,83,31,0,8,4,141.382
1,LALA,Myke Towers,1,2023,3,23,1474,48,133716286,48,...,C#,Major,71,61,74,7,0,10,4,133.716
2,vampire,Olivia Rodrigo,1,2023,6,30,1397,113,140003974,94,...,F,Major,51,32,53,17,0,31,6,140.004
3,Cruel Summer,Taylor Swift,1,2019,8,23,7858,100,800840817,116,...,A,Major,55,58,72,11,0,11,15,800.841
4,WHERE SHE GOES,Bad Bunny,1,2023,5,18,3133,50,303236322,84,...,A,Minor,65,23,80,14,63,11,6,303.236
