<a href="https://www.kaggle.com/code/phossri/spotify-song-recommendation-system?scriptVersionId=205240055" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns


# Load dataset and select relevant columns
df = pd.read_csv('/kaggle/input/top-10000-spotify-songs-1960-now/top_10000_1960-now.csv')
df = df[['Track Name', 'Artist Name(s)', 'Album Name', 'Album Release Date', 'Popularity', 'Artist Genres', 
         'Danceability', 'Energy', 'Key', 'Loudness', 'Mode', 'Speechiness', 'Acousticness', 'Instrumentalness', 
         'Liveness', 'Valence', 'Tempo', 'Time Signature']]

In [2]:
# Convert release date and create decade column
df['Album Release Date'] = pd.to_datetime(df['Album Release Date'], errors='coerce')
df.dropna(subset=['Album Release Date'], inplace=True)
df['Year'] = df['Album Release Date'].dt.year
df['Decade'] = (df['Year'] // 10) * 10

In [3]:
df.head()

Unnamed: 0,Track Name,Artist Name(s),Album Name,Album Release Date,Popularity,Artist Genres,Danceability,Energy,Key,Loudness,Mode,Speechiness,Acousticness,Instrumentalness,Liveness,Valence,Tempo,Time Signature,Year,Decade
0,Justified & Ancient - Stand by the Jams,The KLF,Songs Collection,1992-08-03,0,"acid house,ambient house,big beat,hip house",0.617,0.872,8.0,-12.305,1.0,0.048,0.0158,0.112,0.408,0.504,111.458,4.0,1992,1990
1,I Know You Want Me (Calle Ocho),Pitbull,Pitbull Starring In Rebelution,2009-10-23,64,"dance pop,miami hip hop,pop",0.825,0.743,2.0,-5.995,1.0,0.149,0.0142,2.1e-05,0.237,0.8,127.045,4.0,2009,2000
2,From the Bottom of My Broken Heart,Britney Spears,...Baby One More Time (Digital Deluxe Version),1999-01-12,56,"dance pop,pop",0.677,0.665,7.0,-5.171,1.0,0.0305,0.56,1e-06,0.338,0.706,74.981,4.0,1999,1990
3,Apeman - 2014 Remastered Version,The Kinks,"Lola vs. Powerman and the Moneygoround, Pt. On...",2014-10-20,42,"album rock,art rock,british invasion,classic r...",0.683,0.728,9.0,-8.92,1.0,0.259,0.568,5.1e-05,0.0384,0.833,75.311,4.0,2014,2010
4,You Can't Always Get What You Want,The Rolling Stones,Let It Bleed,1969-12-05,0,"album rock,british invasion,classic rock,rock",0.319,0.627,0.0,-9.611,1.0,0.0687,0.675,7.3e-05,0.289,0.497,85.818,4.0,1969,1960


In [4]:
# Dropping nulls
df.dropna(inplace=True)
df.isna().sum()

Track Name            0
Artist Name(s)        0
Album Name            0
Album Release Date    0
Popularity            0
Artist Genres         0
Danceability          0
Energy                0
Key                   0
Loudness              0
Mode                  0
Speechiness           0
Acousticness          0
Instrumentalness      0
Liveness              0
Valence               0
Tempo                 0
Time Signature        0
Year                  0
Decade                0
dtype: int64

In [5]:
# Removing duplicates and creating a decade column
df.drop_duplicates(keep='first', inplace=True)
df.shape[0]

8139

In [6]:
# Define recommendation function for similar songs
df['Artist Genres'] = df['Artist Genres'].str.split(',')

# Create a recommendation function based on genres
def genre_based_recommendations(track_name, artist_name):
    # Find the genres of the input artist
    idx = df[(df['Track Name'] == track_name) & (df['Artist Name(s)'] == artist_name)].index[0]
    target_genres = set(df.loc[idx, 'Artist Genres'])
    
    # Filter songs that share at least one genre in common
    genre_recommendations = df[df['Artist Genres'].apply(lambda genres: bool(set(genres) & target_genres))]

    # Exclude the original song from the results
    genre_recommendations = genre_recommendations[genre_recommendations.index != idx]
    
    # Return the top 5 recommendations with relevant details
    return genre_recommendations[['Track Name', 'Artist Name(s)', 'Album Name', 'Artist Genres']].head(5)

# Testing song recommendation
metallica_like_songs = genre_based_recommendations("Back In Black", 'AC/DC')
metallica_like_songs

Unnamed: 0,Track Name,Artist Name(s),Album Name,Artist Genres
3,Apeman - 2014 Remastered Version,The Kinks,"Lola vs. Powerman and the Moneygoround, Pt. On...","[album rock, art rock, british invasion, class..."
4,You Can't Always Get What You Want,The Rolling Stones,Let It Bleed,"[album rock, british invasion, classic rock, r..."
5,Don't Stop - 2004 Remaster,Fleetwood Mac,Rumours,"[album rock, classic rock, rock, soft rock, ya..."
7,Something About The Way You Look Tonight - Edi...,Elton John,Candle In The Wind 1997 / Something About ...,"[glam rock, mellow gold, piano rock, rock]"
11,Here Without You,3 Doors Down,Away From The Sun,"[alternative metal, nu metal, post-grunge, rock]"


In [7]:
# Create a function to recommend artists based on shared genres
def artist_based_recommendations(artist_name):
    # Find the genres of the specified artist
    artist_row = df[df['Artist Name(s)'] == artist_name]
    
    # If artist not found, return empty DataFrame
    if artist_row.empty:
        return pd.DataFrame(columns=['Artist Name(s)', 'Artist Genres'])
    
    target_genres = set(artist_row['Artist Genres'].values[0])
    
    # Filter artists that share at least one genre
    genre_recommendations = df[df['Artist Genres'].apply(lambda genres: bool(set(genres) & target_genres))]
    
    # Exclude the original artist from recommendations
    genre_recommendations = genre_recommendations[genre_recommendations['Artist Name(s)'] != artist_name]
    
    return genre_recommendations[['Artist Name(s)', 'Artist Genres']].head(5)

# Test artist recommendations
phill_collins_like_artists = artist_based_recommendations('Queen')
print(phill_collins_like_artists)

        Artist Name(s)                                      Artist Genres
3            The Kinks  [album rock, art rock, british invasion, class...
4   The Rolling Stones  [album rock, british invasion, classic rock, r...
5        Fleetwood Mac  [album rock, classic rock, rock, soft rock, ya...
7           Elton John         [glam rock, mellow gold, piano rock, rock]
11        3 Doors Down   [alternative metal, nu metal, post-grunge, rock]


In [8]:
# Top 3 popular artists per decade, just for curiosity :D
top_artists_per_decade = df.groupby(['Decade', 'Artist Name(s)'])['Popularity'].sum().reset_index()
top_3_artists = top_artists_per_decade.groupby('Decade').apply(lambda x: x.nlargest(3, 'Popularity')).reset_index(drop=True)
print(top_3_artists)

    Decade            Artist Name(s)  Popularity
0     1950             Elvis Presley         326
1     1950  The Dave Brubeck Quartet          70
2     1950                 Bobby Day          64
3     1960               The Beatles        1479
4     1960             Elvis Presley         828
5     1960        The Rolling Stones         798
6     1970                     Queen         827
7     1970             Fleetwood Mac         614
8     1970                    Eagles         583
9     1980                   Madonna         585
10    1980              Phil Collins         574
11    1980                     AC/DC         511
12    1990     Red Hot Chili Peppers         632
13    1990              Mariah Carey         588
14    1990               Spice Girls         578
15    2000                   Madonna        1186
16    2000            Britney Spears        1122
17    2000                Nickelback         918
18    2010                Ed Sheeran        2159
19    2010          

  top_3_artists = top_artists_per_decade.groupby('Decade').apply(lambda x: x.nlargest(3, 'Popularity')).reset_index(drop=True)
