In [4]:
import pandas as pd
import datetime as dt


In [5]:
# Read the streaming-stats CSV into a DataFrame, decoding the file as Latin-1
file_path = 'Most Streamed Spotify Songs 2024.csv'
spotify_data = pd.read_csv(
    filepath_or_buffer=file_path,
    encoding='latin1',
)

In [6]:
# Show the column labels exactly as loaded so repeated names can be spotted
print("Original columns:", spotify_data.columns, sep=" ")

Original columns: Index(['Track', 'Album Name', 'Artist', 'Release Date', 'ISRC',
       'All Time Rank', 'Track Score', 'Spotify Streams',
       'Spotify Playlist Count', 'Spotify Playlist Reach',
       'Spotify Popularity', 'YouTube Views', 'YouTube Likes', 'TikTok Posts',
       'TikTok Likes', 'TikTok Views', 'YouTube Playlist Reach',
       'Apple Music Playlist Count', 'AirPlay Spins', 'SiriusXM Spins',
       'Deezer Playlist Count', 'Deezer Playlist Reach',
       'Amazon Playlist Count', 'Pandora Streams', 'Pandora Track Stations',
       'Soundcloud Streams', 'Shazam Counts', 'TIDAL Popularity',
       'Explicit Track'],
      dtype='object')


In [7]:
# Keep only the first occurrence of every column label
is_repeated_label = spotify_data.columns.duplicated()
spotify_data = spotify_data.loc[:, ~is_repeated_label]

In [8]:
# Parse 'Release Date' into datetimes; values that cannot be parsed become NaT
parsed_release_dates = pd.to_datetime(spotify_data['Release Date'], errors='coerce')
spotify_data['Release Date'] = parsed_release_dates

In [14]:
def recommend_songs_by_date(data, start_date=None, end_date=None, top_n=10, sort_by='Spotify Streams'):
    """Return the top tracks released inside a date window, ordered by a metric.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Track', 'Artist', 'Album Name',
        'Release Date' and ``sort_by``. The frame is never modified.
        'Release Date' may hold datetimes or date strings; it is parsed on
        an internal copy, and unparseable values become NaT.
    start_date, end_date : str or datetime-like, optional
        Inclusive bounds on 'Release Date'. A falsy value (``None``, ``''``)
        disables that bound. Rows whose date is NaT are dropped as soon as
        either bound is active.
    top_n : int, optional
        Number of rows to return. A falsy value (``None``, ``0``) returns
        every matching row, sorted in descending order of ``sort_by``.
    sort_by : str
        Metric column. Values may be numbers or strings with ',' thousands
        separators; missing or unparseable values count as 0.

    Returns
    -------
    pandas.DataFrame
        Columns 'Track', 'Artist', 'Album Name', 'Release Date', ``sort_by``
        and 'All Time Rank'. ``sort_by`` is returned as display strings with
        thousands separators. 'All Time Rank' is the dense rank (1 = highest)
        of ``sort_by`` over every row of ``data``, computed before the date
        filter is applied, and it replaces any existing 'All Time Rank' column.
    """
    # Work on a copy so the caller's frame is left untouched
    filtered_data = data.copy()

    # Normalise the metric: strip literal commas, coerce to numbers, and
    # treat missing/unparseable values as 0
    filtered_data[sort_by] = pd.to_numeric(
        filtered_data[sort_by].astype(str).str.replace(',', '', regex=False),
        errors='coerce'
    ).fillna(0)

    # Normalise the dates as well, so the function does not depend on the
    # caller having parsed 'Release Date' beforehand. Comparing raw date
    # strings with a Timestamp raises TypeError. Already-parsed columns pass
    # through unchanged.
    filtered_data['Release Date'] = pd.to_datetime(
        filtered_data['Release Date'],
        errors='coerce'
    )

    # Rank against the complete dataset, before any date filtering.
    # The metric has no NaN left at this point, so the cast to int is safe.
    filtered_data['All Time Rank'] = filtered_data[sort_by].rank(
        ascending=False,
        method='dense'
    ).astype(int)

    # Apply the inclusive date bounds that were supplied
    if start_date:
        filtered_data = filtered_data[filtered_data['Release Date'] >= pd.to_datetime(start_date)]
    if end_date:
        filtered_data = filtered_data[filtered_data['Release Date'] <= pd.to_datetime(end_date)]

    # Keep the top N rows, or every row sorted descending when top_n is falsy
    if top_n:
        filtered_data = filtered_data.nlargest(top_n, sort_by)
    else:
        filtered_data = filtered_data.sort_values(by=sort_by, ascending=False)

    # Format the metric for display (e.g. 2500000 -> '2,500,000')
    filtered_data[sort_by] = filtered_data[sort_by].map('{:,.0f}'.format)

    return filtered_data[['Track', 'Artist', 'Album Name', 'Release Date', sort_by, 'All Time Rank']]

In [15]:
# Example: the ten most-streamed tracks released within the window below
start_date, end_date = '2023-01-04', '2024-01-01'
recommended_songs = recommend_songs_by_date(
    spotify_data,
    start_date=start_date,
    end_date=end_date,
    top_n=10,
    sort_by='Spotify Streams',
)

In [16]:
# Debug output: the raw (unparsed) stream counts, then the recommendations.
# Each call prints a heading line followed by the object on the next line.
print("\nSample of original Spotify Streams:", spotify_data['Spotify Streams'].head(), sep="\n")
print("\nRecommended Songs:", recommended_songs, sep="\n")


Sample of original Spotify Streams:
0      390,470,936
1      323,703,884
2      601,309,283
3    2,031,280,633
4      107,034,922
Name: Spotify Streams, dtype: object

Recommended Songs:
                                    Track                   Artist  \
3                                 Flowers              Miley Cyrus   
3758                              Flowers              Miley Cyrus   
1630                              Flowers              Miley Cyrus   
4331                            Kill Bill                      SZA   
1113                            Kill Bill          pasa michaelson   
155   Seven (feat. Latto) (Explicit Ver.)                Jung Kook   
389      Seven (feat. Latto) (Clean Ver.)                Jung Kook   
2132      Gangsta's Paradise (feat. L.V.)                   Coolio   
1265                 Danza Kuduro - Cover            MUSIC LAB JPN   
862                  Danza Kuduro - Cover  DJ MIX NON-STOP CHANNEL   

                                        