In [2]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth

import access_token

Access token obtained successfully.


Define a function responsible for collecting music data from a playlist.

In [7]:
# Load the saved Spotify access token from disk, trimming surrounding whitespace.
# Note: this rebinds the name `access_token`, which the import cell bound to a module.
with open('access_token.txt', mode='r') as token_file:
    access_token = token_file.read().strip()

In [4]:
def get_trending_playlist_data(playlist_id, access_token):
    """Collect track metadata and audio features for a Spotify playlist.

    Parameters
    ----------
    playlist_id : str
        ID of the Spotify playlist.
    access_token : str
        Access token used to authenticate with the Spotify API.

    Returns
    -------
    pandas.DataFrame
        One row per usable playlist entry, with the columns listed in
        ``columns`` below (always present, even for an empty playlist).
        Values that could not be obtained are ``None``.

    Notes
    -----
    * Only the items returned by the single ``playlist_tracks`` call are
      processed; no further pages are requested.
    * Album and track look-ups are best-effort: if either call raises, the
      fields derived from it are left as ``None``.
    * Playlist entries whose ``track`` is empty are skipped.
    """
    # Audio-feature columns that follow 'External URLs', mapped to their API keys
    audio_feature_columns = (
        ('Danceability', 'danceability'),
        ('Energy', 'energy'),
        ('Key', 'key'),
        ('Loudness', 'loudness'),
        ('Mode', 'mode'),
        ('Speechiness', 'speechiness'),
        ('Acousticness', 'acousticness'),
        ('Instrumentalness', 'instrumentalness'),
        ('Liveness', 'liveness'),
        ('Valence', 'valence'),
        ('Tempo', 'tempo'),
    )
    columns = [
        'Track Name', 'Artists', 'Album Name', 'Album ID', 'Track ID',
        'Popularity', 'Release Date', 'Duration (ms)', 'Explicit', 'External URLs',
    ] + [column for column, _ in audio_feature_columns]

    def _usable(spotify_id):
        # An ID can be used for a look-up only if it is non-empty and is not
        # the 'Not available' placeholder this function has always honoured.
        return bool(spotify_id) and spotify_id != 'Not available'

    # Set up Spotify with the access token to initialize a Spotify client
    sp = spotipy.Spotify(auth=access_token)

    # Get the tracks from the playlist
    playlist_tracks = sp.playlist_tracks(
        playlist_id,
        fields='items(track(id, name, artists, album(id, name)))',
    )

    # Extract relevant information and store in a list of dictionaries
    music_data = []
    for item in playlist_tracks['items']:
        track = item.get('track')
        if not track:
            # Nothing to describe for this entry
            continue

        album = track.get('album') or {}
        album_id = album.get('id')
        track_id = track.get('id')

        # Get audio features for the track (empty dict when unavailable)
        audio_features = {}
        if _usable(track_id):
            features_list = sp.audio_features(track_id)
            if features_list and features_list[0]:
                audio_features = features_list[0]

        # Get release date of the album (best-effort)
        release_date = None
        if _usable(album_id):
            try:
                release_date = (sp.album(album_id) or {}).get('release_date')
            except Exception:
                release_date = None

        # Get the full track object (best-effort). It is kept in its own
        # variable so the playlist item is never mistaken for track details.
        track_details = {}
        if _usable(track_id):
            try:
                track_details = sp.track(track_id) or {}
            except Exception:
                track_details = {}

        track_data = {
            'Track Name': track.get('name'),
            'Artists': ', '.join(artist['name'] for artist in (track.get('artists') or [])),
            'Album Name': album.get('name'),
            'Album ID': album_id,
            'Track ID': track_id,
            'Popularity': track_details.get('popularity'),
            'Release Date': release_date,
            'Duration (ms)': audio_features.get('duration_ms'),
            'Explicit': track_details.get('explicit'),
            'External URLs': (track_details.get('external_urls') or {}).get('spotify'),
        }
        track_data.update(
            (column, audio_features.get(key)) for column, key in audio_feature_columns
        )

        # Accumulate data for all tracks in the playlist
        music_data.append(track_data)

    # Create a pandas DataFrame; explicit columns keep the schema stable
    # even when no rows were collected.
    df = pd.DataFrame(music_data, columns=columns)

    return df
    

In [8]:
# Spotify playlist whose tracks will be analysed
playlist_id = '4vi25H3SpsJzMLLIbrBGA0'

# Fetch the playlist's track data into a DataFrame
music_df = get_trending_playlist_data(playlist_id=playlist_id, access_token=access_token)

# Show the resulting DataFrame
print(music_df)

                                           Track Name  \
0                   Paul & Silas (At Midnight) - Live   
1                                          Live Again   
2                           Mercy (feat. Chris Brown)   
3                                         Do It Again   
4                                 The Blessing (Live)   
..                                                ...   
95                                  Yek' Umus' Ongaka   
96                        God you keep on blessing me   
97                                      Walk With You   
98  Jesus at the Center/All Hail King Jesus (feat....   
99                                     He Shall Reign   

                                              Artists  \
0                         Naomi Raine, Chandler Moore   
1          ONE HOUSE, Kyle McHargh, Roosevelt Stewart   
2   Elevation Worship, Maverick City Music, Chris ...   
3                                   Elevation Worship   
4           Elevation Worship,

In [9]:
# Count the missing values in each column
print(music_df.isnull().sum(axis=0))

Track Name          0
Artists             0
Album Name          0
Album ID            0
Track ID            0
Popularity          0
Release Date        0
Duration (ms)       0
Explicit            0
External URLs       0
Danceability        0
Energy              0
Key                 0
Loudness            0
Mode                0
Speechiness         0
Acousticness        0
Instrumentalness    0
Liveness            0
Valence             0
Tempo               0
dtype: int64


Build the music recommendation system

In [10]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity

In [11]:
# Bind `data` to the same DataFrame object as music_df (plain assignment; no copy is made)
data = music_df

In [12]:
# Function to calculate weighted popularity scores based on release date
def calculated_weighted_popularity(release_date, reference_date=None):
    """Return a recency weight for a release date: ``1 / (days_since_release + 1)``.

    Parameters
    ----------
    release_date : str
        Release date as 'YYYY-MM-DD'. 'YYYY-MM' and 'YYYY' are also accepted
        and are read as the first day of that month / year.
    reference_date : datetime, optional
        The "today" to measure against. Defaults to ``datetime.now()``.

    Returns
    -------
    float
        A weight in (0, 1]; newer releases get larger weights.

    Raises
    ------
    ValueError
        If ``release_date`` matches none of the accepted formats.
    """
    # Convert release date to a datetime object, trying the most precise format first
    parsed_date = None
    for date_format in ('%Y-%m-%d', '%Y-%m', '%Y'):
        try:
            parsed_date = datetime.strptime(release_date, date_format)
            break
        except ValueError:
            continue
    if parsed_date is None:
        raise ValueError(f"Unrecognised release date format: {release_date!r}")

    if reference_date is None:
        reference_date = datetime.now()

    # Calculate the time span between the release date and the reference date
    time_span = reference_date - parsed_date

    # A release date after the reference date gives negative days; clamp to 0
    # so the denominator below can never reach zero or go negative.
    elapsed_days = max(time_span.days, 0)

    # Calculate the weighted popularity score based on the time span
    weight = 1 / (elapsed_days + 1)
    return weight

- The function described above takes the release date of a music track in the format 'YYYY-MM-DD' and converts it into a datetime object using Python's datetime.strptime function. This conversion allows for date arithmetic. The function then calculates the time span between the track's release date and the current date (today) using datetime.now() - release_date, resulting in a timedelta object that represents the difference between the two dates.

- Next, the function computes a weighted popularity score based on this time span. The weight is calculated using the formula 1 / (time_span.days + 1). The time_span.days attribute provides the number of days between the release date and today. Adding 1 ensures the denominator is never zero, preventing a division-by-zero error for tracks released today (where time_span.days is 0).

- The idea behind this formula is that the weight decreases as the time span increases. Thus, recent releases receive a higher weight, while older releases receive a lower weight. This approach means that when combining this weighted popularity score with other factors in a recommendation system, newer tracks will have a more significant influence on the final recommendations, aligning with users' potential interest in the latest music.

Normalize music features

In [13]:
# Gather the audio-feature columns into an array for scaling
music_features = music_df[
    [
        'Danceability',
        'Energy',
        'Key',
        'Loudness',
        'Mode',
        'Speechiness',
        'Acousticness',
        'Instrumentalness',
        'Liveness',
        'Valence',
        'Tempo',
    ]
].values

# Apply Min-Max scaling to the audio features
scaler = MinMaxScaler()
music_features_scaled = scaler.fit_transform(music_features)

Generate music recommendations based on the music audio features

In [14]:
# A function to get content-based recommendations based on music features
def content_based_recommendations(input_song_name, num_recommendations=5):
    """Recommend the tracks whose scaled audio features are most similar to a song.

    Reads the notebook-level ``music_df`` and ``music_features_scaled`` (rows of
    the two are matched by position).

    Parameters
    ----------
    input_song_name : str
        Value to look up in the 'Track Name' column; the first match is used.
    num_recommendations : int, optional
        Maximum number of rows to return (default 5). Values below zero are
        treated as zero.

    Returns
    -------
    pandas.DataFrame or None
        The most similar tracks, ordered from most to least similar, with the
        columns 'Track Name', 'Artists', 'Album Name', 'Release Date' and
        'Popularity'. ``None`` (after printing a message) when the song is
        not in ``music_df``.
    """
    # Locate the song by row position, not index label, so the look-up into the
    # NumPy feature array is correct whatever index music_df carries.
    matching_positions = np.flatnonzero((music_df['Track Name'] == input_song_name).values)
    if matching_positions.size == 0:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return None
    input_position = matching_positions[0]

    # Calculate the similarity scores based on music features (cosine similarity)
    similarity_scores = cosine_similarity(
        [music_features_scaled[input_position]], music_features_scaled
    )[0]

    # Rank from most to least similar, then drop the input song explicitly.
    # Dropping "the first ranked entry" is wrong when another track ties with it.
    ranked_positions = similarity_scores.argsort()[::-1]
    ranked_positions = ranked_positions[ranked_positions != input_position]
    top_positions = ranked_positions[:max(num_recommendations, 0)]

    # Get the details of the most similar songs
    recommendations = music_df.iloc[top_positions][
        ['Track Name', 'Artists', 'Album Name', 'Release Date', 'Popularity']
    ]

    return recommendations

- The function content_based_recommendations generates song recommendations based on the input song name. It first checks if the song exists in the music_df DataFrame, which contains music data including track names, artists, album names, release dates, and popularity. If the song is found, the function retrieves its index and compares its audio features with those of other songs using cosine similarity from scikit-learn.

- It then identifies the top num_recommendations most similar songs by sorting the similarity scores in descending order and excluding the input song itself. Finally, it extracts and returns the details of these similar songs (track name, artists, album name, release date, and popularity) from music_df.

Function to generate music recommendations based on weighted popularity and combine it with the recommendations of the content-based filtering method