# Music Recommendation System Using Spotify API

##### 'Requests' is used to make HTTP requests to the Spotify API for accessing music data.
##### 'Base64' is used to encode client credentials (ID and secret) in Base64 format for Spotify's authorization.

In [33]:
import requests
import base64

##### The access token serves as a temporary authorization credential, allowing the code to make authenticated requests to the Spotify API on behalf of the application.
##### The Client ID identifies the application, while the Client Secret is a confidential key used for authentication.
##### With the access token, the application can now make authorized requests to retrieve music data, such as tracks, albums, artists, and user information, which is fundamental for building a music recommendation system using the Spotify API and Python.

In [None]:
#  Replace with your own Client ID and Client Secret.
#  Prefer supplying them through the SPOTIFY_CLIENT_ID / SPOTIFY_CLIENT_SECRET
#  environment variables so real credentials never have to be stored in the
#  notebook; the literals below are only the fallback used when a variable is unset.
#  NOTE(review): a secret that has been committed to source should be treated as
#  leaked -- rotate it in the Spotify developer dashboard.
import os

CLIENT_ID = os.environ.get('SPOTIFY_CLIENT_ID', '95faf4562b1042c1ab7d29ba937e63e8')
CLIENT_SECRET = os.environ.get('SPOTIFY_CLIENT_SECRET', 'e546b455a08b45d892dcf29d142ac269')

#  Base64 encode "<client id>:<client secret>".
#  b64encode returns bytes, and the value is deliberately left as bytes here.
client_credentials = f"{CLIENT_ID}:{CLIENT_SECRET}"
client_credentials_base64 = base64.b64encode(client_credentials.encode())

##### You need to install the Spotipy library, which is a Python library providing access to Spotify’s web API.
##### The access_token allows the function to make authorized requests to access Spotify’s resources.
##### The sp.playlist_tracks method retrieves the playlist tracks.

In [None]:
# Request the access token using the 'client_credentials' grant type
token_url = 'https://accounts.spotify.com/api/token'
headers = {
    # The Base64 credentials are bytes, so decode them to text for the header value
    'Authorization': f'Basic {client_credentials_base64.decode()}'
}
data = {
    'grant_type': 'client_credentials'
}
# requests does not time out by default; bound the wait so a stalled connection
# cannot hang this cell indefinitely.
response = requests.post(token_url, data=data, headers=headers, timeout=10)

if response.status_code == 200:
    access_token = response.json()['access_token']
    print("Access token obtained successfully.")
else:
    print("Error obtaining access token.")
    # Stop with a non-zero exit status and include the HTTP status and body so the
    # failure can be diagnosed. `exit()` is a helper meant for the interactive
    # interpreter, so SystemExit is raised directly instead.
    raise SystemExit(
        f"Token request failed with HTTP {response.status_code}: {response.text}"
    )

##### You need to install the Spotipy library, which is a Python library providing access to Spotify’s web API.
##### The sp.playlist_tracks method retrieves the playlist tracks.
##### Track information is stored in a list of dictionaries called music_data.
##### The function uses the sp.audio_features method to fetch audio features for each track in the playlist. 
##### The function then creates a DataFrame from the music_data list. 

In [15]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth

def get_trending_playlist_data(playlist_id, access_token):
    """Collect track metadata and audio features for a playlist into a DataFrame.

    The playlist's items are fetched with one ``sp.playlist_tracks`` call. For
    each track, three further lookups are made: ``sp.audio_features`` (audio
    features), ``sp.album`` (release date) and ``sp.track`` (popularity,
    explicit flag and Spotify URL). The album and track lookups are best-effort:
    if either fails, the affected columns are ``None`` for that row.

    NOTE(review): only the items returned by that single ``playlist_tracks``
    call are processed; if the API pages its results, later pages are not
    fetched here -- confirm whether pagination is needed.

    Args:
        playlist_id: Playlist identifier passed to ``sp.playlist_tracks``.
        access_token: Token passed to ``spotipy.Spotify(auth=...)``.

    Returns:
        pandas.DataFrame: One row per playlist track with the columns
        'Track Name', 'Artists', 'Album Name', 'Album ID', 'Track ID',
        'Popularity', 'Release Date', 'Duration (ms)', 'Explicit',
        'External URLs' and one column per audio feature ('Danceability'
        through 'Tempo').
    """
    # (DataFrame column, key in the audio-features payload), in output order.
    audio_feature_columns = (
        ('Danceability', 'danceability'),
        ('Energy', 'energy'),
        ('Key', 'key'),
        ('Loudness', 'loudness'),
        ('Mode', 'mode'),
        ('Speechiness', 'speechiness'),
        ('Acousticness', 'acousticness'),
        ('Instrumentalness', 'instrumentalness'),
        ('Liveness', 'liveness'),
        ('Valence', 'valence'),
        ('Tempo', 'tempo'),
    )

    # Set up Spotipy with the access token
    sp = spotipy.Spotify(auth=access_token)

    # Get the tracks from the playlist
    playlist_tracks = sp.playlist_tracks(
        playlist_id,
        fields='items(track(id, name, artists, album(id, name)))',
    )

    music_data = []
    for item in playlist_tracks['items']:
        track = item['track']
        if not track:
            # An item without a track payload has nothing to describe; skip it
            # rather than crash on the subscripts below.
            continue

        album = track['album']
        album_id = album['id']
        track_id = track['id']

        # Audio features are only requested when the track actually has an ID.
        audio_features = sp.audio_features(track_id)[0] if track_id else None

        # Release date of the album (best-effort).
        release_date = None
        if album_id:
            try:
                release_date = sp.album(album_id)['release_date']
            except Exception:
                release_date = None

        # Full track record (best-effort). It is kept in its own variable so a
        # failed or skipped lookup leaves an empty dict, never the playlist item
        # or None, for the .get() calls below.
        track_details = {}
        if track_id:
            try:
                track_details = sp.track(track_id) or {}
            except Exception:
                track_details = {}

        track_data = {
            'Track Name': track['name'],
            'Artists': ', '.join(artist['name'] for artist in track['artists']),
            'Album Name': album['name'],
            'Album ID': album_id,
            'Track ID': track_id,
            'Popularity': track_details.get('popularity'),
            'Release Date': release_date,
            'Duration (ms)': audio_features['duration_ms'] if audio_features else None,
            'Explicit': track_details.get('explicit'),
            'External URLs': track_details.get('external_urls', {}).get('spotify'),
        }
        for column, feature_key in audio_feature_columns:
            track_data[column] = audio_features[feature_key] if audio_features else None

        music_data.append(track_data)

    # Create a pandas DataFrame from the list of dictionaries
    return pd.DataFrame(music_data)

##### We then use the function to collect music data from any playlist on Spotify.
##### The code then calls the get_trending_playlist_data function to extract music data from the specified playlist using the provided access_token.

In [14]:
# ID of the Spotify playlist to analyse
playlist_id = '0m1VJLRGMhC9CFdNczFad9'

# Fetch the playlist's track data into a DataFrame using the access token
music_df = get_trending_playlist_data(playlist_id=playlist_id, access_token=access_token)

# Show the collected data
print(music_df)

                                  Track Name             Artists  \
0                                      Views               Drake   
1                                   Lose You               Drake   
2                            6PM In New York               Drake   
3                                 Lord Knows    Drake, Rick Ross   
4                          Weston Road Flows               Drake   
5                        30 for 30 Freestyle               Drake   
6                             Do Not Disturb               Drake   
7                        Diplomatic Immunity               Drake   
8                                 Free Smoke               Drake   
9                             Summer Sixteen               Drake   
10         Pound Cake / Paris Morton Music 2        Drake, JAY-Z   
11                   0 To 100 / The Catch Up               Drake   
12                               Emotionless               Drake   
13                            Tuscan Leather    

##### Check if the data has any null values or not

In [17]:
# Number of missing values in each column (axis=0 sums down the rows)
print(music_df.isnull().sum(axis=0))

Track Name          0
Artists             0
Album Name          0
Album ID            0
Track ID            0
Popularity          0
Release Date        0
Duration (ms)       0
Explicit            0
External URLs       0
Danceability        0
Energy              0
Key                 0
Loudness            0
Mode                0
Speechiness         0
Acousticness        0
Instrumentalness    0
Liveness            0
Valence             0
Tempo               0
dtype: int64


##### Import the necessary Python libraries.

In [19]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity

# Bind a second name to the same DataFrame object (plain assignment, no copy is made).
# NOTE(review): `data` was earlier bound to the token-request payload dict; this rebinds it.
data = music_df
# Bare expression -- presumably here so the notebook renders the DataFrame as cell output.
data

Unnamed: 0,Track Name,Artists,Album Name,Album ID,Track ID,Popularity,Release Date,Duration (ms),Explicit,External URLs,...,Energy,Key,Loudness,Mode,Speechiness,Acousticness,Instrumentalness,Liveness,Valence,Tempo
0,Views,Drake,Views,40GMAhriYJRO1rsY4YdrZb,7MjSipTto9QljYzZnloXOn,57,2016-05-06,311960,True,https://open.spotify.com/track/7MjSipTto9QljYz...,...,0.852,5,-5.896,1,0.37,0.0657,0.0,0.262,0.112,76.428
1,Lose You,Drake,More Life,7Ix0FS4f1lK42C3rix5rHg,465I5rJp4tgLMSwqkqmZOF,0,2017-03-18,305374,True,https://open.spotify.com/track/465I5rJp4tgLMSw...,...,0.583,4,-9.172,1,0.376,0.699,0.0,0.372,0.378,89.835
2,6PM In New York,Drake,If You're Reading This It's Too Late,0ptlfJfwGTy0Yvrk14JK1I,5mZJwWdxAOR4xUvSGZvvMU,55,2015-02-12,283307,True,https://open.spotify.com/track/5mZJwWdxAOR4xUv...,...,0.85,5,-4.155,1,0.251,0.107,0.0,0.155,0.383,128.429
3,Lord Knows,"Drake, Rick Ross",Take Care (Deluxe),6X1x82kppWZmDzlXXK3y3q,1QBwk6GTCxVdC2hoSw9tlM,59,2011-11-15,307640,True,https://open.spotify.com/track/1QBwk6GTCxVdC2h...,...,0.887,5,-5.551,0,0.367,0.0786,0.0,0.834,0.303,167.579
4,Weston Road Flows,Drake,Views,40GMAhriYJRO1rsY4YdrZb,4PA16FAl8LPmFmOhARawdV,59,2016-05-06,253533,True,https://open.spotify.com/track/4PA16FAl8LPmFmO...,...,0.74,1,-6.942,1,0.307,0.0635,0.0,0.366,0.35,82.877
5,30 for 30 Freestyle,Drake,What A Time To Be Alive,1ozpmkWcCHwsQ4QTnxOOdT,1DmnEYXa4WfbdhAPwNzgD8,52,2015-09-20,253935,True,https://open.spotify.com/track/1DmnEYXa4WfbdhA...,...,0.619,4,-9.143,0,0.398,0.671,0.0,0.123,0.317,89.252
6,Do Not Disturb,Drake,More Life,1lXY618HWkwYKJWBRYR4MK,2KvHC9z14GSl4YpkNMX384,72,2017-03-18,283551,True,https://open.spotify.com/track/2KvHC9z14GSl4Yp...,...,0.693,7,-5.943,0,0.45,0.246,0.0,0.112,0.454,170.982
7,Diplomatic Immunity,Drake,Scary Hours,1r0DOIO0iC0bGpMtWRFdde,5goGDc74vVREyN8al8CkPh,0,2018-01-20,255840,True,https://open.spotify.com/track/5goGDc74vVREyN8...,...,0.839,9,-5.076,0,0.443,0.00534,0.0,0.308,0.55,74.867
8,Free Smoke,Drake,More Life,1lXY618HWkwYKJWBRYR4MK,05KOgYg8PGeJyyWBPi5ja8,58,2017-03-18,218674,True,https://open.spotify.com/track/05KOgYg8PGeJyyW...,...,0.491,10,-6.892,0,0.439,0.0681,0.0,0.571,0.203,141.979
9,Summer Sixteen,Drake,Summer Sixteen,3DxEfEe94nVL6iXCT80rFv,17Q87zeXgsAi9iQQbMu9v0,51,2016-02-05,202451,True,https://open.spotify.com/track/17Q87zeXgsAi9iQ...,...,0.46,1,-7.607,1,0.382,0.00354,2.7e-05,0.147,0.0541,146.274


##### We write a function to give more weight to the latest releases in the recommendations. 
##### It uses the datetime.strptime function from the Python datetime module to convert the release date string to a datetime object. 
##### The function calculates the time span between the release date of the track and the current date (today’s date) using `datetime.now() - release_date`.
##### The weighted popularity score is calculated as weight = 1 / (time_span.days + 1), ensuring non-zero weight by adding 1 to the days since release.
##### This formula assigns higher weights to recent releases, making them more influential in recommendations to reflect user interest in newer music.

In [20]:
# Function to calculate weighted popularity scores based on release date
def calculate_weighted_popularity(release_date):
    """Return a recency weight in (0, 1] for a release date string.

    The weight is ``1 / (days_since_release + 1)``, so a release dated today
    gets 1.0 and older releases get progressively smaller weights.

    Args:
        release_date: Date string in ``YYYY-MM-DD``, ``YYYY-MM`` or ``YYYY``
            form. A missing month or day is taken as the first of the period,
            which is what ``strptime`` yields for the shorter formats.

    Returns:
        float: The recency weight. Dates in the future are treated as released
        today (weight 1.0) instead of dividing by zero or going negative.

    Raises:
        ValueError: If ``release_date`` matches none of the supported formats.
    """
    # Try the most precise format first, then fall back to the coarser ones.
    for date_format in ('%Y-%m-%d', '%Y-%m', '%Y'):
        try:
            released_on = datetime.strptime(release_date, date_format)
            break
        except ValueError:
            continue
    else:
        raise ValueError(
            f"Unsupported release date {release_date!r}; "
            "expected YYYY-MM-DD, YYYY-MM or YYYY."
        )

    # Clamp at zero: a future date would otherwise produce a negative day count,
    # and exactly -1 days would make the denominator zero.
    days_since_release = max((datetime.now() - released_on).days, 0)

    # Adding 1 keeps the denominator non-zero for a same-day release.
    return 1 / (days_since_release + 1)

##### We now normalize the music features.

In [21]:
# Pull the numeric audio-feature columns out of music_df as a plain array
music_features = music_df[
    [
        'Danceability',
        'Energy',
        'Key',
        'Loudness',
        'Mode',
        'Speechiness',
        'Acousticness',
        'Instrumentalness',
        'Liveness',
        'Valence',
        'Tempo',
    ]
].values

# Fit a Min-Max scaler on those features and transform them in one step
scaler = MinMaxScaler()
music_features_scaled = scaler.fit_transform(music_features)

#### A hybrid recommendation system combines multiple techniques, like collaborative and content-based filtering, to offer more accurate and diverse recommendations by leveraging the strengths of each approach. 
#### For music recommendations, we will create a system that combines recommendations based on music audio features with recommendations based on weighted popularity.

##### The function takes input_song_name as input to generate song recommendations by checking if it exists in the music_df DataFrame, which holds data such as "Track Name," "Artists," "Album Name," "Release Date," and "Popularity."
##### If the song is found, the function retrieves its index in music_df to compare its audio features with those of other songs.
##### Using cosine similarity (from scikit-learn), the function calculates similarity scores between the input song and all other songs based on their audio features.
##### It then selects the top num_recommendations most similar songs, excluding the input song itself, and retrieves their details from music_df for the recommendations.

In [22]:
# a function to get content-based recommendations based on music features
def content_based_recommendations(input_song_name, num_recommendations=5):
    """Recommend the tracks whose scaled audio features are closest to a song's.

    Similarity is the cosine similarity between the input song's row of
    ``music_features_scaled`` and every row of that array.

    Args:
        input_song_name: Value to look up in the 'Track Name' column of
            ``music_df``. If several rows match, the first is used.
        num_recommendations: Maximum number of tracks to return.

    Returns:
        pandas.DataFrame | None: The 'Track Name', 'Artists', 'Album Name',
        'Release Date' and 'Popularity' columns of the most similar tracks,
        most similar first. ``None`` (after printing a message) when the song
        is not in ``music_df``.
    """
    track_names = music_df['Track Name'].values
    if input_song_name not in track_names:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return None

    # Positional index of the first matching row. The feature array and .iloc
    # below are both positional, so an index *label* must not be used here.
    # argmax on a boolean array returns the position of the first True.
    input_song_index = int((track_names == input_song_name).argmax())

    # Calculate the similarity scores based on music features (cosine similarity)
    similarity_scores = cosine_similarity(
        [music_features_scaled[input_song_index]], music_features_scaled
    )

    # Row positions ordered from most to least similar
    ranked_indices = similarity_scores.argsort()[0][::-1]

    # Drop the input song by its index instead of assuming it sorts first: a
    # track with an identical or parallel feature vector ties with it and may
    # be ranked ahead of it.
    similar_song_indices = ranked_indices[ranked_indices != input_song_index][:num_recommendations]

    return music_df.iloc[similar_song_indices][
        ['Track Name', 'Artists', 'Album Name', 'Release Date', 'Popularity']
    ]

##### The hybrid approach generates personalized recommendations by considering both the content similarity of songs and their weighted popularity. It first retrieves content-based recommendations for the input song by calling the content_based_recommendations function, based on the num_recommendations parameter.
##### The function looks up the input song's popularity score in music_df and multiplies it by the recency weight returned by calculate_weighted_popularity to obtain a weighted popularity score. The alpha parameter is meant to control the balance between content and popularity, but the current implementation accepts it without using it.
##### These content-based recommendations and the input song’s details are combined into a hybrid_recommendations DataFrame, which includes track name, artists, album, release date, popularity, and weighted popularity score.
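##### Finally, the hybrid_recommendations DataFrame is sorted by the Popularity column in descending order, so the most popular songs appear at the top, and the input song is excluded from the final recommendations. Because the input song is the only row that carries a weighted score, the returned recommendations are effectively ranked by their raw Spotify popularity.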

In [23]:
import pandas as pd

def hybrid_recommendations(input_song_name, num_recommendations=5, alpha=0.5):
    """Return content-based recommendations for a song, ordered by popularity.

    The content-based recommendations are combined with a row for the input
    song (whose 'Popularity' is its popularity multiplied by the recency
    weight), sorted by 'Popularity' in descending order, and then every row
    named ``input_song_name`` is removed.

    NOTE(review): ``alpha`` is never read in the body, and the only row given
    a weighted score is the input song's, which is filtered out before
    returning.

    Args:
        input_song_name: Value to look up in the 'Track Name' column of ``music_df``.
        num_recommendations: Passed through to ``content_based_recommendations``.
        alpha: Accepted but unused.

    Returns:
        pandas.DataFrame | None: Recommended tracks with 'Track Name',
        'Artists', 'Album Name', 'Release Date' and 'Popularity' columns, or
        ``None`` (after printing a message) when the song is not in ``music_df``.
    """
    track_names = music_df['Track Name']
    if input_song_name not in track_names.values:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    content_based_rec = content_based_recommendations(input_song_name, num_recommendations)

    # Every row whose title equals the requested song; the first one is used.
    input_rows = music_df.loc[track_names == input_song_name]

    def first_value(column):
        return input_rows[column].values[0]

    release_date = first_value('Release Date')
    recency_weight = calculate_weighted_popularity(release_date)
    weighted_popularity_score = first_value('Popularity') * recency_weight

    # One-row frame describing the input song with its weighted popularity
    input_song_row = pd.DataFrame({
        'Track Name': [input_song_name],
        'Artists': [first_value('Artists')],
        'Album Name': [first_value('Album Name')],
        'Release Date': [release_date],
        'Popularity': [weighted_popularity_score],
    })

    combined = pd.concat([content_based_rec, input_song_row], ignore_index=True)
    ranked = combined.sort_values(by='Popularity', ascending=False)

    # Drop the input song itself from the result
    return ranked[ranked['Track Name'] != input_song_name]

### We can test the final function to generate music recommendations.

In [29]:
# Song to generate recommendations for
input_song_name = "Losses"
recommendations = hybrid_recommendations(input_song_name, num_recommendations=5)

# Print the heading and the recommendations on separate lines
print(f"Hybrid recommended songs for '{input_song_name}':", recommendations, sep="\n")

Hybrid recommended songs for 'Losses':
              Track Name Artists            Album Name Release Date  \
1       Champagne Poetry   Drake   Certified Lover Boy   2021-09-03   
4  Jumbotron Shit Poppin   Drake              Her Loss   2022-11-04   
0    Middle of the Ocean   Drake              Her Loss   2022-11-04   
3           Deep Pockets   Drake  Dark Lane Demo Tapes   2020-05-01   
2    Diplomatic Immunity   Drake           Scary Hours   2018-01-20   

   Popularity  
1        69.0  
4        64.0  
0        60.0  
3        51.0  
2         0.0  
