In [149]:
import pandas as pd
import numpy as np



In [150]:
import requests

def read_access_token(file_path):
    with open(file_path, 'r') as file:
        return file.read().strip()
    
access_token = read_access_token('access_token.txt')


def get_spotify_user_profile(access_token):
    url = 'https://api.spotify.com/v1/me'
    headers = {'Authorization': f'Bearer {access_token}'}
    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        return response.json()  # Returns user profile data
    else:
        print(f"Error fetching user profile: {response.status_code}")
        print(f"Response: {response.text}")
        return None

user_profile = get_spotify_user_profile(access_token)
print(user_profile)

def get_spotify_top_data(access_token, data_type='tracks'):
    url = f'https://api.spotify.com/v1/me/top/{data_type}'
    headers = {'Authorization': f'Bearer {access_token}'}
    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        return [item['id'] for item in response.json()['items']]
    else:
        # Log the status code and response for debugging
        print(f"Error fetching top {data_type}: {response.status_code}")
        print(f"Response: {response.json()}")
        return []


access_token = read_access_token('access_token.txt')
top_tracks = get_spotify_top_data(access_token, 'tracks')
top_artists = get_spotify_top_data(access_token, 'artists')

{'display_name': 'santiagofischel', 'external_urls': {'spotify': 'https://open.spotify.com/user/santiagofischel'}, 'href': 'https://api.spotify.com/v1/users/santiagofischel', 'id': 'santiagofischel', 'images': [{'url': 'https://i.scdn.co/image/ab67757000003b82767ddaf83ec34926931494c7', 'height': 64, 'width': 64}, {'url': 'https://i.scdn.co/image/ab6775700000ee85767ddaf83ec34926931494c7', 'height': 300, 'width': 300}], 'type': 'user', 'uri': 'spotify:user:santiagofischel', 'followers': {'href': None, 'total': 68}}


In [151]:
def get_playlist_tracks(access_token, playlist_id):
    url = f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks'
    headers = {'Authorization': f'Bearer {access_token}'}
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        try:
            items = response.json().get('items')
            if items is not None:
                # Filter out any items where 'track' is None or 'track' does not have an 'id'
                valid_tracks = [item['track']['id'] for item in items if item.get('track') and item['track'].get('id')]
                return valid_tracks
            else:
                print(f"Warning: 'items' key not found in the response for playlist ID {playlist_id}.")
                return []
        except ValueError as e:
            print(f"Error parsing JSON from the response: {e}")
            return []
    else:
        print(f"Failed to fetch tracks for playlist ID {playlist_id}. Status code: {response.status_code}")
        return []


# Get all playlists
def get_all_user_playlists(access_token):
    url = 'https://api.spotify.com/v1/me/playlists'
    headers = {'Authorization': f'Bearer {access_token}'}
    response = requests.get(url, headers=headers)
    playlists = response.json()['items'] if response.status_code == 200 else []
    return playlists

# Fetch all tracks from all playlists
def get_all_tracks_from_playlists(access_token):
    playlists = get_all_user_playlists(access_token)
    all_tracks = set()
    for playlist in playlists:
        tracks = get_playlist_tracks(access_token, playlist['id'])
        all_tracks.update(tracks)
    return all_tracks

access_token = read_access_token('access_token.txt')  # Update the path to your access token file
playlist_tracks = get_all_tracks_from_playlists(access_token)


In [152]:
playlist_tracks

{'00NAQYOP4AmWR549nnYJZu',
 '00hdjyXt6MohKnCyDmhxOL',
 '01DOm09B2AbiTARAbXVI6g',
 '03t183ijgT3kua3Bk7XHpP',
 '06s3QtMJVXw1AJX3UfvZG1',
 '07w6jp4Fq8GGqhLoDYoVkb',
 '0AjNBgkkdeUyj4HucyGRr8',
 '0DMRKqrpd0QO4RWiEdMib0',
 '0ESJlaM8CE1jRWaNtwSNj8',
 '0G4QCpJMA6u8MGXxzg4gm9',
 '0GONea6G2XdnHWjNZd6zt3',
 '0GbrwlmnRMJOQ5bRV9QcZa',
 '0HB4odbGJbgR0ffA4rJoxS',
 '0HDudjyoG3UpWIMAJrXIaB',
 '0I3q5fE6wg7LIfHGngUTnV',
 '0L7UzdYmOkLhHoDLEcUa8f',
 '0Nwar8rweBUyfyuoyjjUvk',
 '0OIkGLJOrJtrQ9pm805I5m',
 '0P6AWOA4LG1XOctzaVu5tt',
 '0RbW8kWozrVSIGb4V13o0o',
 '0S9ZVNTNRzLMgb96tmmG1u',
 '0SbDNXZYqfsMarINcb72X5',
 '0SqqAgdovOE24BzxIClpjw',
 '0V4l4GQhgnWQGtCWpvA7va',
 '0W1jh3ExLv0nMvvmNJOBxh',
 '0Wi7iEDGFY7I7NkYU6gS4n',
 '0aVrpFRLlrd5zVyPXWP3mS',
 '0aym2LBJBk9DAYuHHutrIl',
 '0bRXwKfigvpKZUurwqAlEh',
 '0c64nOsaJu7LSrX9pI4124',
 '0dFdGPVLs3k0z9ezYWZzUa',
 '0dWOFwdXrbBUYqD9DLsoyK',
 '0dbTQYW3Ad1FTzIA9t90E8',
 '0eXIlXgGQTD6hezaxdl3wO',
 '0gzqZ9d1jIKo9psEIthwXe',
 '0h4jHdNzUrV9eGR2PT5R8c',
 '0jeS8PyyIRAG0A3t5DtL71',
 

In [153]:

file_path = 'dataset_ready.csv'
final_df = pd.read_csv(file_path)
final_df.head()


Unnamed: 0.1,Unnamed: 0,user,song,title,play_count,release,artist_name,year,danceability,energy,...,valence,tempo,user_encoded,song_encoded,title_encoded,release_encoded,artist_name_encoded,year_encoded,key_encoded,mode_encoded
0,0,a4f803a42f03018ccfa4c8a27e330df40be8b9b9,SOPSOHT12A67AE0235,Almaz,1,Every Kind Of Mood,Randy Crawford,1986.0,0.527093,0.492865,...,0.402229,4.920032,5640,4406,233,1455,2614,33,2,0
1,1,0878045f0dc2968f0403a26e6fd8bda99d19d0db,SOXIGHW12A6D4F7245,Weak In The Knees,2,If Your Memory Serves You Well,Serena Ryder,2006.0,0.478096,0.536493,...,0.406282,4.962754,306,6396,6463,2121,2827,53,6,2
2,2,2bd8c53931b30461ecf79469c12099a5e25f6618,SOAZTAD12A8C14494A,Until We're Dead,1,Until We're Dead,Star Fucking Hipsters,2008.0,0.223943,0.659073,...,0.141844,4.751096,1539,283,6270,4981,2979,55,1,2
3,3,1adcd0137fabfe2c9b2828caf5b1c95bf7994337,SODTEIO12A8AE46F47,Gas Can Row,1,Head Home,O'Death,2007.0,0.479682,0.476091,...,0.493547,4.733682,929,1073,2003,1903,2378,54,5,1
4,4,e8a55ac62f27b90294c126358ef5409b6a341fcd,SOWSEEJ12AB01893F4,Everything Looks Beautiful On Video,5,Stop The Future,The Epoxies,2005.0,0.367417,0.638691,...,0.64539,5.175799,7917,6228,1690,4014,3219,52,8,2


In [154]:
final_df.drop('Unnamed: 0', axis=1, inplace=True)


In [155]:
final_df

Unnamed: 0,user,song,title,play_count,release,artist_name,year,danceability,energy,key,...,valence,tempo,user_encoded,song_encoded,title_encoded,release_encoded,artist_name_encoded,year_encoded,key_encoded,mode_encoded
0,a4f803a42f03018ccfa4c8a27e330df40be8b9b9,SOPSOHT12A67AE0235,Almaz,1,Every Kind Of Mood,Randy Crawford,1986.0,0.527093,0.492865,2.00,...,0.402229,4.920032,5640,4406,233,1455,2614,33,2,0
1,0878045f0dc2968f0403a26e6fd8bda99d19d0db,SOXIGHW12A6D4F7245,Weak In The Knees,2,If Your Memory Serves You Well,Serena Ryder,2006.0,0.478096,0.536493,5.00,...,0.406282,4.962754,306,6396,6463,2121,2827,53,6,2
2,2bd8c53931b30461ecf79469c12099a5e25f6618,SOAZTAD12A8C14494A,Until We're Dead,1,Until We're Dead,Star Fucking Hipsters,2008.0,0.223943,0.659073,1.00,...,0.141844,4.751096,1539,283,6270,4981,2979,55,1,2
3,1adcd0137fabfe2c9b2828caf5b1c95bf7994337,SODTEIO12A8AE46F47,Gas Can Row,1,Head Home,O'Death,2007.0,0.479682,0.476091,4.24,...,0.493547,4.733682,929,1073,2003,1903,2378,54,5,1
4,e8a55ac62f27b90294c126358ef5409b6a341fcd,SOWSEEJ12AB01893F4,Everything Looks Beautiful On Video,5,Stop The Future,The Epoxies,2005.0,0.367417,0.638691,7.00,...,0.645390,5.175799,7917,6228,1690,4014,3219,52,8,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8995,b61afb42335287239bd40e1dea50d849cbf8a9a9,SOUTBTE12A8C135491,Being Bad Feels Pretty Good,4,You Have No Idea What You're Getting Yourself ...,Does It Offend You_ Yeah?,2008.0,0.408128,0.649195,6.00,...,0.623100,4.990194,6181,5688,541,5312,929,55,7,2
8996,7936ceedce6ff5ec06514f0dab6d153762c78509,SOSROFB12AAF3B4C5D,You Belong With Me,3,Fearless,Taylor Swift,2008.0,0.489806,0.572673,6.00,...,0.480243,4.875449,4146,5157,6778,1543,3102,55,7,2
8997,a5528b6b7f12fe20d5795b1d82c968a0d0668b82,SOXLJXH12A8C13D903,Storm,1,Who We Are,Lifehouse,2005.0,0.342170,0.259283,7.00,...,0.077001,4.684314,5647,6424,5338,5221,1893,52,8,2
8998,6f5710b29f4d04db6451879607382d6d5d1308d5,SOWGIBZ12A8C136A2E,King Of The Rodeo,3,Aha Shake Heartbreak,Kings Of Leon,2004.0,0.466874,0.647627,9.00,...,0.722391,5.025819,3795,6094,3050,239,1781,51,10,2


In [156]:


final_df['play_count_log'] = np.log1p(final_df['play_count'])


In [157]:
import requests
import pandas as pd

def read_access_token(file_path):
    with open(file_path, 'r') as file:
        return file.read().strip()

access_token = read_access_token('access_token.txt')



In [158]:
def get_spotify_top_data(access_token, data_type='tracks', limit=50):
    url = f'https://api.spotify.com/v1/me/top/{data_type}'
    headers = {'Authorization': f'Bearer {access_token}'}
    response = requests.get(url, headers=headers, params={'limit': limit})
    
    if response.status_code == 200:
        items = response.json()['items']
        return [item['id'] for item in items], [item for item in items]
    else:
        print(f"Error fetching top {data_type}: {response.status_code}")
        return [], []

top_tracks_ids, top_tracks_items = get_spotify_top_data(access_token, 'tracks')
top_artists_ids, _ = get_spotify_top_data(access_token, 'artists')


In [159]:
def fetch_audio_features(track_ids):
    url = 'https://api.spotify.com/v1/audio-features'
    headers = {'Authorization': f'Bearer {access_token}'}
    response = requests.get(url, headers=headers, params={'ids': ','.join(track_ids)})
    
    if response.status_code == 200:
        return response.json()['audio_features']
    else:
        print(f"Error fetching audio features: {response.status_code}")
        return []

audio_features = fetch_audio_features(top_tracks_ids)


In [160]:
def aggregate_user_profile(audio_features):
    # Convert the list of audio features into a DataFrame
    features_df = pd.DataFrame(audio_features)
    
    # Select relevant features for aggregation
    relevant_features = ['danceability', 'energy', 'valence', 'tempo', 'acousticness', 'instrumentalness', 'speechiness', 'liveness']
    profile = features_df[relevant_features].mean().to_dict()
    
    return profile

user_profile = aggregate_user_profile(audio_features)
print(user_profile)


{'danceability': 0.6309400000000001, 'energy': 0.6156400000000001, 'valence': 0.6014400000000001, 'tempo': 117.07244, 'acousticness': 0.29918000000000006, 'instrumentalness': 0.08284788539999999, 'speechiness': 0.15499000000000002, 'liveness': 0.250442}


In [161]:
def aggregate_user_profile(audio_features):
    # Convert the list of audio features into a DataFrame
    features_df = pd.DataFrame(audio_features)
    
    # Select relevant features for aggregation
    relevant_features = ['danceability', 'energy', 'valence', 'tempo', 'acousticness', 'instrumentalness', 'speechiness', 'liveness']
    profile = features_df[relevant_features].mean().to_dict()
    
    return profile

user_profile = aggregate_user_profile(audio_features)
print(user_profile)


{'danceability': 0.6309400000000001, 'energy': 0.6156400000000001, 'valence': 0.6014400000000001, 'tempo': 117.07244, 'acousticness': 0.29918000000000006, 'instrumentalness': 0.08284788539999999, 'speechiness': 0.15499000000000002, 'liveness': 0.250442}


In [162]:
for feature in user_profile.keys():
    # Creating a new column for each differential feature
    final_df[f'{feature}_diff'] = abs(final_df[feature] - user_profile[feature])

In [163]:
final_df

Unnamed: 0,user,song,title,play_count,release,artist_name,year,danceability,energy,key,...,mode_encoded,play_count_log,danceability_diff,energy_diff,valence_diff,tempo_diff,acousticness_diff,instrumentalness_diff,speechiness_diff,liveness_diff
0,a4f803a42f03018ccfa4c8a27e330df40be8b9b9,SOPSOHT12A67AE0235,Almaz,1,Every Kind Of Mood,Randy Crawford,1986.0,0.527093,0.492865,2.00,...,0,0.693147,0.103847,0.122775,0.199211,112.152408,0.193074,0.078663,0.123394,0.129110
1,0878045f0dc2968f0403a26e6fd8bda99d19d0db,SOXIGHW12A6D4F7245,Weak In The Knees,2,If Your Memory Serves You Well,Serena Ryder,2006.0,0.478096,0.536493,5.00,...,2,1.098612,0.152844,0.079147,0.195158,112.109686,0.040669,0.082843,0.125043,0.137113
2,2bd8c53931b30461ecf79469c12099a5e25f6618,SOAZTAD12A8C14494A,Until We're Dead,1,Until We're Dead,Star Fucking Hipsters,2008.0,0.223943,0.659073,1.00,...,2,0.693147,0.406997,0.043433,0.459596,112.321344,0.299154,0.140050,0.045239,0.168954
3,1adcd0137fabfe2c9b2828caf5b1c95bf7994337,SODTEIO12A8AE46F47,Gas Can Row,1,Head Home,O'Death,2007.0,0.479682,0.476091,4.24,...,1,0.693147,0.151258,0.139549,0.107893,112.338758,0.058139,0.006535,0.119942,0.108751
4,e8a55ac62f27b90294c126358ef5409b6a341fcd,SOWSEEJ12AB01893F4,Everything Looks Beautiful On Video,5,Stop The Future,The Epoxies,2005.0,0.367417,0.638691,7.00,...,2,1.791759,0.263523,0.023051,0.043950,111.896641,0.296972,0.080974,0.115193,0.089595
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8995,b61afb42335287239bd40e1dea50d849cbf8a9a9,SOUTBTE12A8C135491,Being Bad Feels Pretty Good,4,You Have No Idea What You're Getting Yourself ...,Does It Offend You_ Yeah?,2008.0,0.408128,0.649195,6.00,...,2,1.609438,0.222812,0.033555,0.021660,112.082246,0.298240,0.082834,0.105533,0.150597
8996,7936ceedce6ff5ec06514f0dab6d153762c78509,SOSROFB12AAF3B4C5D,You Belong With Me,3,Fearless,Taylor Swift,2008.0,0.489806,0.572673,6.00,...,2,1.386294,0.141134,0.042967,0.121197,112.196991,0.238744,0.082848,0.120975,0.165641
8997,a5528b6b7f12fe20d5795b1d82c968a0d0668b82,SOXLJXH12A8C13D903,Storm,1,Who We Are,Lifehouse,2005.0,0.342170,0.259283,7.00,...,2,0.693147,0.288770,0.356357,0.524439,112.388126,0.336868,0.082761,0.117983,0.142485
8998,6f5710b29f4d04db6451879607382d6d5d1308d5,SOWGIBZ12A8C136A2E,King Of The Rodeo,3,Aha Shake Heartbreak,Kings Of Leon,2004.0,0.466874,0.647627,9.00,...,2,1.386294,0.164066,0.031987,0.120951,112.046621,0.288438,0.080528,0.124073,0.174593


In [164]:
for col in final_df.columns:
    if final_df[col].dtype == 'int64':
        final_df[col] = final_df[col].astype('int32')
    elif final_df[col].dtype == 'float64':
        final_df[col] = final_df[col].astype('float32')

In [165]:
from sklearn.model_selection import train_test_split

predictor_columns = ['song_encoded','energy','tempo','danceability_diff', 'energy_diff', 'valence_diff', 'tempo_diff', 'acousticness_diff', 'instrumentalness_diff', 'speechiness_diff', 'liveness_diff']

X = final_df[predictor_columns]

y = final_df['play_count_log']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [166]:
import lightgbm as lgb

# Convert the datasets into LightGBM dataset format
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

# Set the parameters for the model
params = {
            'objective': 'regression',
            'metric': 'rmse',
            'num_leaves': 32,
            'learning_rate': 0.1,
            'feature_fraction': 0.8,
            'bagging_fraction': 0.9,
            'bagging_freq': 5,
            'verbose': -1,
            'max_depth': -1,
            'min_data_in_leaf': 20,
            'lambda_l1': 0.5,
            'lambda_l2': 0.5
        }

# Train the model
num_round = 100
bst = lgb.train(params, train_data, num_boost_round=num_round, valid_sets=[test_data])

In [167]:
# Predict on the test set
y_pred = bst.predict(X_test)


In [168]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
from math import sqrt

# Calculate RMSE
rmse = sqrt(mean_squared_error(y_test, y_pred))

# Calculate MAE
mae = mean_absolute_error(y_test, y_pred)

print(f"RMSE: {rmse}")
print(f"MAE: {mae}")


RMSE: 0.6288866202837111
MAE: 0.4593086970903609


In [169]:
# Create a copy of X_test to avoid modifying the original DataFrame
recommended_songs = X_test.copy()

# Add the predicted play_count_log to this new DataFrame
recommended_songs['predicted_play_count_log'] = y_pred


In [170]:
print(recommended_songs[['predicted_play_count_log', 'energy', 'tempo_diff']].describe())

       predicted_play_count_log  energy_diff   tempo_diff
count               1800.000000  1800.000000  1800.000000
mean                   1.072171     0.135070   112.295982
std                    0.160901     0.115241     0.211649
min                    0.674073     0.000086   111.692894
25%                    0.967316     0.048056   112.166479
50%                    1.077902     0.137544   112.338760
75%                    1.128302     0.154425   112.401335
max                    2.091430     0.609330   113.447723


In [174]:
# Assume heart_rate is obtained from user input
heart_rate = 150  # Example

# Calculate tempo thirds for the dataset
tempo_min, tempo_max = recommended_songs['tempo'].min(), recommended_songs['tempo'].max()
tempo_third = (tempo_max - tempo_min) / 3

# Determine the desired tempo range based on heart rate
if heart_rate < 100:
    tempo_low, tempo_high = tempo_min, tempo_min + tempo_third
elif heart_rate <= 140:
    tempo_low, tempo_high = tempo_min + tempo_third, tempo_min + 2 * tempo_third
else:  # heart_rate > 140
    tempo_low, tempo_high = tempo_min + 2 * tempo_third, tempo_max

# Filter songs within the desired tempo range
matching_songs = recommended_songs[(recommended_songs['tempo'] >= tempo_low) & (recommended_songs['tempo'] <= tempo_high)]

# Sort remaining songs by predicted play count log, assuming higher is better
sorted_songs = matching_songs.sort_values(by='predicted_play_count_log', ascending=False)

# Extract the top 10 recommendations
top_10_recommendations = sorted_songs.head(10)


In [175]:
top_10_with_names = top_10_recommendations.merge(final_df[['song_encoded', 'title', 'artist_name']], 
                                                  on='song_encoded', 
                                                  how='left')

# Drop duplicates to ensure each song title appears only once
top_10_with_names = top_10_with_names.drop_duplicates(subset='title', keep='first')

In [176]:
print(top_10_with_names[['title', 'artist_name', 'predicted_play_count_log', 'tempo_diff']])

                     title       artist_name  predicted_play_count_log  \
0                    Fader   The Temper Trap                  2.060144   
2               What's Up?     4 Non Blondes                  2.040001   
5   Make Love To Your Mind      Bill Withers                  1.969399   
8                   Canada  Five Iron Frenzy                  1.875820   
24       Burden In My Hand       Soundgarden                  1.669284   
26                 Majesty        Delirious?                  1.610533   
29              The Middle   Jimmy Eat World                  1.602292   
37                      Tú           Shakira                  1.553179   

    tempo_diff  
0   112.159988  
2   112.164764  
5   112.232758  
8   111.902573  
24  111.857338  
26  112.108643  
29  111.977760  
37  112.125053  


In [177]:
# make a csv out of top_10_with_names[['title', 'artist_name', 'predicted_play_count_log', 'tempo_diff']
top_10_with_names[['title', 'artist_name', 'predicted_play_count_log', 'tempo_diff']].to_csv('top_10_recommendations.csv', index=False)