<a href="https://colab.research.google.com/github/Lactase/Python-Projects/blob/main/Spotify_Likeability.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import random
import requests
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import json

In [2]:
# Import csv from exportify
df = pd.read_csv('liked_songs.csv')

# Removes the spotify:track part and leaves the track id
df['Track ID'] = df['Track URI'].str.replace('spotify:track:', '', regex=False)

# Can be changed according to music taste
cond_tempo = df['Tempo'] > 110
cond_dance = df['Danceability'] > 0.5
cond_energy = df['Energy'] > 0.5
cond_valence = df['Valence'] > 0.1
cond_speech = df['Speechiness'] < 0.1
cond_acoustic = df['Acousticness'] < 0.2
cond_instrumental = df['Instrumentalness'] < 0.1
cond_liveness = df['Liveness'] > 0.1

# Create Liked column and true if all criteria are met
df['Liked'] = cond_tempo & cond_dance & cond_energy & cond_valence & cond_speech & cond_acoustic & cond_instrumental & cond_liveness

# Convert to int
df['Liked'] = df['Liked'].astype(int)
df['Liked'].sum()
df

Unnamed: 0,Track URI,Track Name,Album Name,Artist Name(s),Release Date,Duration (ms),Popularity,Explicit,Added By,Added At,...,Mode,Speechiness,Acousticness,Instrumentalness,Liveness,Valence,Tempo,Time Signature,Track ID,Liked
0,spotify:track:5DxDLsW6PsLz5gkwC7Mk5S,Free,KPop Demon Hunters (Soundtrack from the Netfli...,"Rumi,Jinu,EJAE,Andrew Choi,KPop Demon Hunters ...",2025-06-20,187963,91,False,,2025-06-28T20:52:03Z,...,1,0.0706,0.22500,0.000000,0.0991,0.437,139.781,4,5DxDLsW6PsLz5gkwC7Mk5S,0
1,spotify:track:1CPZ5BxNNd0n0nF4Orb9JS,Golden,KPop Demon Hunters (Soundtrack from the Netfli...,"HUNTR/X,EJAE,AUDREY NUNA,REI AMI,KPop Demon Hu...",2025-06-20,194607,94,False,,2025-06-28T20:52:00Z,...,0,0.0616,0.09660,0.000000,0.1850,0.104,122.715,3,1CPZ5BxNNd0n0nF4Orb9JS,1
2,spotify:track:3RXUgPNIbUgFxsDWuBQEt6,How It’s Done,KPop Demon Hunters (Soundtrack from the Netfli...,"HUNTR/X,EJAE,AUDREY NUNA,REI AMI,KPop Demon Hu...",2025-06-20,176059,91,False,,2025-06-28T20:51:57Z,...,1,0.0967,0.01140,0.000000,0.3220,0.614,160.011,4,3RXUgPNIbUgFxsDWuBQEt6,1
3,spotify:track:5EoV2HmAlkqN1XvsIjOBZZ,STYLE,STYLE,Hearts2Hearts,2025-06-18,209671,81,False,,2025-06-28T19:14:26Z,...,1,0.0630,0.28200,0.000002,0.1520,0.676,127.003,4,5EoV2HmAlkqN1XvsIjOBZZ,0
4,spotify:track:40UVUuyy5RfpO0awclCttA,FAMOUS,FAMOUS,ALLDAY PROJECT,2025-06-23,180353,79,False,,2025-06-28T19:14:15Z,...,0,0.0570,0.07080,0.000000,0.1060,0.485,119.977,4,40UVUuyy5RfpO0awclCttA,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
528,spotify:track:5Wgl3N3oJ5eRtksKPRaO4j,BLOSSOM (Prod. Groovyroom),THE LOVE OF SPRING,"EUNHA,RAVI",2019-04-11,187418,25,False,,2019-04-29T04:34:39Z,...,0,0.3250,0.06660,0.000000,0.0568,0.776,109.714,4,5Wgl3N3oJ5eRtksKPRaO4j,0
529,spotify:track:19y3W6t87XUCkRrNhJkdbh,DANCING ON GLASS,DANCING ON GLASS,BUMKEY,2019-04-05,229998,17,False,,2019-04-25T13:52:04Z,...,0,0.0494,0.02750,0.000000,0.0750,0.603,115.035,4,19y3W6t87XUCkRrNhJkdbh,0
530,spotify:track:7cM9Kqto11iJpY2RwgVJSq,Flare Guns,The Story of Us,"Quinn XCII,Chelsea Cutler",2017-09-15,240013,58,False,,2019-01-11T23:25:28Z,...,0,0.0603,0.26000,0.000000,0.2850,0.457,92.951,4,7cM9Kqto11iJpY2RwgVJSq,0
531,spotify:track:2xLMifQCjDGFmkHkpNLD9h,SICKO MODE,ASTROWORLD,Travis Scott,2018-08-03,312820,86,True,,2018-09-23T17:20:47Z,...,1,0.2220,0.00513,0.000000,0.1240,0.446,155.008,4,2xLMifQCjDGFmkHkpNLD9h,0


In [3]:
# number of songs in csv
n_songs = len(df)

seed = np.random.seed(4200)

X_track_features = df[['Tempo', 'Danceability', 'Energy', 'Valence', 'Speechiness', 'Acousticness', 'Instrumentalness', 'Liveness']].values
y_liked = df['Liked'].values

# scales the data points to mean close to 0 and std of 1
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_track_features)

# Turn scaled values into tensors
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y_liked, dtype=torch.float32).unsqueeze(1)


# split data so you have data to train and test on (20% used to test and 80% in training)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_liked, test_size=0.2, random_state=seed)

# Convert training/testing data to tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

In [4]:
# Neural Network

class MusicClassifier(nn.Module):
  def __init__(self):
    # inherits from nn.Module
    super(MusicClassifier, self).__init__()

    # layers
    # connect layers 8 input 12 output to hidden layer
    self.fc1 = nn.Linear(in_features = 8, out_features = 12)

    # RELU
    self.relu = nn.ReLU()

    # connect layers 12 input from hidden layer to 1 output to output
    self.fc2 = nn.Linear(in_features = 12, out_features = 1)

    # Sigmoid
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    # x is the tensor for the song features
    x = self.fc1(x)
    x = self.relu(x)
    x = self.fc2(x)
    x = self.sigmoid(x)
    return x

model = MusicClassifier()
# fc1 -> relu -> fc2 -> sigmoid

In [5]:
# Loss function
loss_func = nn.BCELoss()
# optimizer with learning rate of 0.01 (model parameters is the weights and biases)
optimizer = optim.Adam(model.parameters(), lr=0.01)
# number of times model will train
epochs = 100

for epoch in range(epochs):
  # start training
  model.train()
  # plugs training data into model
  outputs = model(X_train_tensor)
  # checks output and compare to training data and calculates loss
  loss = loss_func(outputs,y_train_tensor)
  # cleans the gradiant every run
  optimizer.zero_grad()
  # backpropagation
  loss.backward()
  # updates the weights and biases based on loss
  optimizer.step()

  # print epoch and loss every 100
  if (epoch) % 10== 0:
    print(f'Epoch: {epoch}, Loss: {loss.item()}')

Epoch: 0, Loss: 0.7832262516021729
Epoch: 10, Loss: 0.6423134803771973
Epoch: 20, Loss: 0.5671598315238953
Epoch: 30, Loss: 0.5051627159118652
Epoch: 40, Loss: 0.45292899012565613
Epoch: 50, Loss: 0.4033907353878021
Epoch: 60, Loss: 0.3547980487346649
Epoch: 70, Loss: 0.31449246406555176
Epoch: 80, Loss: 0.2845551669597626
Epoch: 90, Loss: 0.2623519003391266


In [6]:
# Evaluate the model
model.eval()
# Calculate accuracy with test data
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    test_loss = loss_func(test_outputs, y_test_tensor)
    # predicted liked
    predicted_liked = (test_outputs > 0.5).float()
    # Since in tensor type need item to get value to use for accuracy
    accuracy = (predicted_liked == y_test_tensor).sum().item() / y_test_tensor.size(0)

print(f'Test Loss: {test_loss.item():.4f}')
print(f'Test Accuracy: {accuracy:.4f}')

Test Loss: 0.2343
Test Accuracy: 0.8972


In [7]:
# Test any songs
spotify_song_link = 'https://open.spotify.com/track/406IpEtZPvbxApWTGM3twY'
spotify_song_id = spotify_song_link.split('/')[-1]

import requests

url = "https://api.reccobeats.com/v1/track?ids=" + spotify_song_id

payload = {}
headers = {
  'Accept': 'application/json'
}

response = requests.request("GET", url, headers=headers, data=payload)

print(response.text)
reccobeats_song_id = response.json()['content'][0]['id']

url = "https://api.reccobeats.com/v1/track/" + reccobeats_song_id + "/audio-features"

response = requests.request("GET", url, headers=headers, data=payload)
print(response.text)
try:
  valence = response.json()['valence']
  danceability = response.json()['danceability']
  energy = response.json()['energy']
  tempo = response.json()['tempo']
  speechiness = response.json()['speechiness']
  acousticness = response.json()['acousticness']
  instrumentalness = response.json()['instrumentalness']
  liveness = response.json()['liveness']

  X_track_features = np.array([[tempo, danceability, energy, valence, speechiness, acousticness, instrumentalness, liveness]])

  # scales the data points to mean close to 0 and std of 1
  X_scaled = scaler.transform(X_track_features)

  model_input = torch.tensor(X_scaled, dtype=torch.float32)
  with torch.no_grad():
    outputs = model(model_input)
  probability = outputs.detach().numpy()
  probability = np.round(probability.item(), 4)*100
  print(f'The song is {probability}% likely to be liked')
except:
  print('No Audio Features found')



{"content":[{"id":"fe75a3b3-d9eb-4c1c-bca8-52c28bf054b9","trackTitle":"HOT","artists":[{"id":"7e8edb62-c8fc-4931-8833-a28a6607e944","name":"LE SSERAFIM","href":"https://open.spotify.com/artist/4SpbR6yFEvexJuaBpgAU5p"}],"durationMs":143797,"isrc":"USA2P2506404","ean":null,"upc":null,"href":"https://open.spotify.com/track/406IpEtZPvbxApWTGM3twY","availableCountries":"AR,AU,AT,BE,BO,BR,BG,CA,CL,CO,CR,CY,CZ,DK,DO,DE,EC,EE,SV,FI,FR,GR,GT,HN,HK,HU,IS,IE,IT,LV,LT,LU,MY,MT,MX,NL,NZ,NI,NO,PA,PY,PE,PH,PL,PT,SG,SK,ES,SE,CH,TW,TR,UY,US,GB,AD,LI,MC,ID,JP,TH,VN,RO,IL,ZA,SA,AE,BH,QA,OM,KW,EG,MA,DZ,TN,LB,JO,PS,IN,BY,KZ,MD,UA,AL,BA,HR,ME,MK,RS,SI,KR,BD,PK,LK,GH,KE,NG,TZ,UG,AG,AM,BS,BB,BZ,BT,BW,BF,CV,CW,DM,FJ,GM,GE,GD,GW,GY,HT,JM,KI,LS,LR,MW,MV,ML,MH,FM,NA,NR,NE,PW,PG,PR,WS,SM,ST,SN,SC,SL,SB,KN,LC,VC,SR,TL,TO,TT,TV,VU,AZ,BN,BI,KH,CM,TD,KM,GQ,SZ,GA,GN,KG,LA,MO,MR,MN,NP,RW,TG,UZ,ZW,BJ,MG,MU,MZ,AO,CI,DJ,ZM,CD,CG,IQ,LY,TJ,VE,ET,XK","popularity":82}]}
{"id":"fe75a3b3-d9eb-4c1c-bca8-52c28bf054b9","acousticnes

In [8]:
# Test with another playlist
df = pd.read_csv('friend_playlist.csv')
# Removes the spotify:track part and leaves the track id
df['Track ID'] = df['Track URI'].str.replace('spotify:track:', '', regex=False)
df.head()

X_track_features = df[['Tempo', 'Danceability', 'Energy', 'Valence', 'Speechiness', 'Acousticness', 'Instrumentalness', 'Liveness']].values

# scales the data points to mean close to 0 and std of 1
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_track_features)

# Turn scaled values into tensors
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
with torch.no_grad():
  outputs = model(X_tensor)
predicted_liked = (outputs > 0.5).float()
probability = outputs.detach().numpy()
df['Liked'] = predicted_liked.numpy()
df['Probability'] = np.round(probability, 4)
liked_songs = df[df['Liked'] == 1]
df_sorted_desc = liked_songs.sort_values(by='Probability', ascending=False)
df_sorted_desc


Unnamed: 0,Track URI,Track Name,Album Name,Artist Name(s),Release Date,Duration (ms),Popularity,Explicit,Added By,Added At,...,Speechiness,Acousticness,Instrumentalness,Liveness,Valence,Tempo,Time Signature,Track ID,Liked,Probability
15,spotify:track:3JTQgVfYgBwzuvrF1uBYwW,キミの隣,Assortrip,halca,2018,279133,42,False,31rprvymcpshapgs642x5d2rwabe,2022-11-16T08:54:10Z,...,0.0365,0.0135,1e-06,0.337,0.58,134.971,4,3JTQgVfYgBwzuvrF1uBYwW,1.0,0.8859
62,spotify:track:3dPtXHP0oXQ4HCWHsOA9js,夜に駆ける,夜に駆ける,YOASOBI,2019-12-15,261013,73,False,31rprvymcpshapgs642x5d2rwabe,2022-11-16T08:54:10Z,...,0.0305,0.00231,1.7e-05,0.3,0.789,130.041,4,3dPtXHP0oXQ4HCWHsOA9js,1.0,0.8744
28,spotify:track:2bMDN6vcYWgfTQIv2T8Liu,Stand By You,Stand By You EP,OFFICIAL HIGE DANDISM,2017-01-01,256342,27,False,31rprvymcpshapgs642x5d2rwabe,2022-11-16T08:54:10Z,...,0.0439,0.00423,0.0,0.229,0.541,154.074,4,2bMDN6vcYWgfTQIv2T8Liu,1.0,0.8613
20,spotify:track:7v9L3upGtZS12KObGnoJj1,URAOMOTE FORTUNE,HIGECORE!,佐倉千代(CV:小澤亜李),2013,222599,18,False,31rprvymcpshapgs642x5d2rwabe,2022-11-16T08:54:10Z,...,0.0399,0.0075,3.1e-05,0.382,0.697,131.997,4,7v9L3upGtZS12KObGnoJj1,1.0,0.8365
10,spotify:track:4PRKsgztLyAvUOMqAQIprw,Do I,Do I,LULU X,2017-06-20,230580,1,False,31rprvymcpshapgs642x5d2rwabe,2022-11-16T08:54:10Z,...,0.0563,0.0139,0.00298,0.347,0.452,128.006,4,4PRKsgztLyAvUOMqAQIprw,1.0,0.7743
46,spotify:track:2LIpx9uryPMS6g7748AfVs,Never going back,CIRCUS,MADKID,2019-04-24,197746,21,False,31rprvymcpshapgs642x5d2rwabe,2022-11-16T08:54:10Z,...,0.0795,0.0125,0.0,0.309,0.595,169.996,4,2LIpx9uryPMS6g7748AfVs,1.0,0.7414
16,spotify:track:1xs0QWOGclXN3kPCZnCThJ,Love Magic,Love Magic,hibiku,2018-06-06,298457,1,False,31rprvymcpshapgs642x5d2rwabe,2022-11-16T08:54:10Z,...,0.0373,0.0356,0.0,0.395,0.806,120.024,4,1xs0QWOGclXN3kPCZnCThJ,1.0,0.7333
155,spotify:track:5n7Sp2uHvRWbKYU1KIqaNA,シンデレラ,SODA POP FANCLUB 4,Cidergirl,2021-12-01,213053,54,False,31rprvymcpshapgs642x5d2rwabe,2024-02-24T12:21:18Z,...,0.0595,0.00968,0.0,0.317,0.769,129.988,4,5n7Sp2uHvRWbKYU1KIqaNA,1.0,0.7289
84,spotify:track:1kMlcLyljkrShV0LSdSGBz,Yesterday,Traveler,OFFICIAL HIGE DANDISM,2019-08-31,299164,56,False,31rprvymcpshapgs642x5d2rwabe,2022-11-16T08:54:10Z,...,0.0382,0.0442,0.0,0.398,0.56,129.939,4,1kMlcLyljkrShV0LSdSGBz,1.0,0.6842
25,spotify:track:7C1Lzlt1bOjNAxKboc9b98,ダイバー,Origin,KANA-BOON,2016-02-17,273586,44,False,31rprvymcpshapgs642x5d2rwabe,2022-11-16T08:54:10Z,...,0.0429,0.000107,0.00119,0.349,0.547,139.899,4,7C1Lzlt1bOjNAxKboc9b98,1.0,0.6787
