<a href="https://colab.research.google.com/github/Lactase/Python-Projects/blob/main/Spotify_Likeability.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import random
import requests
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import json

In [None]:
# Import csv from exportify
df = pd.read_csv('liked_songs.csv')

# Removes the spotify:track part and leaves the track id
df['Track ID'] = df['Track URI'].str.replace('spotify:track:', '', regex=False)

# Can be changed according to music taste.
# A song is labelled as liked only when every feature in the first table is
# strictly above its threshold AND every feature in the second table is
# strictly below its threshold.
liked_lower_bounds = pd.Series({
    'Tempo': 110,
    'Danceability': 0.5,
    'Energy': 0.5,
    'Valence': 0.1,
    'Liveness': 0.1,
})
liked_upper_bounds = pd.Series({
    'Speechiness': 0.1,
    'Acousticness': 0.2,
    'Instrumentalness': 0.1,
})

# Compare each selected column against its own threshold, then require all of them per row
above_minimums = df[liked_lower_bounds.index].gt(liked_lower_bounds, axis='columns').all(axis=1)
below_maximums = df[liked_upper_bounds.index].lt(liked_upper_bounds, axis='columns').all(axis=1)

# Create Liked column: 1 if all criteria are met, otherwise 0
df['Liked'] = (above_minimums & below_maximums).astype(int)

# A bare expression in the middle of a cell is never displayed, so print the
# number of positive labels explicitly before showing the frame.
print(f"{df['Liked'].sum()} of {len(df)} songs meet every criterion")
df

In [None]:
# number of songs in csv
n_songs = len(df)

# np.random.seed() returns None, so keep the integer itself in `seed` and pass
# it on explicitly. Seed every RNG the notebook uses so that the split and the
# model's initial weights are reproducible after a kernel restart.
seed = 4200
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Column order defines the feature order the scaler and the model expect
feature_columns = ['Tempo', 'Danceability', 'Energy', 'Valence', 'Speechiness', 'Acousticness', 'Instrumentalness', 'Liveness']
X_track_features = df[feature_columns].values
y_liked = df['Liked'].values

# split data so you have data to train and test on (20% used to test and 80% in training)
# The split is done on the raw values so the scaler never sees the test rows.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_track_features, y_liked, test_size=0.2, random_state=seed
)

# scales the data points to mean close to 0 and std of 1, using statistics of
# the training rows only; the test rows are transformed with those statistics
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole data set scaled with the same scaler, as tensors
X_scaled = scaler.transform(X_track_features)
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y_liked, dtype=torch.float32).unsqueeze(1)

# Convert training/testing data to tensors; labels become column vectors of shape (n, 1)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

In [None]:
# Neural Network

class MusicClassifier(nn.Module):
  """Feed-forward binary classifier: Linear -> ReLU -> Linear -> Sigmoid.

  Args:
    in_features: number of values per input row (defaults to 8).
    hidden_features: width of the single hidden layer (defaults to 12).
  """

  def __init__(self, in_features = 8, hidden_features = 12):
    # initialise nn.Module so the layers below are registered as sub-modules
    super().__init__()

    # input -> hidden layer
    self.fc1 = nn.Linear(in_features = in_features, out_features = hidden_features)

    # non-linearity applied to the hidden layer
    self.relu = nn.ReLU()

    # hidden layer -> single output value
    self.fc2 = nn.Linear(in_features = hidden_features, out_features = 1)

    # squashes the output into the range (0, 1)
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    """Run the layers in order on `x` and return the sigmoid output.

    Args:
      x: tensor whose last dimension has `in_features` entries.

    Returns:
      Tensor with the same leading dimensions as `x` and a last dimension of 1.
    """
    hidden = self.relu(self.fc1(x))
    return self.sigmoid(self.fc2(hidden))

# Create the network instance that the training and evaluation cells operate on
model = MusicClassifier()
# forward pass order: fc1 -> relu -> fc2 -> sigmoid

In [None]:
# Binary cross-entropy between the model's sigmoid outputs and the 0/1 labels
loss_func = nn.BCELoss()
# Adam updates the model parameters (weights and biases) with a learning rate of 0.01
optimizer = optim.Adam(model.parameters(), lr=0.01)
# number of passes over the full training set
epochs = 100

# Training mode does not change between iterations, so switch it on once
model.train()

for epoch in range(epochs):
  # clear gradients left over from the previous step
  optimizer.zero_grad()
  # forward pass on the whole training set, then compare against the labels
  outputs = model(X_train_tensor)
  loss = loss_func(outputs, y_train_tensor)
  # backpropagation followed by the parameter update
  loss.backward()
  optimizer.step()

  # report progress on every 10th epoch (0, 10, 20, ...)
  if epoch % 10 != 0:
    continue
  print(f'Epoch: {epoch}, Loss: {loss.item()}')

In [None]:
# Evaluate the model
model.eval()

# Score the held-out test split; no gradients are needed for evaluation
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    test_loss = loss_func(test_outputs, y_test_tensor)
    # outputs above 0.5 count as a predicted "liked" (1.0), everything else as 0.0
    predicted_liked = test_outputs.gt(0.5).float()
    # .item() turns the one-element count tensor into a plain Python number
    n_correct = predicted_liked.eq(y_test_tensor).sum().item()
    accuracy = n_correct / len(y_test_tensor)

print(f'Test Loss: {test_loss.item():.4f}')
print(f'Test Accuracy: {accuracy:.4f}')

In [None]:
# Test any songs
spotify_song_link = 'https://open.spotify.com/track/406IpEtZPvbxApWTGM3twY'

RECCOBEATS_API = 'https://api.reccobeats.com/v1'
# Without a timeout a stalled connection would hang the cell indefinitely
REQUEST_TIMEOUT_SECONDS = 10
# Must stay in the same order as the feature columns the scaler and model were fitted on
AUDIO_FEATURE_KEYS = ['tempo', 'danceability', 'energy', 'valence', 'speechiness', 'acousticness', 'instrumentalness', 'liveness']

headers = {
  'Accept': 'application/json'
}


def extract_spotify_track_id(link):
  """Return the track id from a Spotify share link or a `spotify:track:` URI.

  Query strings (such as `?si=...`), fragments and trailing slashes are dropped.
  """
  without_query = link.strip().split('?', 1)[0].split('#', 1)[0]
  return without_query.rstrip('/').split('/')[-1].split(':')[-1]


def fetch_audio_features(track_id):
  """Look a Spotify track id up on ReccoBeats and return its audio-features JSON.

  Raises:
    requests.RequestException: network failure, timeout or non-2xx response.
    KeyError / IndexError: the lookup response contains no matching track.
  """
  lookup = requests.get(
    f'{RECCOBEATS_API}/track',
    params={'ids': track_id},
    headers=headers,
    timeout=REQUEST_TIMEOUT_SECONDS,
  )
  lookup.raise_for_status()
  # ReccoBeats uses its own ids, so take the id of the first match
  reccobeats_song_id = lookup.json()['content'][0]['id']

  features = requests.get(
    f'{RECCOBEATS_API}/track/{reccobeats_song_id}/audio-features',
    headers=headers,
    timeout=REQUEST_TIMEOUT_SECONDS,
  )
  features.raise_for_status()
  return features.json()


spotify_song_id = extract_spotify_track_id(spotify_song_link)

try:
  # parse the response once and pick the features in training order
  audio_features = fetch_audio_features(spotify_song_id)
  X_track_features = np.array([[audio_features[key] for key in AUDIO_FEATURE_KEYS]], dtype=float)
  # null feature values become NaN with dtype=float; treat them as missing
  if not np.isfinite(X_track_features).all():
    raise ValueError('audio features contain missing values')
except requests.RequestException as err:
  print(f'ReccoBeats request failed: {err}')
except (LookupError, TypeError, ValueError):
  # unknown track, missing feature keys or unusable feature values
  print('No Audio Features found')
else:
  # Errors from the scaler or the model are deliberately NOT caught here so
  # that real bugs surface instead of being reported as "no features".
  # scales the data points with the statistics learned by the existing scaler
  X_scaled = scaler.transform(X_track_features)

  model_input = torch.tensor(X_scaled, dtype=torch.float32)
  model.eval()
  with torch.no_grad():
    outputs = model(model_input)
  # round after converting to percent to avoid artefacts like 56.989999999999995
  probability = round(outputs.item() * 100, 2)
  print(f'The song is {probability}% likely to be liked')



{"content":[{"id":"fe75a3b3-d9eb-4c1c-bca8-52c28bf054b9","trackTitle":"HOT","artists":[{"id":"7e8edb62-c8fc-4931-8833-a28a6607e944","name":"LE SSERAFIM","href":"https://open.spotify.com/artist/4SpbR6yFEvexJuaBpgAU5p"}],"durationMs":143797,"isrc":"USA2P2506404","ean":null,"upc":null,"href":"https://open.spotify.com/track/406IpEtZPvbxApWTGM3twY","availableCountries":"AR,AU,AT,BE,BO,BR,BG,CA,CL,CO,CR,CY,CZ,DK,DO,DE,EC,EE,SV,FI,FR,GR,GT,HN,HK,HU,IS,IE,IT,LV,LT,LU,MY,MT,MX,NL,NZ,NI,NO,PA,PY,PE,PH,PL,PT,SG,SK,ES,SE,CH,TW,TR,UY,US,GB,AD,LI,MC,ID,JP,TH,VN,RO,IL,ZA,SA,AE,BH,QA,OM,KW,EG,MA,DZ,TN,LB,JO,PS,IN,BY,KZ,MD,UA,AL,BA,HR,ME,MK,RS,SI,KR,BD,PK,LK,GH,KE,NG,TZ,UG,AG,AM,BS,BB,BZ,BT,BW,BF,CV,CW,DM,FJ,GM,GE,GD,GW,GY,HT,JM,KI,LS,LR,MW,MV,ML,MH,FM,NA,NR,NE,PW,PG,PR,WS,SM,ST,SN,SC,SL,SB,KN,LC,VC,SR,TL,TO,TT,TV,VU,AZ,BN,BI,KH,CM,TD,KM,GQ,SZ,GA,GN,KG,LA,MO,MR,MN,NP,RW,TG,UZ,ZW,BJ,MG,MU,MZ,AO,CI,DJ,ZM,CD,CG,IQ,LY,TJ,VE,ET,XK","popularity":82}]}
{"id":"fe75a3b3-d9eb-4c1c-bca8-52c28bf054b9","acousticnes

In [None]:
# Test with another playlist
df = pd.read_csv('friend_playlist.csv')
# Removes the spotify:track part and leaves the track id
df['Track ID'] = df['Track URI'].str.replace('spotify:track:', '', regex=False)

X_track_features = df[['Tempo', 'Danceability', 'Energy', 'Valence', 'Speechiness', 'Acousticness', 'Instrumentalness', 'Liveness']].values

# Reuse the scaler that is already fitted instead of fitting a new one here:
# the model must receive inputs standardised with the same mean/std it was
# trained on, and rebinding `scaler` would break re-running earlier cells.
X_scaled = scaler.transform(X_track_features)

# Turn scaled values into tensors
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
model.eval()
with torch.no_grad():
  outputs = model(X_tensor)

# The model returns one column per row (shape (n, 1)); flatten to one value
# per song before storing the results as DataFrame columns.
predicted_liked = (outputs > 0.5).float()
probability = outputs.squeeze(1).numpy()
df['Liked'] = predicted_liked.squeeze(1).numpy()
df['Probability'] = np.round(probability, 4)

# Keep only the songs predicted as liked, most likely first
liked_songs = df[df['Liked'] == 1]
df_sorted_desc = liked_songs.sort_values(by='Probability', ascending=False)
df_sorted_desc
