# Farah's Playground




## Setup

### General

In [1]:
!pip install spotipy

Collecting spotipy
  Downloading https://files.pythonhosted.org/packages/f1/62/210c7942dede1c0d39aa56520d59c4565e232f957ea4caea87da7e6f3e61/spotipy-2.13.0-py3-none-any.whl
Installing collected packages: spotipy
Successfully installed spotipy-2.13.0


In [2]:
# Mount the user's Google Drive at /content/drive so the project files are
# reachable from this runtime (prompts interactively for an authorization code).
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [3]:
import sys

# Project folder on the mounted Drive. The trailing slash matters: later cells
# build file paths by plain string concatenation with this value.
PROJECT_ROOT = '/content/drive/My Drive/UofT/APS360/Project/Hit-Song-Prediction/'

# Make the project's modules importable. Guarded so that re-running this cell
# does not keep stacking duplicate entries onto sys.path.
if PROJECT_ROOT not in sys.path:
  sys.path.append(PROJECT_ROOT)

In [4]:
from SpotifyConnection import get_spotify_connection
import Playlist as PL
from DataProcessing import normalize_dataframe, DataInformation, SpotifyTracksDataset

In [5]:
# Spotify client handle obtained from the project's SpotifyConnection helper.
# NOTE(review): credentials are presumably resolved inside the helper — confirm
# that nothing secret needs to be set in this notebook.
sp = get_spotify_connection()

### PyTorch

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader


In [7]:
# Run on the GPU when the runtime has one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [10]:
import random

def seed_torch(seed=0):
  """Seed every random number generator used here and make cuDNN deterministic.

  Args:
    seed: Integer seed applied to Python's `random` module, NumPy's global
      generator and PyTorch (CPU plus every CUDA device).

  Side effects:
    Mutates process-wide RNG state and the global `torch.backends.cudnn` flags.
  """
  random.seed(seed)
  # Seeds NumPy's global generator. (Constructing a throwaway
  # np.random.RandomState(seed) would not seed anything, so it is not done here.)
  np.random.seed(seed)

  torch.manual_seed(seed)
  # Covers all GPUs, the current one included; silently ignored without CUDA.
  torch.cuda.manual_seed_all(seed)

  # Favour repeatable results over speed: deterministic kernels only, no
  # autotuning, and cuDNN switched off altogether.
  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.enabled = False
  torch.backends.cudnn.benchmark = False

seed_torch(0)

## Loading Data

In [11]:
DataInformation.list()

['duration',
 'key',
 'mode',
 'time_signature',
 'acousticness',
 'danceability',
 'energy',
 'instrumentalness',
 'liveness',
 'loudness',
 'speechiness',
 'valence',
 'tempo',
 'artist_popularity',
 'popularity']

In [12]:
# Load the saved playlist from the project folder and extract the columns
# named by DataInformation.list() into a DataFrame.
pl = PL.load_playlist(PROJECT_ROOT + "playlist_2sRZldX6n9oaII70OoO3zB")
df = pl.get_playlist_df(cols=DataInformation.list())
# Normalize the feature columns, leaving 'artist_popularity' out of the list
# handed to normalize_dataframe.
# NOTE(review): .remove() mutates the list returned by Feature.list(); this
# presumably is a fresh list on every call — confirm, otherwise re-running
# this cell would raise ValueError.
cols_to_normalize = DataInformation.Feature.list()
cols_to_normalize.remove('artist_popularity')
normalized_df = normalize_dataframe(df, cols_to_normalize)


Loaded playlist from file: /content/drive/My Drive/UofT/APS360/Project/Hit-Song-Prediction/playlist_2sRZldX6n9oaII70OoO3zB


In [13]:
normalized_df.head()

Unnamed: 0,duration,key,mode,time_signature,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,valence,tempo,artist_popularity,popularity
0,0.068542,0.0,1.0,0.8,0.111444,0.805471,0.595076,0.0,0.145511,0.858757,0.079468,0.409,0.616788,98,76
1,0.066175,0.909091,0.0,0.8,0.344376,0.701114,0.714348,0.0,0.083591,0.896666,0.373404,0.615,0.627561,90,63
2,0.067781,0.727273,0.0,1.0,0.241966,0.493414,0.366554,0.0,0.108359,0.787748,0.056489,0.161,0.311547,83,75
3,0.055939,0.727273,1.0,1.0,0.127508,0.597771,0.653209,0.0,0.042931,0.865515,0.356383,0.691,0.569657,77,29
4,0.070991,0.090909,1.0,0.8,0.035641,0.834853,0.734394,0.0,0.312693,0.87482,0.065213,0.393,0.447603,84,75


In [14]:
# Wrap the full normalized frame, and separately its first 100 rows, as
# SpotifyTracksDataset objects (the smaller one is handy for quick experiments).
dataset = SpotifyTracksDataset(normalized_df)
dataset_small = SpotifyTracksDataset(normalized_df.iloc[0:100])

In [15]:
# Ten-row dataset built from the first rows of the normalized frame.
dataset_tiny = SpotifyTracksDataset(normalized_df.iloc[0:10])

In [17]:
# Sequential (unshuffled) split of the normalized frame: the first 1% of rows
# train the model, the next 1% validate it, and everything left over is test.
num_data = len(normalized_df)
num_train = int(0.01 * num_data)
num_val = int(0.01 * num_data)
num_test = num_data - (num_train + num_val)

# Consecutive row boundaries: [0, train end, val end, end of data].
split_bounds = [0, num_train, num_train + num_val, num_data]
train, val, test = (
    SpotifyTracksDataset(normalized_df.iloc[start:stop])
    for start, stop in zip(split_bounds[:-1], split_bounds[1:])
)

## Model

In [18]:
class TestNet(nn.Module):
  """Small fully connected regressor: Linear -> ReLU -> Linear with one output.

  Args:
    num_features: Width of each input vector. Defaults to 14.
      NOTE(review): 14 presumably equals the number of non-target columns in
      the dataset — confirm against SpotifyTracksDataset.
    hidden_size: Number of units in the single hidden layer. Defaults to 50.
  """

  def __init__(self, num_features=14, hidden_size=50):
    super().__init__()

    # The layers are moved to the notebook-level `device` at construction time.
    self.layers = nn.Sequential(
        nn.Linear(num_features, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, 1)
    ).to(device)

  def forward(self, x):
    """Return the raw (unclamped) network output for `x`; last dimension is 1."""
    return self.layers(x)

## Training

### Helper Functions

In [19]:
def get_error(model, data, batch_size=100):
  """Return the mean absolute error of `model` over `data`.

  Predictions are clamped to [0, 100] before being compared to the targets.

  Args:
    model: Callable network; it is fed batches moved to the global `device`.
    data: Dataset yielding (features, target) pairs; must support len().
    batch_size: Number of samples evaluated per forward pass.

  Returns:
    Sum of absolute errors divided by len(data), as a Python float.

  Raises:
    ZeroDivisionError: If `data` is empty.
  """
  data_loader = torch.utils.data.DataLoader(data, batch_size=batch_size)

  total_error = 0
  # Pure evaluation: skip autograd bookkeeping so no graph is built or kept.
  with torch.no_grad():
    for features, targets in data_loader:
      features = features.to(device)
      targets = targets.to(device)

      out = model(features)
      # NOTE(review): assumes targets and outputs share a shape; a (B,) target
      # against a (B, 1) output would broadcast to (B, B) — confirm dataset.
      total_error += (targets - out.clamp(min=0, max=100)).abs().sum().item()

  return total_error / len(data)

In [20]:
def predict_some(model, data, num_to_predict):
  """Print actual vs. predicted values for the first samples of `data`.

  Predictions are clamped to [0, 100] and both numbers are rounded to two
  decimals. If `data` holds fewer than `num_to_predict` samples, only the
  available ones are printed.

  Args:
    model: Callable network; each sample is fed as a batch of one on `device`.
    data: Indexable dataset yielding (features, target) pairs; must support len().
    num_to_predict: Maximum number of leading samples to print.
  """
  # Never index past the end of the dataset.
  num_to_predict = min(num_to_predict, len(data))

  # Inference only, so no autograd graph is needed.
  with torch.no_grad():
    for i in range(num_to_predict):
      features, target = data[i]
      features = features.to(device)
      target = target.to(device)

      # unsqueeze(0) adds the batch dimension the model is called with.
      out = model(features.unsqueeze(0)).clamp(min=0, max=100)
      print(f"Actual: {round(target.item(), 2)};  Predicted: {round(out.item(), 2)}")

### Train Function

In [21]:
def train_net(model, train_data, valid_data, bs=50, lr=0.01, wd=0, epochs=5, num_preview=5):
  """Train `model` with Adam on an MSE loss, printing metrics after every epoch.

  After each epoch this prints the mean absolute training error (predictions
  clamped to [0, 100]), the validation error from `get_error`, the mean batch
  loss, and a few sample predictions from both datasets.

  Args:
    model: Network to optimise in place.
    train_data: Dataset of (features, target) pairs used for training.
    valid_data: Dataset of (features, target) pairs used for validation.
    bs: Training batch size.
    lr: Adam learning rate.
    wd: Adam weight decay.
    epochs: Number of passes over `train_data`.
    num_preview: Maximum number of sample predictions printed per dataset.

  Raises:
    ValueError: If `train_data` is empty.
  """
  # Fail early and clearly rather than part-way through the first epoch.
  if len(train_data) == 0:
    raise ValueError("train_data is empty; nothing to train on")

  train_loader = torch.utils.data.DataLoader(train_data, batch_size=bs, shuffle=True)
  # Known up front, so the loss average does not depend on a loop variable.
  num_batches = len(train_loader)

  criterion = nn.MSELoss()
  optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)

  for epoch in range(1, epochs + 1):
    epoch_error = 0
    epoch_loss = 0.0
    for features, targets in train_loader:
      features = features.to(device)
      targets = targets.to(device)

      optimizer.zero_grad()
      out = model(features)

      loss = criterion(out, targets)
      loss.backward()
      optimizer.step()

      epoch_loss += loss.item()
      # Bookkeeping only: detach so the metric does not touch the autograd graph.
      epoch_error += (targets - out.detach().clamp(min=0, max=100)).abs().sum().item()

    epoch_loss = epoch_loss / num_batches
    train_error_rate = epoch_error / len(train_data)
    val_error_rate = get_error(model, valid_data)

    print(f"Epoch {epoch}")
    print(f"  Train Error: {train_error_rate}")
    print(f"  Valid Error: {val_error_rate}")
    print(f"  Train Loss: {epoch_loss}")

    # Cap the preview at the dataset size so small datasets do not index out of range.
    predict_some(model, train_data, min(num_preview, len(train_data)))
    predict_some(model, valid_data, min(num_preview, len(valid_data)))

    print("\n")

### Training Model

In [24]:
# Build a fresh network on the active device (GPU when available, else CPU).
model = TestNet().to(device)

# Report the training-set size, then train with small batches.
print(len(train))
train_net(model, train, val, bs=10, lr=0.001, epochs=100)

54
Epoch 1
  Train Error: 51.840724803783274
  Valid Error: 51.62315086082176
  Train Loss: 3354.4405517578125
Actual: 76.0;  Predicted: 7.29
Actual: 63.0;  Predicted: 6.67
Actual: 75.0;  Predicted: 6.14
Actual: 29.0;  Predicted: 5.75
Actual: 75.0;  Predicted: 6.25
Actual: 71.0;  Predicted: 5.9
Actual: 62.0;  Predicted: 5.9
Actual: 44.0;  Predicted: 4.22
Actual: 29.0;  Predicted: 4.22
Actual: 78.0;  Predicted: 6.07


Epoch 2
  Train Error: 50.19712999131944
  Valid Error: 49.908944589120374
  Train Loss: 3113.2985026041665
Actual: 76.0;  Predicted: 10.18
Actual: 63.0;  Predicted: 9.34
Actual: 75.0;  Predicted: 8.59
Actual: 29.0;  Predicted: 8.04
Actual: 75.0;  Predicted: 8.74
Actual: 71.0;  Predicted: 8.23
Actual: 62.0;  Predicted: 8.25
Actual: 44.0;  Predicted: 5.92
Actual: 29.0;  Predicted: 5.92
Actual: 78.0;  Predicted: 8.49


Epoch 3
  Train Error: 48.704764189543546
  Valid Error: 48.180704752604164
  Train Loss: 2829.5838623046875
Actual: 76.0;  Predicted: 13.1
Actual: 63.0;  Pre