# Rakan's Playground




## Setup

In [1]:
!pip install spotipy



In [2]:
# Mount Google Drive inside the Colab VM. The project directory used by the
# following cells (PROJECT_ROOT) lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import sys

# Root of the project on the mounted Drive. The trailing slash matters: other
# cells build file paths from it by plain string concatenation.
PROJECT_ROOT = '/content/drive/My Drive/UofT/4th Year/APS360/Project/Hit-Song-Prediction/'

# Make the project's local modules importable. Guarded so that re-running this
# cell does not keep appending duplicate entries to sys.path.
if PROJECT_ROOT not in sys.path:
  sys.path.append(PROJECT_ROOT)

In [4]:
from SpotifyConnection import get_spotify_connection
import Playlist as PL
from DataProcessing import normalize_dataframe, DataInformation, SpotifyTracksDataset

In [5]:
# Obtain a connection object from the project's SpotifyConnection helper.
# NOTE(review): `sp` is not referenced by any other cell visible in this
# notebook -- confirm whether this cell is still needed.
sp = get_spotify_connection()

### PyTorch

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


In [7]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [110]:
import random

def seed_torch(seed=0):
  """Seed Python, NumPy and PyTorch RNGs and force deterministic cuDNN settings.

  Args:
    seed: Integer seed applied to every random number generator below.

  Side effects:
    Mutates global RNG state of `random`, `numpy.random` and `torch`, and
    changes the process-wide `torch.backends.cudnn` flags.
  """
  # Imported locally: in this notebook the top-level `import numpy as np` cell
  # comes *after* this one, so relying on the global name breaks a fresh
  # top-to-bottom run.
  import numpy as np

  random.seed(seed)
  # Seeds NumPy's global generator. (The original also constructed a throwaway
  # np.random.RandomState(seed), which had no effect and was dropped.)
  np.random.seed(seed)

  torch.manual_seed(seed)
  torch.cuda.manual_seed(seed)
  torch.cuda.manual_seed_all(seed)  # seed all GPUs

  # Trade speed for repeatability: deterministic kernels only, no autotuning.
  torch.backends.cudnn.deterministic = True
  # NOTE: this turns cuDNN off entirely, not just its non-deterministic paths.
  torch.backends.cudnn.enabled = False
  torch.backends.cudnn.benchmark = False

# Fix every RNG seed to 0 for the cells that run after this one.
seed_torch(0)

## Other Imports

In [8]:
import numpy as np

## Loading Data

In [9]:
# Display the column names that the loading cell below requests from the playlist.
DataInformation.list()

['duration',
 'key',
 'mode',
 'time_signature',
 'acousticness',
 'danceability',
 'energy',
 'instrumentalness',
 'liveness',
 'loudness',
 'speechiness',
 'valence',
 'tempo',
 'artist_popularity',
 'popularity']

In [150]:
# Load the saved playlist and extract the columns listed by DataInformation.
pl = PL.load_playlist(f"{PROJECT_ROOT}playlist_2sRZldX6n9oaII70OoO3zB")
df = pl.get_playlist_df(cols=DataInformation.list())

# Normalize every feature column except 'artist_popularity', which is
# deliberately excluded here and therefore keeps its raw values.
cols_to_normalize = DataInformation.Feature.list()
cols_to_normalize.remove('artist_popularity')
normalized_df = normalize_dataframe(df, cols_to_normalize)


Loaded playlist from file: /content/drive/My Drive/UofT/4th Year/APS360/Project/Hit-Song-Prediction/playlist_2sRZldX6n9oaII70OoO3zB


In [90]:
# Preview the first rows to sanity-check the normalized columns.
normalized_df.head()

Unnamed: 0,duration,key,mode,time_signature,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,valence,tempo,artist_popularity,popularity
0,0.068542,0.0,1.0,0.8,0.111444,0.805471,0.595076,0.0,0.145511,0.858757,0.079468,0.409,0.616788,98,76
1,0.066175,0.909091,0.0,0.8,0.344376,0.701114,0.714348,0.0,0.083591,0.896666,0.373404,0.615,0.627561,90,63
2,0.067781,0.727273,0.0,1.0,0.241966,0.493414,0.366554,0.0,0.108359,0.787748,0.056489,0.161,0.311547,83,75
3,0.055939,0.727273,1.0,1.0,0.127508,0.597771,0.653209,0.0,0.042931,0.865515,0.356383,0.691,0.569657,77,29
4,0.070991,0.090909,1.0,0.8,0.035641,0.834853,0.734394,0.0,0.312693,0.87482,0.065213,0.393,0.447603,84,75


In [11]:
class SpotifyTracksDataset2(Dataset):
    """
    Dataset of Spotify songs backed by a pandas DataFrame.

    Each item is a ``(features, label)`` pair of float32 tensors:

    * integer index -> features of shape ``(num_features,)``, label of shape ``(1,)``
    * slice         -> features of shape ``(n, num_features)``, labels of shape ``(n, 1)``

    Feature columns come from ``DataInformation.Feature.list()`` and the label
    column from ``DataInformation.TARGET`` (assumed to be a single column name).
    """
    def __init__(self, df):
        """Wrap ``df``; rows are addressed positionally (``iloc``), not by index label."""
        self.df = df
        self.features = DataInformation.Feature.list()
        self.target = DataInformation.TARGET

    def __getitem__(self, index):
        """Return the ``(features, label)`` tensors for a position or a slice.

        Slices follow normal Python semantics: open-ended bounds (``ds[:n]``),
        negative bounds and steps are honoured, out-of-range bounds are
        clipped, and an empty slice yields tensors with zero rows.
        """
        if isinstance(index, slice):
            # Let pandas resolve the slice in one shot instead of walking
            # range(start, stop) and concatenating row by row, which broke on
            # None bounds, ignored the step and failed on empty slices.
            rows = self.df.iloc[index]
            feature_block = rows[self.features].to_numpy(dtype=np.float32)
            label_block = rows[self.target].to_numpy(dtype=np.float32)
            # unsqueeze(1) keeps the historical (n, 1) label shape.
            return torch.tensor(feature_block), torch.tensor(label_block).unsqueeze(1)

        feature_row = self.df[self.features].iloc[index].to_numpy(dtype=np.float32)
        label_value = float(self.df[self.target].iloc[index])
        return torch.tensor(feature_row), torch.tensor([label_value], dtype=torch.float32)

    def __len__(self):
        """Number of rows (songs) in the wrapped DataFrame."""
        return self.df.shape[0]

    def getitem(self, index):
        """Return the raw pandas feature row(s) at ``index`` without tensor conversion."""
        return self.df[self.features].iloc[index]

In [12]:
# Full dataset plus a 100-row subset (first 100 rows, in file order).
dataset = SpotifyTracksDataset2(normalized_df)
dataset_small = SpotifyTracksDataset2(normalized_df.iloc[0:100])

In [13]:
# First 10 rows only.
dataset_tiny = SpotifyTracksDataset2(normalized_df.iloc[0:10])

In [146]:
# Split by row position (no shuffling): 1% train, 1% validation, the rest test.
num_data = len(normalized_df)
num_train = num_val = int(num_data * 0.01)
num_test = num_data - (num_train + num_val)

train = SpotifyTracksDataset2(normalized_df.iloc[:num_train])
val = SpotifyTracksDataset2(normalized_df.iloc[num_train:num_train + num_val])
test = SpotifyTracksDataset2(normalized_df.iloc[num_train + num_val:])

## Model

In [174]:
class TestNet(nn.Module):
  """Small fully-connected regressor: Linear -> ReLU -> Linear, one output.

  Args:
    in_features: Width of each input vector. Defaults to 14, the value that
      was previously hard-coded.
    hidden_units: Width of the hidden layer. Defaults to 50, as before.

  The layers are moved to the notebook-level `device` on construction.
  """
  def __init__(self, in_features=14, hidden_units=50):
    super().__init__()

    # Kept as one nn.Sequential stored under `layers` so parameter names
    # (layers.0.*, layers.2.*) are unchanged.
    self.layers = nn.Sequential(
        nn.Linear(in_features, hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, 1)
    ).to(device)

  def forward(self, x):
    """Map a batch of shape (batch, in_features) to predictions of shape (batch, 1)."""
    return self.layers(x)

## Training

### Helper Functions

In [165]:
def get_error(model, data, batch_size=100):
  """Mean absolute error of `model` on `data`, with predictions clamped to [0, 100].

  Args:
    model: Module mapping a feature batch to predictions shaped like the targets.
    data: Map-style dataset yielding (features, target) tensor pairs.
    batch_size: Evaluation batch size (default 100, the previous fixed value).

  Returns:
    Sum of absolute errors divided by len(data), as a float. Returns NaN for
    an empty dataset instead of raising ZeroDivisionError.
  """
  if len(data) == 0:
    return float('nan')

  # Run on whatever device the model's weights live on; fall back to the
  # notebook-level `device` only for a parameter-less model.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else device

  data_loader = torch.utils.data.DataLoader(data, batch_size=batch_size)

  # Evaluate in eval mode without autograd bookkeeping, then restore the
  # caller's train/eval mode even if the forward pass raises.
  was_training = model.training
  model.eval()
  total_error = 0.0
  try:
    with torch.no_grad():
      for features, targets in data_loader:
        features = features.to(target_device)
        targets = targets.to(target_device)

        out = model(features)
        total_error += (targets - out.clamp(min=0,max=100)).abs().sum().item()
  finally:
    model.train(was_training)

  return total_error / len(data)

In [166]:
def predict_some(model, data, num_to_predict):
  """Print actual vs. predicted target for the first few items of `data`.

  Args:
    model: Module mapping a (1, num_features) batch to a single prediction.
    data: Indexable dataset yielding (features, target) tensor pairs.
    num_to_predict: Maximum number of leading items to print; capped at
      len(data) so a short dataset no longer raises IndexError.

  Predictions are clamped to [0, 100] and both values are rounded to 2 decimals.
  """
  # Run on whatever device the model's weights live on; fall back to the
  # notebook-level `device` only for a parameter-less model.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else device

  count = min(num_to_predict, len(data))

  # Inference only: eval mode, no autograd; restore the caller's mode afterwards.
  was_training = model.training
  model.eval()
  try:
    with torch.no_grad():
      for i in range(count):
        features, target = data[i]
        features = features.to(target_device)
        target = target.to(target_device)

        out = model(features.unsqueeze(0)).clamp(min=0,max=100)
        print(f"Actual: {round(target.item(), 2)};  Predicted: {round(out.item(), 2)}")
  finally:
    model.train(was_training)

### Train Function

In [154]:
def train_net(model, train_data, valid_data, bs=50, lr=0.01, wd=0, epochs=5):
  """Train `model` with Adam on an MSE loss, printing per-epoch diagnostics.

  Args:
    model: Module to optimise in place.
    train_data: Dataset of (features, target) tensor pairs used for training.
    valid_data: Dataset evaluated with get_error after every epoch.
    bs: Mini-batch size.
    lr: Adam learning rate.
    wd: Adam weight decay.
    epochs: Number of passes over `train_data`.

  Raises:
    ValueError: if `train_data` is empty.

  After each epoch prints the mean absolute train/validation error
  (predictions clamped to [0, 100]), the mean batch loss, and five sample
  predictions from each of the two datasets.
  """
  if len(train_data) == 0:
    raise ValueError("train_data is empty; nothing to train on")

  train_loader = torch.utils.data.DataLoader(train_data, batch_size=bs, shuffle=True)
  # Batch count taken from the loader rather than from a loop variable that
  # leaks out of the batch loop (and is unbound if the loop never runs).
  num_batches = len(train_loader)

  criterion = nn.MSELoss()
  optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)

  # Adam has already rejected a parameter-less model, so this lookup is safe.
  model_device = next(model.parameters()).device

  for epoch in range(1, epochs + 1):
    # Make sure training-mode layers are active at the start of every epoch,
    # regardless of what the evaluation helpers did to the mode.
    model.train()

    epoch_error = 0.0
    epoch_loss = 0.0
    for features, targets in train_loader:
      features = features.to(model_device)
      targets = targets.to(model_device)

      optimizer.zero_grad()
      out = model(features)

      loss = criterion(out, targets)
      loss.backward()
      optimizer.step()

      epoch_loss += loss.item()
      epoch_error += (targets - out.clamp(min=0,max=100)).abs().sum().item()

    epoch_loss = epoch_loss / num_batches
    train_error_rate = epoch_error / len(train_data)
    val_error_rate = get_error(model, valid_data)

    print(f"Epoch {epoch}")
    print(f"  Train Error: {train_error_rate}")
    print(f"  Valid Error: {val_error_rate}")
    print(f"  Train Loss: {epoch_loss}")

    predict_some(model, train_data, 5)
    predict_some(model, valid_data, 5)

    print("\n")

### Training Model

In [178]:
# Fresh network placed on the selected device (GPU when available, else CPU).
model = TestNet().to(device)

# Show how many training samples there are, then run the long training loop.
print(len(train))
train_net(model, train, val, bs=10, lr=0.001, epochs=2000)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Actual: 71.0;  Predicted: 57.39
Actual: 62.0;  Predicted: 72.72
Actual: 44.0;  Predicted: 58.63
Actual: 29.0;  Predicted: 57.55
Actual: 78.0;  Predicted: 57.1


Epoch 303
  Train Error: 24.940303096064813
  Valid Error: 19.520659270109952
  Train Loss: 1052.3405659993489
Actual: 76.0;  Predicted: 72.92
Actual: 63.0;  Predicted: 70.44
Actual: 75.0;  Predicted: 68.09
Actual: 29.0;  Predicted: 60.76
Actual: 75.0;  Predicted: 73.06
Actual: 71.0;  Predicted: 61.75
Actual: 62.0;  Predicted: 78.05
Actual: 44.0;  Predicted: 63.04
Actual: 29.0;  Predicted: 61.87
Actual: 78.0;  Predicted: 61.18


Epoch 304
  Train Error: 26.22031558001483
  Valid Error: 21.33861400462963
  Train Loss: 970.8004659016927
Actual: 76.0;  Predicted: 81.56
Actual: 63.0;  Predicted: 78.79
Actual: 75.0;  Predicted: 76.63
Actual: 29.0;  Predicted: 67.89
Actual: 75.0;  Predicted: 81.98
Actual: 71.0;  Predicted: 69.24
Actual: 62.0;  Predicted: 87.18
Actual: 4

KeyboardInterrupt: ignored