In [1]:
from torch.utils.data import Dataset,DataLoader,TensorDataset
from torchmetrics import SymmetricMeanAbsolutePercentageError
import matplotlib.pyplot as plt
import torch.nn as nn
import pandas as pd
import numpy as np
import torch
import gc

In [2]:
df = pd.read_csv('Data/All_music.csv')
df["timestamp"] = pd.to_datetime(df["timestamp"])
df.drop("Unnamed: 0",inplace=True,axis=1)
df.set_index("timestamp",inplace=True)

df = df["2020-12-14":]

df["dayofweek"] = df.index.dayofweek.astype("int32")
df["log_views"] = np.log(df["views"] + 1e-8)
df["month"] = df.index.month.astype("int32")
df["year"] = df.index.year.astype("int32")
df["avg_by_song_month"] = df.groupby(["month","year","article"],observed=True)["views"].transform("mean").astype("float64")
df["min_by_song_month"] = df.groupby(["month","year","article"],observed=True)["views"].transform("min").astype("float64")
df["max_by_song_month"] = df.groupby(["month","year","article"],observed=True)["views"].transform("max").astype("float64")

features = ["dayofweek", "month", "year", "min_by_song_month", "max_by_song_month", "avg_by_song_month","log_views"]
target = "views"

In [3]:
class MusicDataset(Dataset):
    def __init__(self,features,target):
        self.feature = features
        self.target = target
    
    def __len__(self):
        return len(self.feature)
    
    def __getitem__(self,idx):
        item = self.feature[idx]
        label = self.target[idx]
        
        return item,label

In [4]:
from sklearn.model_selection import train_test_split
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
lr_logger = LearningRateMonitor()  # log the learning rate

trainer = pl.Trainer(
    max_epochs=100,
    accelerator='gpu', 
    devices=1,
    gradient_clip_val=0.1,
    #limit_train_batches=30,  # coment in for training, running valiation every 30 batches
    limit_train_batches=1.0, #if set to 1.0 gather all training data, default.
    callbacks=[lr_logger, early_stop_callback],
)

train, test = train_test_split(df, test_size=0.2, shuffle=True)

train_x = torch.tensor(train[features].values).to(device)
train_y = torch.tensor(train[target].values).unsqueeze(1).to(device)
test_x = torch.tensor(test[features].values).to(device)
test_y = torch.tensor(test[target].values).unsqueeze(1).to(device)

train = MusicDataset(train_x,train_y)
valid = MusicDataset(test_x,test_y)

train_loader = DataLoader(train,shuffle=False)
valid_loader = DataLoader(valid,shuffle=False)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..


In [5]:
# find optimal learning rate

class NeuralNetwork(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(NeuralNetwork, self).__init__()
        self.layer_1 = nn.Linear(input_dim, hidden_dim)
        self.activ1 = nn.ReLU()
        self.layer_2 = nn.Linear(hidden_dim, 1)
       
    def forward(self, x):
        x = self.layer_1(x)
        x = self.activ1(x)
        x = self.layer_2(x)
        return x


def smape(target, forecast):
  if type(target) == pd.core.frame.DataFrame:
    target = target.values

  denominator = np.abs(target) + np.abs(forecast)
  flag = denominator == 0.

  smape = 2 * np.mean(
      (np.abs(target - forecast) * (1 - flag)) / (denominator + flag)
  )
  return smape

In [7]:
# train the model
def train_model(train_dl, model):
    # define the optimization
    criterion = SymmetricMeanAbsolutePercentageError().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1.1220184543019632e-05)
    # enumerate epochs
    for epoch in range(100):
        # enumerate mini batches
        running_loss = 0
        for i, (inputs, targets) in enumerate(train_dl):
            #Decay Learning Rate
            # clear the gradients
            optimizer.zero_grad()
            # compute the model output
            yhat = model(inputs.float().to(device)).to(device)
            # calculate loss
            loss = criterion(yhat.float(), targets.float()).to(device)
            # credit assignment
            loss.backward()

            # update model weights
            optimizer.step()

            running_loss += loss.item()
            if i % 2000 == 1999:
                print(f"epoch: {epoch}, Loss: {running_loss}")
                running_loss = 0.0

# evaluate the model
def evaluate_model(test_dl, model):
    predictions, actuals = list(), list()
    for i, (inputs, targets) in enumerate(test_dl):
        # evaluate the model on the test set
        yhat = model(inputs.float()).to(device)
        # retrieve numpy array
        yhat = yhat.cpu().detach().numpy()
        actual = targets.numpy()
        actual = actual.reshape((len(actual), 1))
        # round to class values
        yhat = yhat.round()
        # store
        predictions.append(yhat)
        actuals.append(actual)
    predictions, actuals = np.vstack(predictions), np.vstack(actuals)
    # calculate accuracy
    return smape(actuals, predictions)


# make a class prediction for one row of data
def predict(row, model):
    # convert row to data
    row = torch.Tensor([row])
    # make prediction
    yhat = model(row)
    # retrieve numpy array
    yhat = yhat.detach().numpy()
    return yhat

n_input = len(features)
n_hidden = 15
batch_size = 100
learning_rate = 0.01

model = NeuralNetwork(n_input,n_hidden).to(device)
train_model(train_loader,model)

epoch: 0, Loss: 0.960506021976471
epoch: 0, Loss: 0.3504444360733032
epoch: 0, Loss: 0.08003087341785431
epoch: 0, Loss: 0.06884255260229111
epoch: 0, Loss: 0.8303112387657166


KeyboardInterrupt: 