In [28]:
import warnings
warnings.filterwarnings("ignore")
from glob import glob
from pathlib import Path

from tqdm import tqdm

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
# Report the installed torch build, then pick the compute device once:
# the first CUDA GPU when one is visible, otherwise the CPU.
print('pytorch version:', torch.__version__)
_device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
global_device = torch.device(_device_name)
print('Device:', global_device)
from torch.utils.data import DataLoader, random_split, Dataset
from torch.nn.utils.rnn import pack_sequence, pad_packed_sequence

from sklearn.model_selection import train_test_split
from sklearn.metrics import average_precision_score

import pytorch_lightning as pl
from torchmetrics.classification import MultilabelAveragePrecision

pytorch version: 2.2.0.dev20231027+cu121
Device: cuda:0


In [29]:
# Matplotlib settings
import matplotlib
import matplotlib as mp
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.collections import PolyCollection
from matplotlib.colors import colorConverter

# Font sizes shared by every figure in the notebook.
titlesize = 20
labelsize = 16
legendsize = labelsize
xticksize = 14
yticksize = xticksize

# Apply all rc overrides in one place; each key is validated by rcParams.
matplotlib.rcParams.update({
    # Legend geometry
    'legend.markerscale': 1.5,      # the relative size of legend markers vs. original
    'legend.handletextpad': 0.5,
    'legend.labelspacing': 0.4,     # the vertical space between the legend entries in fraction of fontsize
    'legend.borderpad': 0.5,        # border whitespace in fontsize units
    'legend.fontsize': legendsize,
    # Fonts
    'font.size': 12,
    'font.family': 'serif',
    'font.serif': 'Times New Roman',
    # Axes and tick labels
    'axes.labelsize': labelsize,
    'axes.titlesize': titlesize,
    'xtick.labelsize': xticksize,
    'ytick.labelsize': yticksize,
})

# Data loading

In [30]:
def _parse_tags(raw_tags):
    """Turn a comma-separated tag string such as '3,17,42' into an integer array."""
    return np.array([int(tag) for tag in raw_tags.split(',')])


df_trainval = pd.read_csv('data/train.csv')

# A fixed seed keeps the train/validation split identical across
# "Restart Kernel -> Run All", so validation scores are comparable between runs.
df_train, df_val = train_test_split(df_trainval, test_size=0.2, random_state=42)

# `assign` returns new frames instead of writing a column into the objects
# returned by train_test_split, which can raise SettingWithCopyWarning
# (hidden here by the global warnings filter).
df_train = df_train.assign(tags=df_train['tags'].apply(_parse_tags))
df_val = df_val.assign(tags=df_val['tags'].apply(_parse_tags))

df_test = pd.read_csv('data/test.csv')

In [4]:
# In-memory cache of every track's embedding matrix, keyed by integer track id.
global_idx2embeds = {}  # {idx: np.ndarray[n, 768]}
for npy_file in tqdm(glob('data/track_embeddings/*')):
    # Path(...).name strips the directory using the host OS's separator rules.
    # The previous split('\\') only worked for Windows-style paths and raised
    # IndexError when glob returned forward-slash paths.
    track_idx = int(Path(npy_file).name.split('.')[0])
    global_idx2embeds[track_idx] = np.load(npy_file)

100%|██████████| 76714/76714 [00:56<00:00, 1368.15it/s]


In [32]:
# Per-tag count of training tracks carrying that tag (256 tag slots).
num_of_tags = np.zeros(256)
for tag_ids in df_train['tags'].to_list():
    # In-place add through fancy indexing bumps every referenced slot exactly
    # once, even if an index repeats, which is the same as adding a one-hot
    # vector built from `tag_ids`.
    num_of_tags[tag_ids] += 1

In [33]:
class TrackDataset(torch.utils.data.Dataset):
    """Map-style dataset yielding a track id, its embeddings and (optionally) its labels.

    Args:
        df_tags: DataFrame with a 'track' column and, unless ``test`` is True,
            a 'tags' column whose entries are arrays of integer tag indices.
        test: when True, items are ``(track_idx, embeds)`` without labels.
        num_classes: length of the multi-hot label vector (defaults to the
            previously hard-coded 256).
        idx2embeds: optional mapping ``track id -> embedding array``. When
            omitted, the module-level ``global_idx2embeds`` is consulted at
            item-access time, exactly as before.
    """

    def __init__(self, df_tags, test=False, num_classes=256, idx2embeds=None):
        self.df_tags = df_tags
        self.test = test
        self.num_classes = num_classes
        self.idx2embeds = idx2embeds

    def __len__(self):
        """Number of rows (tracks) in the backing DataFrame."""
        return len(self.df_tags)

    def __getitem__(self, idx):
        """Return ``(track_idx, embeds)`` in test mode, else ``(track_idx, embeds, labels_onehot)``.

        Raises:
            IndexError: propagated from ``iloc`` when ``idx`` is out of range.
            KeyError: when the track id has no entry in the embedding store.
        """
        # Fetch the row once instead of running a positional lookup per field.
        row = self.df_tags.iloc[idx]
        track_idx = row['track']

        # Resolve the store lazily so the global fallback only has to exist
        # by the time items are actually read.
        store = global_idx2embeds if self.idx2embeds is None else self.idx2embeds
        embeds = store[track_idx]
        if self.test:
            return track_idx, embeds

        labels_onehot = np.zeros(self.num_classes)
        labels_onehot[row['tags']] = 1
        return track_idx, embeds, labels_onehot

In [34]:
def _stack_track_ids(batch):
    """Stack the track id (item[0]) of every sample into an int32 column tensor of shape (B, 1)."""
    return torch.as_tensor(np.vstack([item[0] for item in batch]), dtype=torch.int32)


def _pack_embeddings(batch):
    """Pack the variable-length embedding matrices (item[1]) and move them to `global_device`.

    Packing happens on the host so the whole batch is transferred in one
    `.to(...)` call rather than one copy per sample.
    """
    sequences = [torch.as_tensor(item[1], dtype=torch.float32) for item in batch]
    return pack_sequence(sequences, enforce_sorted=False).to(global_device)


def collate(batch):
    """Collate labelled samples ``(track_idx, embeds, labels_onehot)``.

    Returns:
        track_idxs: int32 CPU tensor of shape (B, 1).
        embeds: PackedSequence of float32 embeddings on `global_device`.
        labels_onehot: float32 tensor of shape (B, num_labels) on `global_device`.
    """
    track_idxs = _stack_track_ids(batch)
    embeds = _pack_embeddings(batch)
    labels_onehot = torch.as_tensor(
        np.vstack([item[2] for item in batch]), dtype=torch.float32
    ).to(global_device)
    return track_idxs, embeds, labels_onehot


def collate_test(batch):
    """Collate samples for inference; only item[0] and item[1] of each sample are read.

    Returns:
        track_idxs: int32 CPU tensor of shape (B, 1).
        embeds: PackedSequence of float32 embeddings on `global_device`.
    """
    return _stack_track_ids(batch), _pack_embeddings(batch)

batch_size = 256

# Training: labelled samples, reshuffled every epoch.
dataset_train = TrackDataset(df_train)
dataloader_train = DataLoader(
    dataset_train, batch_size=batch_size, shuffle=True, collate_fn=collate
)

# Validation: one labelled loader for the training loop, plus a label-free
# view of the same dataset (same order) for running the inference path on it.
dataset_val = TrackDataset(df_val)
dataloader_val = DataLoader(
    dataset_val, batch_size=batch_size, shuffle=False, collate_fn=collate
)
dataloader_val_astest = DataLoader(
    dataset_val, batch_size=batch_size, shuffle=False, collate_fn=collate_test
)

# Test: unlabelled samples in file order.
dataset_test = TrackDataset(df_test, test=True)
dataloader_test = DataLoader(
    dataset_test, batch_size=batch_size, shuffle=False, collate_fn=collate_test
)

In [42]:
class TrackToTags(pl.LightningModule):
    """Multi-label tagger over a packed sequence of per-track embeddings.

    Pipeline: GRU over the sequence -> single-head self-attention over the GRU
    outputs -> concatenation of [mean GRU output, mean attention output, final
    GRU hidden state] -> LayerNorm -> MLP -> per-tag logits.

    Args:
        num_classes: number of tags (size of the logit vector).
        input_dim: feature size of each input embedding.
        hidden_dim: GRU / attention width.
        pos_weights: optional per-class positive weights (tensor or array-like
            of length ``num_classes``) forwarded to ``BCEWithLogitsLoss``.
            ``None`` keeps the unweighted loss.
    """

    def __init__(self, num_classes=256, input_dim=768, hidden_dim=512, pos_weights=None):
        super().__init__()
        self.num_classes = num_classes
        # NOTE: despite the name this is a LayerNorm; the attribute name is
        # kept so existing checkpoints keep loading.
        self.bn = nn.LayerNorm(3*hidden_dim)
        self.lin = nn.Sequential(
            nn.Linear(3*hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LayerNorm(hidden_dim)
        )
        self.gru = nn.GRU(input_dim, hidden_dim, batch_first=True, bidirectional=False)
        self.self_attn = nn.MultiheadAttention(embed_dim=hidden_dim, num_heads=1, batch_first=True)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(hidden_dim, num_classes)
        # Not used by forward(); retained so the module's attribute set is unchanged.
        self.avg_pooling = nn.AdaptiveAvgPool1d(1)

        # `pos_weights` used to be accepted and silently dropped; it now
        # reaches the loss. None reproduces the previous unweighted behaviour.
        if pos_weights is not None:
            pos_weights = torch.as_tensor(pos_weights, dtype=torch.float32)
        self.loss = nn.BCEWithLogitsLoss(pos_weight=pos_weights)

        self.metric = average_precision_score
        # Per-epoch buffers of validation predictions / labels (numpy arrays).
        self._transit_val = {'preds': [], 'labels': []}

    def forward(self, embeds):
        """Compute tag logits of shape (B, num_classes) for a PackedSequence of embeddings."""
        outpack, h = self.gru(embeds)
        unpacked, lengths = pad_packed_sequence(outpack, batch_first=True)  # (B, T, H), (B,)

        # Vectorised padding mask built on the same device as the activations:
        # True marks positions at or beyond each sequence's true length.
        lengths = lengths.to(unpacked.device)
        positions = torch.arange(unpacked.shape[1], device=unpacked.device)
        pad_mask = positions.unsqueeze(0) >= lengths.unsqueeze(1)  # (B, T)

        attn_out, _ = self.self_attn(unpacked, unpacked, unpacked, key_padding_mask=pad_mask)

        # Mean-pool over real time steps only. Attention outputs at padded
        # query positions are not zero, so they must be masked out before the
        # sum; otherwise short sequences in a batch get a polluted average.
        valid = (~pad_mask).unsqueeze(-1).to(unpacked.dtype)  # (B, T, 1)
        denom = lengths.unsqueeze(1).to(unpacked.dtype)       # (B, 1)
        x = (unpacked * valid).sum(dim=1) / denom
        x_attn = (attn_out * valid).sum(dim=1) / denom

        # Take the (only) layer's final hidden state: (1, B, H) -> (B, H).
        # A bare squeeze() would also drop the batch axis when B == 1.
        h = h[-1]

        x = torch.cat([x, x_attn, h], dim=1)
        x = self.bn(x)
        x = self.lin(x)
        x = self.relu(x)
        outs = self.fc(x)
        return outs

    def training_step(self, batch, batch_idx):
        """Return the BCE-with-logits loss for one training batch and log it."""
        track_idxs, embeds_list, labels_onehot = batch
        pred_logits = self(embeds_list)
        loss = self.loss(pred_logits, labels_onehot)
        self.log("train/loss", loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log the validation loss and buffer probabilities / labels for the epoch-level AP."""
        track_idxs, embeds_list, labels_onehot = batch
        pred_logits = self(embeds_list)
        loss = self.loss(pred_logits, labels_onehot)
        self.log("val/loss", loss, prog_bar=True)

        pred_probs = torch.sigmoid(pred_logits)
        self._transit_val['labels'].append(labels_onehot.int().cpu().numpy())
        self._transit_val['preds'].append(pred_probs.detach().cpu().numpy())

    def on_validation_epoch_end(self):
        """Compute average precision over all buffered validation batches, log it, reset buffers."""
        if not self._transit_val['preds']:
            # Nothing was validated this epoch; there is nothing to score.
            return
        preds = np.vstack(self._transit_val['preds'])
        labels = np.vstack(self._transit_val['labels'])
        ap = self.metric(labels, preds)
        self.log('val/epoch_ap', ap, prog_bar=True)
        self._transit_val['labels'] = []
        self._transit_val['preds'] = []

    def configure_optimizers(self):
        """Adam (lr=3e-4) with an exponential decay (gamma=0.5) stepped every 10 epochs."""
        optimizer = torch.optim.Adam(self.parameters(), lr=3e-4)
        scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.5)

        return {"optimizer": optimizer,
                "lr_scheduler": {
                                "scheduler": scheduler,
                                "interval": "epoch",
                                "frequency": 10
                                },
               }

In [43]:
# Derive the Trainer's accelerator from `global_device` so the model and the
# batches produced by the collate functions end up on the same device, and the
# notebook still runs on machines without CUDA.
trainer = pl.Trainer(
    accelerator="gpu" if global_device.type == "cuda" else "cpu",
    devices=1,
    val_check_interval=1.0,
    max_epochs=20,
    log_every_n_steps=100,
)
model = TrackToTags()
trainer.fit(model, dataloader_train, dataloader_val)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type               | Params
---------------------------------------------------
0 | bn          | LayerNorm          | 3.1 K 
1 | lin         | Sequential         | 1.1 M 
2 | gru         | GRU                | 2.0 M 
3 | self_attn   | MultiheadAttention | 1.1 M 
4 | relu        | ReLU               | 0     
5 | fc          | Linear             | 131 K 
6 | avg_pooling | AdaptiveAvgPool1d  | 0     
7 | loss        | BCEWithLogitsLoss  | 0     
---------------------------------------------------
4.2 M     Trainable params
0         Non-trainable params
4.2 M     Total params
16.819    Total estimated model params size (MB)


Epoch 0:   1%|          | 1/160 [9:10:08<1457:52:17,  0.00it/s]    , train/loss=0.0565, val/loss=0.0665, val/epoch_ap=0.0273]
Epoch 14:  75%|███████▌  | 120/160 [07:51<02:37,  0.25it/s, v_num=88, train/loss=0.0437, val/loss=0.0449, val/epoch_ap=0.218]

In [24]:
def predict(model, loader):
    """Run `model` over `loader` and collect per-track tag probabilities.

    The model is moved to `global_device` and switched to eval mode (it is
    left in that state). Each batch from `loader` must be a pair
    ``(track_idx_tensor, embeds)``.

    Returns:
        track_idxs: 1-D numpy array of track ids, in loader order.
        predictions: 2-D numpy array of sigmoid probabilities, one row per track.
    """
    model.to(global_device)
    model.eval()

    id_chunks, prob_chunks = [], []
    with torch.no_grad():
        for batch_ids, batch_embeds in loader:
            batch_probs = torch.sigmoid(model(batch_embeds))
            prob_chunks.append(batch_probs.cpu().numpy())
            id_chunks.append(batch_ids.numpy())

    predictions = np.vstack(prob_chunks)
    track_idxs = np.vstack(id_chunks).ravel()
    return track_idxs, predictions

In [25]:
# Ground-truth multi-hot labels in dataset order; the loader below is not
# shuffled, so rows line up with the predictions.
val_true = np.array([dataset_val[i][2] for i in range(len(dataset_val))])
track_idxs_val, val_pred = predict(model, dataloader_val_astest)
average_precision_score(val_true, val_pred)

0.22722494342712735

In [26]:
# Score the unlabelled test tracks with the trained model.
track_idxs, predictions = predict(
    model.to(global_device),
    dataloader_test,
)

In [27]:
# One row per test track: the track id plus its tag probabilities joined into
# a single comma-separated string.
predictions_df = pd.DataFrame([
    {'track': track, 'prediction': ','.join(str(p) for p in probs)}
    for track, probs in zip(track_idxs, predictions)
])

# Create the output folder first: to_csv does not create missing directories,
# so a fresh checkout would otherwise fail here after the whole run.
output_path = Path('results/prediction.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
predictions_df.to_csv(output_path, index=False)