In [1]:
import numpy as np
import pickle
import time
import collections
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.cuda.amp.autocast_mode import autocast
from torch.cuda.amp.grad_scaler import GradScaler
from torch.utils.data import DataLoader
from PIL import Image

  example_input = torch.tensor([[-3, -2, -1], [0, 1, 2]])


In [65]:

class FeatConv(nn.Module):

    def __init__(self, fsize):
        super(FeatConv, self).__init__()
        self.cnv1 = nn.Conv2d(1, fsize, 3, 1, 1, bias=False)
        self.cnv2 = nn.Conv2d(fsize, fsize, 3, 1, 1, bias=False)
        self.cnv3 = nn.Conv2d(fsize, fsize, 3, 1, 1, bias=False)
        self.icnv = nn.Conv2d(fsize, fsize * 2, 1, bias=False)
        self.cnv4 = nn.Conv2d(fsize * 2, fsize * 2, 3, 1, 1, bias=False)
        self.cnv5 = nn.Conv2d(fsize * 2, fsize * 2, 3, 1, 1, bias=False)
        self.nrm1 = nn.BatchNorm2d(fsize)
        self.nrm2 = nn.BatchNorm2d(fsize)
        self.nrm3 = nn.BatchNorm2d(fsize)
        self.nrmc1 = nn.BatchNorm2d(fsize)
        self.nrmc2 = nn.BatchNorm2d(fsize * 2)
        self.nrm4 = nn.BatchNorm2d(fsize * 2)
        self.nrm5 = nn.BatchNorm2d(fsize * 2)
        self.nrmi = nn.BatchNorm2d(fsize * 2)
        self.fpool = nn.AdaptiveAvgPool2d(6)
        self.act = nn.ELU()
        self.flat = nn.Flatten()
        self.drop = nn.Dropout2d(0.16)
        self.ll1 = nn.Linear(2304, 512)
        self.ll2 = nn.Linear(512, 128)
        self.ll3 = nn.Linear(128, 19)

    def forward(self, x):
        x = self.act(self.nrm1(self.cnv1(x)))
        res = x
        x = self.act(self.nrm2(self.cnv2(x)))
        x = self.act(self.nrm3(self.cnv3(x)))
        x = self.act(self.nrmc1(torch.add(x, res)))
        x = self.act(self.nrmi(self.icnv(x)))
        res = x
        x = self.act(self.nrm4(self.cnv4(x)))
        x = self.act(self.nrm5(self.cnv5(x)))
        x = self.act(self.nrmc2(torch.add(x, res)))
        x = self.drop(x)
        x = self.fpool(x)
        x = self.flat(x)
        x = self.act(self.ll1(x))
        x = self.act(self.ll2(x))
        x = self.act(self.ll3(x))
        return x

class Trainer:

    def __init__(self, model: nn.Module, model_name: str, lrn_rate1: float, wdecay1: float, save_threshold: float) -> None:
        self.model = model.to('cuda')
        self.save_threshold = save_threshold
        self.lrn_rate1 = lrn_rate1
        self.wdecay1 = wdecay1
        self.optimizer = torch.optim.NAdam(self.model.parameters(), lr=lrn_rate1, weight_decay=wdecay1)
        self.loss_function = nn.CrossEntropyLoss()
        self.scaler = GradScaler(enabled=True)
        self.model_name = model_name
        self.tbatch_counter = 0
        self.vbatch_counter = 0

    def split_data(self, dataset: list[tuple[torch.Tensor, int]], ratio: float, splits):
        vdata = []
        counts = collections.Counter([x[1] for x in dataset])
        for x in counts.items():
            gcounter = 0
            i = 0
            while gcounter < int(x[1] * ratio):
                if dataset[i][1] == x[0]:
                    vdata.append(dataset.pop(i))
                    gcounter += 1
                i += 1
        tdata = [dataset[(x - 1) * int(len(dataset) / splits) : x * int(len(dataset) / splits)] for x in range(1, splits + 1)]
        return tdata, vdata

    def _train(self, dataset):
        total_loss = 0
        self.model.train()
        for x in dataset:
            with autocast(enabled=True):
                data, labels = x[0].to('cuda'), x[1].to('cuda')
                results = self.model(data)
                loss = self.loss_function(results, labels)
            self.scaler.scale(loss).backward()
            self.scaler.step(self.optimizer)
            self.scaler.update()
            total_loss += loss.item()
            self.optimizer.zero_grad()
            self.tbatch_counter += 1
        return total_loss / len(dataset)

    def _validate(self, dataset):
        total_loss = 0
        self.model.eval()
        predictions = []
        with torch.no_grad():
            for x in dataset:
                data, labels = x[0].to('cuda'), x[1].to('cuda')
                results = self.model(data)
                for j, w in enumerate([F.softmax(y, dim=0).argmax() for y in results]):
                    predictions.append((labels[j], w))
                total_loss += self.loss_function(results, labels).item()
                self.vbatch_counter += 1
        return total_loss / len(dataset), predictions

    def train_model(self, dataset, target_epochs: int, batch_size: int, ratio: float, splits):
        self.batch_size = batch_size
        samples = len(dataset)
        train_data, val_data = self.split_data(dataset, ratio, splits)
        
        for ep in range(target_epochs):
            for i, group in enumerate(train_data):
                self.batches_per_group = int(int(samples * (1 - ratio)) / self.batch_size)

                tdata = DataLoader(group, batch_size=self.batch_size, shuffle=True)
                train_loss = self._train(tdata)
                vdata = DataLoader(val_data, batch_size=self.batch_size, shuffle=True)
                val_loss, predictions = self._validate(vdata)

                print(f'Status at batch {self.batches_per_group * (1 + i)},\tTLoss: {round(float(train_loss), 5)}\tVLoss: {round(float(val_loss), 5)}')
                _ = print_metrics(predictions)

                if val_loss < self.save_threshold:
                    self.save_threshold = val_loss
                    torch.save(self.model.state_dict(), f'models/{self.model_name}')

        self.model = self.model.to('cpu')
        del dataset
        torch.cuda.empty_cache()

def group_predict(adata: list[list[np.ndarray]], model: nn.Module) -> list[tuple[int, int]]:
    predictions = []
    model.to('cuda')
    model.eval()
    with torch.no_grad():
        for song in adata:
            results = []
            for frame in song[0]:
                spctr, ftr = frame[0].to('cuda'), frame[1].to('cuda')
                results.append(torch.nn.functional.softmax(*model((spctr, ftr)), dim=0).tolist())
            scores = [round(sum(w), 4) for w in  np.array(results).T]
            predictions.append((song[1], scores.index(np.max(scores))))
    model.to('cpu')
    return predictions

def print_metrics(predictions):
    print(f'Label\t\tRecall\t\tPrecision\tF1\t\tRatio')
    metrics = []
    total = len(predictions)
    for x in range(19):
        tp = 0
        guesses = [y for y in predictions if y[1] == x]
        actual = [y for y in predictions if y[0] == x]
        for z in actual:
            if z[0] == z[1]:
                tp += 1
        alen = len(actual)
        glen = len(guesses)
        if glen == 0 or alen == 0 or tp == 0:
            alen += 1
            glen += 1
            tp += 1
        recall = round(tp / alen, 3)
        precis = round(tp / glen, 3)
        f1 = round(2 * (precis * recall) / (precis + recall), 3)
        metrics.append((alen, glen, tp))
        print(f'{x}\t\t{recall}\t\t{precis}\t\t{f1}\t\t{tp}/{glen}/{alen}')
    correct = sum([x[2] for x in metrics])
    mprec = round(np.mean([x[2] / x[1] for x in metrics]), 4)
    mrec = round(np.mean([x[2] / x[0] for x in metrics]), 4)
    f1mac = round(2 * ((mprec * mrec) / (mprec**-1 + mrec**-1)), 4)
    f1mic = round(correct / total, 4)
    mcc = round((correct * total - sum([x[0] * x[1] for x in metrics])) / np.sqrt((total**2 - sum(x[0]**2 for x in metrics))*(total**2 - sum(x[1]**2 for x in metrics))), 4)
    #ck = round((correct * total - sum([x[1] * x[0] for x in metrics])) / (total**2 - sum([x[1] * x[0] for x in metrics])), 3)
    print('\n')
    print(f"Macro Precision\t\t{mprec}")
    print(f'Macro Recall\t\t{mrec}')
    print(f'F1 Macro\t\t{f1mac}')
    print(f'F1 Micro\t\t{f1mic}')
    print(f'MCC\t\t\t{mcc}')
    #print(f'CK\t\t\t{ck}')
    print(correct, '/', total)
    return mprec, mrec, f1mac, f1mic, mcc


In [66]:
del fmodel
del trainer

In [3]:
with open(f'data/feats', 'rb') as f:
    adata = pickle.load(f)
with open('data/train_labels', 'rt') as f:
    labels = f.readlines()
labels = [x.split(',') for x in labels]
labels = {x[1]: int(x[3]) for x in labels}
adata = [(torch.tensor(x[0], dtype=torch.float32).unsqueeze(0), labels[x[1]]) for x in adata]

In [67]:

train_session = '0'
fmodel = FeatConv(32)
print(sum(x.numel() for x in fmodel.parameters() if x.requires_grad))

#av_model.load_state_dict(torch.load('models/mscls_x'))
trainer = Trainer(fmodel, f'ftest_{train_session}', 3e-4, 1e-5, 1.4)
trainer.train_model(adata.copy(), 3, 32, 0.1, 1)


1343539
Status at batch 559,	TLoss: 1.94697	VLoss: 1.93246
Label		Recall		Precision	F1		Ratio
0		0.215		0.475		0.296		66/139/307
1		0.56		0.276		0.37		173/626/309
2		0.442		0.389		0.414		114/293/258
3		0.106		0.463		0.173		19/41/179
4		0.737		0.353		0.477		129/365/175
5		0.678		0.328		0.442		82/250/121
6		0.051		0.6		0.094		6/10/118
7		0.327		0.291		0.308		34/117/104
8		0.021		0.154		0.037		2/13/94
9		0.086		0.7		0.153		7/10/81
10		0.013		0.125		0.024		1/8/79
11		0.592		0.468		0.523		29/62/49
12		0.95		0.974		0.962		38/39/40
13		0.067		0.25		0.106		2/8/30
14		0.067		1.0		0.126		1/1/15
15		0.1		1.0		0.182		1/1/10
16		0.1		0.5		0.167		1/2/10
17		0.167		1.0		0.286		1/1/6
18		0.5		1.0		0.667		1/1/2


Macro Precision		0.5446
Macro Recall		0.3041
F1 Macro		0.0646
F1 Micro		0.3567
MCC			0.2866
707 / 1982
Status at batch 559,	TLoss: 1.74906	VLoss: 1.7086
Label		Recall		Precision	F1		Ratio
0		0.573		0.448		0.503		176/393/307
1		0.239		0.278		0.257		74/266/309
2		0.721		0.387		0.504		186/481/258