In [53]:
import numpy as np
import pickle
import time
import collections
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.cuda.amp.autocast_mode import autocast
from torch.cuda.amp.grad_scaler import GradScaler
from torch.utils.tensorboard.writer import SummaryWriter
from torch.utils.data import DataLoader
from PIL import Image


In [2]:

class Mini3(nn.Module):
    """Channel-bottleneck block: 1x1 squeeze, 3x3 conv + batch norm, 1x1 expand.

    Every stage is followed by an ELU. All convolutions use stride 1 (the 3x3
    one with padding 1), so the output has the same channel count and spatial
    size as the input.

    Args:
        ftrs: number of input and output channels; the two inner stages work
            on ``ftrs // 2`` channels.
    """

    def __init__(self, ftrs):
        super().__init__()
        squeezed = ftrs // 2
        # Attribute names and creation order are part of the checkpoint
        # layout (state_dict keys) and of seeded initialisation; keep them.
        self.dcnv = nn.Conv2d(ftrs, squeezed, kernel_size=1)
        self.cnv = nn.Conv2d(squeezed, squeezed, kernel_size=3, stride=1, padding=1, bias=False)
        self.ucnv = nn.Conv2d(squeezed, ftrs, kernel_size=1)
        self.nrm = nn.BatchNorm2d(squeezed)
        self.act = nn.ELU()

    def forward(self, x):
        reduced = self.act(self.dcnv(x))
        mixed = self.act(self.nrm(self.cnv(reduced)))
        return self.act(self.ucnv(mixed))

class ReStep(nn.Module):
    """Residual down-sampling stage built from three ``Mini3`` blocks.

    Data flow: two ``Mini3`` blocks and a max-pool run on the main path while
    a strided, bias-free 1x1 convolution down-samples the shortcut; the two
    are summed, normalised and activated. A third ``Mini3`` follows with an
    identity shortcut, and a closing 1x1 convolution sets the output width.

    Args:
        ftrs: number of input channels.
        poolks: kernel size and stride of the max-pool; also used as the
            stride of the 1x1 shortcut convolution.
        final: when True the closing 1x1 convolution keeps ``ftrs`` channels,
            otherwise it doubles them.
    """

    def __init__(self, ftrs, poolks, final=False):
        super().__init__()
        self.cnv1 = Mini3(ftrs)
        self.cnv2 = Mini3(ftrs)
        self.cnv3 = Mini3(ftrs)
        self.dscale = nn.Conv2d(ftrs, ftrs, kernel_size=1, stride=poolks, bias=False)
        out_ftrs = ftrs if final else ftrs * 2
        self.uscale = nn.Conv2d(ftrs, out_ftrs, kernel_size=1, stride=1)
        # NOTE(review): this single BatchNorm normalises both residual sums in
        # forward(), so they share affine parameters and running statistics.
        self.inrm = nn.BatchNorm2d(ftrs)
        self.mxpool = nn.MaxPool2d(kernel_size=poolks, stride=poolks)
        self.iact = nn.ELU()

    def forward(self, x):
        pooled = self.mxpool(self.cnv2(self.cnv1(x)))
        merged = self.iact(self.inrm(pooled + self.dscale(x)))
        refined = self.cnv3(merged)
        merged = self.iact(self.inrm(refined + merged))
        return self.iact(self.uscale(merged))

class FeatConv(nn.Module):
    """Convolutional encoder followed by an MLP for the auxiliary feature input.

    Three (3, 41) convolutions (padding 1, each followed by batch norm and
    ELU) are applied, then channel dropout, a 4x4 adaptive average pool and a
    seven-layer MLP (128-64-32-32-64-128-``out_ftrs``) with ELU after every
    layer, including the last.

    Args:
        fsize: number of channels used by all three convolutions.
        out_ftrs: width of the returned feature vector. Defaults to 576, the
            value previously hard-coded.

    The first linear layer's input width is derived from ``fsize`` and the
    pool size. It used to be the literal 512, which only matched
    ``fsize == 32``.

    Input is assumed to be ``(batch, 1, H, W)``; each convolution shrinks W by
    38 (kernel 41, padding 1), so W must be large enough -- TODO confirm
    against the data pipeline.
    """

    _POOL_SIZE = 4

    def __init__(self, fsize, out_ftrs: int = 576):
        super().__init__()
        # Attribute names/order are kept so existing checkpoints still load.
        self.cnv1 = nn.Conv2d(1, fsize, (3, 41), 1, 1, bias=False)
        self.cnv2 = nn.Conv2d(fsize, fsize, (3, 41), 1, 1, bias=False)
        self.cnv3 = nn.Conv2d(fsize, fsize, (3, 41), 1, 1, bias=False)
        self.nrm1 = nn.BatchNorm2d(fsize)
        self.nrm2 = nn.BatchNorm2d(fsize)
        self.nrm3 = nn.BatchNorm2d(fsize)
        self.fpool = nn.AdaptiveAvgPool2d(self._POOL_SIZE)
        self.act = nn.ELU()
        self.flat = nn.Flatten()
        self.drop = nn.Dropout2d(0.16)
        # Flattened pool output: fsize channels * 4 * 4 (== 512 for fsize=32).
        self.ll1 = nn.Linear(fsize * self._POOL_SIZE * self._POOL_SIZE, 128)
        self.ll2 = nn.Linear(128, 64)
        self.ll3 = nn.Linear(64, 32)
        self.ll4 = nn.Linear(32, 32)
        self.ll5 = nn.Linear(32, 64)
        self.ll6 = nn.Linear(64, 128)
        self.ll7 = nn.Linear(128, out_ftrs)

    def forward(self, x):
        x = self.act(self.nrm1(self.cnv1(x)))
        x = self.act(self.nrm2(self.cnv2(x)))
        x = self.act(self.nrm3(self.cnv3(x)))
        x = self.drop(x)
        x = self.flat(self.fpool(x))
        for layer in (self.ll1, self.ll2, self.ll3, self.ll4, self.ll5, self.ll6, self.ll7):
            x = self.act(layer(x))
        return x

class MusCls(nn.Module):
    """Three-band spectrogram classifier with an auxiliary feature branch.

    ``forward`` takes a pair ``(spectrogram, features)``. The spectrogram goes
    through a three-convolution stem and is cut along dim 2 into overlapping
    low (rows :16), mid (rows 14:62) and high (rows 60:) bands. Each band runs
    through its own stack of four ``ReStep`` stages and a 3x3 conv head, is
    average-pooled to 3x3 and flattened. The features go through ``FeatConv``.
    Everything is concatenated and classified into 19 logits.

    Args:
        step_1_ftrs .. step_4_ftrs: input channel counts of the four ReStep
            stages. Stages 1-2 double their channels and stages 3-4 keep
            them, so each value has to match the previous stage's output.

    NOTE(review): ``flin`` expects ``final_ftrs * 9 * 4`` inputs, i.e. the
    ``FeatConv`` output (576 wide) must equal ``final_ftrs * 9``; this holds
    for ``step_4_ftrs == 128``.
    """

    def __init__(self, step_1_ftrs: int, step_2_ftrs: int, step_3_ftrs: int, step_4_ftrs: int):
        #torch.manual_seed(1024)
        # Global side effect: new tensors default to CUDA floats from here on.
        # The CUDA generators handed to DataLoader in Trainer rely on this.
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
        super().__init__()
        head_ftrs = step_4_ftrs // 2
        self.final_ftrs = head_ftrs

        # Stem applied to the full spectrogram.
        self.ecnv1 = nn.Conv2d(1, step_1_ftrs, 3, 1, 1)
        for idx in (2, 3):
            setattr(self, f'ecnv{idx}', nn.Conv2d(step_1_ftrs, step_1_ftrs, 3, 1, 1))
        self.ftrs = FeatConv(step_1_ftrs)

        # Per-band heads: fcnv1/nrm1 -> lows, fcnv2/nrm2 -> mids, fcnv3/nrm3 -> highs.
        for idx in (1, 2, 3):
            setattr(self, f'fcnv{idx}', nn.Conv2d(step_4_ftrs, head_ftrs, 3, 1, 1, bias=False))
        for idx in (1, 2, 3):
            setattr(self, f'nrm{idx}', nn.BatchNorm2d(head_ftrs))

        # Band towers: same channel schedule, different pooling per band.
        # Stages 3 and 4 are built with final=True (no channel doubling).
        stage_ftrs = (step_1_ftrs, step_2_ftrs, step_3_ftrs, step_4_ftrs)
        band_pools = {
            'l': ((1, 2), (1, 2), 2, 2),
            'm': ((1, 2), 2, 2, 2),
            'h': (2, 2, 2, 2),
        }
        for band, pools in band_pools.items():
            for stage, (width, pool) in enumerate(zip(stage_ftrs, pools), start=1):
                setattr(self, f'{band}step{stage}', ReStep(width, pool, stage >= 3))

        self.flin = nn.Linear(head_ftrs * 9 * 4, head_ftrs)
        self.flin2 = nn.Linear(head_ftrs, 19)
        self.act = nn.ELU()
        self.flat = nn.Flatten()
        # Despite the name this is an adaptive *average* pool.
        self.fmxpool = nn.AdaptiveAvgPool2d(3)
        self.drop = nn.Dropout2d(0.12)

    def forward(self, inp):
        spec, feats = inp[0], inp[1]
        feats = self.ftrs(feats)

        for conv in (self.ecnv1, self.ecnv2, self.ecnv3):
            spec = self.act(conv(spec))
        spec = self.drop(spec)

        # Overlapping low / mid / high slices along dim 2.
        bands = [spec[:, :, :16, :], spec[:, :, 14:62, :], spec[:, :, 60:, :]]
        towers = (
            (self.lstep1, self.mstep1, self.hstep1),
            (self.lstep2, self.mstep2, self.hstep2),
            (self.lstep3, self.mstep3, self.hstep3),
            (self.lstep4, self.mstep4, self.hstep4),
        )
        for depth, stage in enumerate(towers):
            bands = [block(band) for block, band in zip(stage, bands)]
            if depth >= 2:
                # Dropout only after the last two stages.
                bands = [self.drop(band) for band in bands]

        heads = ((self.fcnv1, self.nrm1), (self.fcnv2, self.nrm2), (self.fcnv3, self.nrm3))
        bands = [self.act(norm(conv(band))) for (conv, norm), band in zip(heads, bands)]
        pooled = [self.flat(self.fmxpool(band)) for band in bands]

        joined = torch.cat((feats, *pooled), 1)
        return self.flin2(self.act(self.flin(joined)))

class Trainer:
    """Mixed-precision training / validation driver for a two-input classifier.

    The model is moved to CUDA on construction. Two (learning rate, weight
    decay) pairs are kept: pair 1 is used for regular epochs, pair 2 for the
    epochs listed in ``aug_epochs`` (which also train on augmented data).
    Scalars are written to TensorBoard and the weights are saved to
    ``models/<model_name>`` whenever the validation loss improves on
    ``save_threshold``.

    Relies on the module-level names ``proj_name``, ``aug_imgs`` and
    ``print_metrics``.

    Args:
        model: network called as ``model((dat1, dat2))``.
        model_name: name used for the TensorBoard run and the checkpoint file.
        lrn_rate1, wdecay1: optimiser settings for regular epochs.
        lrn_rate2, wdecay2: optimiser settings for augmentation epochs.
        save_threshold: validation loss that must be beaten before the first
            checkpoint is written; lowered after every save.
        log_root: directory under which ``<proj_name>/<model_name>`` logs are
            written. Defaults to the path previously hard-coded.
    """

    def __init__(self, model: nn.Module, model_name: str, lrn_rate1: float, lrn_rate2: float, wdecay1: float, wdecay2: float, save_threshold: float, log_root: str = 'C:/Users/BBA/Coding/tblogs') -> None:
        self.model = model.to('cuda')
        self.save_threshold = save_threshold
        self.lrn_rate1 = lrn_rate1
        self.lrn_rate2 = lrn_rate2
        self.wdecay1 = wdecay1
        self.wdecay2 = wdecay2
        self.optimizer = torch.optim.NAdam(self.model.parameters(), lr=lrn_rate1, weight_decay=wdecay1)
        self.loss_function = nn.CrossEntropyLoss()
        self.scaler = GradScaler(enabled=True)
        self.model_name = model_name
        self.tblog = SummaryWriter(log_dir=f'{log_root}/{proj_name}/{model_name}')
        # Global step counters for the per-batch TensorBoard curves.
        self.tbatch_counter = 0
        self.vbatch_counter = 0

    def split_data(self, dataset: list, ratio: float, splits: int) -> tuple[list[list], list]:
        """Stratified hold-out split followed by chunking of the training part.

        For every label, the first ``int(count * ratio)`` samples carrying it
        (in dataset order) go to the validation set; the rest is cut into
        ``splits`` consecutive groups.

        Changes from the previous implementation:
          * ``dataset`` is no longer mutated, so re-running training on the
            same list gives the same split;
          * samples are taken consecutively (the old pop-then-advance loop
            skipped the element following every pick and could run off the
            end of the list for ``ratio >= 0.5``);
          * one O(n) pass instead of repeated ``list.pop(i)``;
          * the last group absorbs the remainder instead of dropping it.

        Args:
            dataset: samples whose element ``[1]`` is a hashable label.
            ratio: per-label fraction moved to the validation set.
            splits: number of training groups.

        Returns:
            ``(train_groups, validation_samples)``.
        """
        quota = {label: int(count * ratio) for label, count in collections.Counter(s[1] for s in dataset).items()}
        vdata, remaining = [], []
        for sample in dataset:
            if quota[sample[1]] > 0:
                quota[sample[1]] -= 1
                vdata.append(sample)
            else:
                remaining.append(sample)
        group_size = len(remaining) // splits
        tdata = []
        for k in range(splits):
            stop = len(remaining) if k == splits - 1 else (k + 1) * group_size
            tdata.append(remaining[k * group_size:stop])
        return tdata, vdata

    def _set_hyperparams(self, lrn_rate: float, wdecay: float) -> None:
        """Apply a learning rate / weight decay pair to every param group."""
        for group in self.optimizer.param_groups:
            group['lr'] = lrn_rate
            group['weight_decay'] = wdecay

    def _train(self, dataset: DataLoader) -> float:
        """Run one pass over ``dataset`` with autocast and gradient scaling.

        Logs the running mean loss per batch and returns the mean batch loss.
        """
        total_loss = 0
        self.model.train()
        for i, x in enumerate(dataset):
            with autocast(enabled=True):
                dat1, dat2, labels = x[0][0].to('cuda'), x[0][1].to('cuda'), x[1].to('cuda')
                results = self.model((dat1, dat2))
                loss = self.loss_function(results, labels)
            self.scaler.scale(loss).backward()
            self.scaler.step(self.optimizer)
            self.scaler.update()
            total_loss += loss.item()
            # Gradients are cleared after the step, i.e. before the next backward.
            self.optimizer.zero_grad()
            self.tblog.add_scalar('Train Loss / Batch', total_loss / (i + 1), self.tbatch_counter)
            self.tbatch_counter += 1
        return total_loss / len(dataset)

    def _validate(self, dataset: DataLoader) -> tuple[float, list[tuple[int, int]]]:
        """Evaluate on ``dataset`` without gradients.

        Returns:
            ``(mean batch loss, predictions)`` where every prediction is a
            ``(true_label, predicted_label)`` pair of Python ints. This is the
            order ``print_metrics`` and ``group_predict`` use; the pairs were
            previously recorded the other way round, which swapped precision
            and recall in the training logs. The arg-max is taken on the raw
            logits (softmax is monotonic, so the class is the same).
        """
        total_loss = 0
        self.model.eval()
        predictions = []
        with torch.no_grad():
            for i, x in enumerate(dataset):
                dat1, dat2, labels = x[0][0].to('cuda'), x[0][1].to('cuda'), x[1].to('cuda')
                results = self.model((dat1, dat2))
                predictions.extend(zip(labels.tolist(), results.argmax(dim=1).tolist()))
                total_loss += self.loss_function(results, labels).item()
                self.tblog.add_scalar('Validation Loss / Batch', total_loss / (i + 1), self.vbatch_counter)
                self.vbatch_counter += 1
        return total_loss / len(dataset), predictions

    def train_model(self, dataset: list, target_epochs: int, batch_size: int, ratio: float, splits: int, aug_epochs: list[int]):
        """Train for ``target_epochs`` epochs, validating after every group.

        Args:
            dataset: samples of the form ``((dat1, dat2), label)``.
            target_epochs: number of epochs.
            batch_size: DataLoader batch size for training and validation.
            ratio: per-label validation fraction (see ``split_data``).
            splits: number of training groups per epoch.
            aug_epochs: zero-based epochs that use ``aug_imgs`` data and the
                second learning-rate / weight-decay pair.

        Afterwards the model is moved back to the CPU, the TensorBoard writer
        is closed and the CUDA cache is emptied.
        """
        self.batch_size = batch_size
        samples = len(dataset)
        train_data, val_data = self.split_data(dataset, ratio, splits)
        # Approximate batch count per group; only used in the status line.
        self.batches_per_group = int(int(samples * (1 - ratio)) / splits / self.batch_size)
        metric_tags = (
            'Macro Avg Precision / Epoch',
            'Macro Avg Recall / Epoch',
            'F1 Macro / Epoch',
            'F1 Micro / Epoch',
            'Mattheus Correlation Coefficient / Epoch',
            'Cohens Kappa / Epoch',
        )

        for ep in range(target_epochs):
            augment = ep in aug_epochs
            if augment:
                self._set_hyperparams(self.lrn_rate2, self.wdecay2)
            else:
                self._set_hyperparams(self.lrn_rate1, self.wdecay1)

            for i, group in enumerate(train_data):
                group_data = aug_imgs(group.copy()) if augment else group
                # The CUDA generator matches the CUDA default tensor type set by the model.
                tdata = DataLoader(group_data, batch_size=self.batch_size, shuffle=True, generator=torch.Generator(device='cuda'))
                train_loss = self._train(tdata)
                vdata = DataLoader(val_data, batch_size=self.batch_size, shuffle=True, generator=torch.Generator(device='cuda'))
                val_loss, predictions = self._validate(vdata)

                print(f'Status at batch {self.batches_per_group * (1 + i)},\tTLoss: {round(float(train_loss), 5)}\tVLoss: {round(float(val_loss), 5)}')
                # Metrics describe the current model state, so they are logged
                # as-is (they used to be divided by the epoch number).
                for tag, value in zip(metric_tags, print_metrics(predictions)):
                    self.tblog.add_scalar(tag, value, ep + 1)

                if val_loss < self.save_threshold:
                    self.save_threshold = val_loss
                    torch.save(self.model.state_dict(), f'models/{self.model_name}')

        self.model = self.model.to('cpu')
        self.tblog.close()
        torch.cuda.empty_cache()

def group_predict(adata: list, model: nn.Module, device: str = 'cuda') -> list[tuple]:
    """Predict one class per song by summing per-frame softmax scores.

    Args:
        adata: sequence of songs; ``song[0]`` is a sequence of
            ``(spectrogram, features)`` tensor pairs (one per frame) and
            ``song[1]`` is the song's label or identifier (the structure
            ``load_test`` produces).
        model: called as ``model((spectrogram, features))``; it must return
            exactly one row of logits per call.
        device: device used for inference. Defaults to ``'cuda'``, the
            previously hard-coded value.

    Returns:
        A list of ``(song[1], predicted_class)`` tuples. The class is the
        index of the largest summed score after rounding to 4 decimals
        (first index wins on ties).

    Raises:
        ValueError: if a song has no frames (this used to surface as an
            opaque numpy "zero-size array" ValueError).

    The model is moved back to the CPU when done, also if inference fails.
    """
    predictions = []
    model.to(device)
    model.eval()
    try:
        with torch.no_grad():
            for song in adata:
                frames, song_id = song[0], song[1]
                if len(frames) == 0:
                    raise ValueError(f'song {song_id!r} has no frames to predict on')
                frame_probs = []
                for frame in frames:
                    spctr, ftr = frame[0].to(device), frame[1].to(device)
                    # Single-element unpacking keeps the original requirement
                    # that the model output has exactly one row.
                    (logits,) = model((spctr, ftr))
                    frame_probs.append(torch.nn.functional.softmax(logits, dim=0).tolist())
                scores = np.asarray(frame_probs).sum(axis=0).round(4)
                predictions.append((song_id, int(np.argmax(scores))))
    finally:
        model.to('cpu')
    return predictions

def aug_imgs(img_data: list, aug_fraction: float = 0.3678, size: tuple[int, int] = (140, 128), transform_pool=None) -> list:
    """Augment the leading part of a sample list with random image transforms.

    The first ``int(len(img_data) * aug_fraction)`` samples each get one
    transform drawn with ``np.random.choice`` and are appended after the
    untouched remainder. ``img_data`` itself is not modified.

    Args:
        img_data: samples of the form ``((image, features), label)``.
        aug_fraction: fraction of samples (taken from the front) to augment.
            Defaults to the previously hard-coded 0.3678.
        size: ``(height, width)`` every augmented image must end up with.
            Defaults to the previously hard-coded (140, 128).
        transform_pool: callables to draw from; defaults to the module-level
            ``transforms`` list.

    Returns:
        New list: untouched samples first, augmented samples last.

    Fix: the size check used to compare the full ``img.size()`` (which
    includes the leading channel dim) with a 2-tuple, so it was always true
    and every image went through ``resize``. Only the last two dims are
    compared now.
    """
    pool = transforms if transform_pool is None else transform_pool
    # Images are shifted by +55 and cast to uint8 for the transforms, then
    # shifted back. Presumably the values lie in [-55, 200] (e.g. dB
    # spectrograms) -- TODO confirm; values outside that range do not
    # survive the uint8 cast.
    offset = 55
    target = tuple(size)

    aug_count = int(len(img_data) * aug_fraction)
    final = img_data[aug_count:]
    for (img, feats), label in img_data[:aug_count]:
        img = np.random.choice(pool)((img + offset).to(torch.uint8))
        if tuple(img.shape[-2:]) != target:
            img = torchvision.transforms.functional.resize(img, target)
        final.append(((img.to(torch.float32) - offset, feats), label))
    return final

def load_test(filename, vmode=False):
    """Load a pickled song file grouped by song, for ``group_predict``.

    Every pickled entry ``x`` is read as ``x[0]`` = frames, ``x[1]`` =
    per-song features, ``x[2]`` = song key.

    Args:
        filename: name of the pickle file inside ``data/``.
        vmode: when True the song key is replaced by its integer label looked
            up in ``data/train_labels`` (field 1 = key, field 3 = label of
            each comma-separated line). When False the key is returned as-is
            and the label file is no longer opened at all.

    Returns:
        One ``(frames, label_or_key)`` tuple per song, where ``frames`` is a
        list of ``(frame_tensor, feature_tensor)`` pairs, each with two
        leading singleton dims added. The feature tensor is built once per
        song and shared by that song's frames.

    Raises:
        KeyError: in ``vmode`` when a song key is missing from the label file.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only use it on
    # files you created yourself.
    with open(f'data/{filename}', 'rb') as f:
        adata = pickle.load(f)

    labels = {}
    if vmode:
        with open('data/train_labels', 'rt') as f:
            for line in f:
                if line.strip():
                    fields = line.split(',')
                    labels[fields[1]] = int(fields[3])

    songs = []
    for x in adata:
        feats = torch.tensor(x[1], dtype=torch.float32).unsqueeze(0).unsqueeze(0)
        frames = [(torch.tensor(y, dtype=torch.float32).unsqueeze(0).unsqueeze(0), feats) for y in x[0]]
        songs.append((frames, labels[x[2]] if vmode else x[2]))
    return songs

def load_train(filename):
    """Load a pickled song file as a flat list of per-frame training samples.

    Every pickled entry ``x`` is read as ``x[0]`` = frames, ``x[1]`` =
    per-song features, ``x[2]`` = song key. Labels come from
    ``data/train_labels`` (field 1 = key, field 3 = integer label of each
    comma-separated line; blank lines are skipped).

    Args:
        filename: name of the pickle file inside ``data/``.

    Returns:
        A list of ``((frame_tensor, feature_tensor), label)`` samples, each
        tensor with one leading singleton dim added. The feature tensor is
        built once per song and shared by that song's samples instead of
        being copied for every frame.

    Raises:
        KeyError: when a song key is missing from the label file.
    """
    labels = {}
    with open('data/train_labels', 'rt') as f:
        for line in f:
            if line.strip():
                fields = line.split(',')
                labels[fields[1]] = int(fields[3])

    # NOTE(security): pickle.load can execute arbitrary code; only use it on
    # files you created yourself.
    with open(f'data/{filename}', 'rb') as f:
        adata = pickle.load(f)

    samples = []
    for x in adata:
        feats = torch.tensor(x[1], dtype=torch.float32).unsqueeze(0)
        label = labels[x[2]]
        samples.extend(((torch.tensor(y, dtype=torch.float32).unsqueeze(0), feats), label) for y in x[0])
    return samples

def print_metrics(predictions, num_classes: int = 19):
    """Print a per-class report and return aggregate classification metrics.

    Args:
        predictions: iterable of ``(actual, guess)`` pairs; entries may be
            Python ints or 0-d tensors (they are converted with ``int``).
        num_classes: classes ``0 .. num_classes - 1`` are reported. Defaults
            to the previously hard-coded 19.

    Returns:
        ``(macro_precision, macro_recall, f1_macro, f1_micro, mcc, kappa)``,
        rounded to 4 decimals (kappa to 3).

        * Macro precision / recall average over the classes that occur as an
          actual label or as a guess; a ratio with a zero denominator counts
          as 0.
        * ``f1_macro`` is the harmonic mean of macro precision and macro
          recall, ``2pr / (p + r)``. The previous expression divided by
          ``1/p + 1/r`` and so was not an F1 score. Note this is not the mean
          of the per-class F1 values.
        * ``f1_micro`` equals accuracy for single-label predictions.
        * ``mcc`` and ``kappa`` use the multiclass confusion-count formulas.

    Classes that were never predicted or never present, and empty input, used
    to raise ZeroDivisionError; the affected ratios are now reported as 0.
    """
    def _ratio(num, den):
        return num / den if den else 0.0

    pairs = [(int(actual), int(guess)) for actual, guess in predictions]
    total = len(pairs)
    actual_counts = collections.Counter(actual for actual, _ in pairs)
    guess_counts = collections.Counter(guess for _, guess in pairs)
    hit_counts = collections.Counter(actual for actual, guess in pairs if actual == guess)

    print(f'Label\t\tRecall\t\tPrecision\tF1')
    metrics = []  # per class: (n_actual, n_guessed, n_correct)
    for x in range(num_classes):
        n_actual, n_guessed, hits = actual_counts[x], guess_counts[x], hit_counts[x]
        metrics.append((n_actual, n_guessed, hits))
        recall = _ratio(hits, n_actual)
        precision = _ratio(hits, n_guessed)
        f1 = _ratio(2 * precision * recall, precision + recall)
        print(f'{x}\t\t{round(recall, 3)}\t\t{round(precision, 3)}\t\t{round(f1, 3)}')

    seen = [m for m in metrics if m[0] or m[1]]
    correct = sum(m[2] for m in metrics)
    mprec = round(float(np.mean([_ratio(m[2], m[1]) for m in seen])), 4) if seen else 0.0
    mrec = round(float(np.mean([_ratio(m[2], m[0]) for m in seen])), 4) if seen else 0.0
    f1mac = round(_ratio(2 * mprec * mrec, mprec + mrec), 4)
    f1mic = round(_ratio(correct, total), 4)

    # Shared numerator of MCC and Cohen's kappa: c*s - sum_k(actual_k * guessed_k).
    cross = sum(m[0] * m[1] for m in metrics)
    agreement = correct * total - cross
    mcc_den = float(np.sqrt((total**2 - sum(m[0]**2 for m in metrics)) * (total**2 - sum(m[1]**2 for m in metrics))))
    mcc = round(_ratio(agreement, mcc_den), 4)
    ck = round(_ratio(agreement, total**2 - cross), 3)

    print('\n')
    print(f"Macro Precision\t\t{mprec}")
    print(f'Macro Recall\t\t{mrec}')
    print(f'F1 Macro\t\t{f1mac}')
    print(f'F1 Micro\t\t{f1mic}')
    print(f'MCC\t\t\t{mcc}')
    print(f'CK\t\t\t{ck}')
    print(correct, '/', total)
    return mprec, mrec, f1mac, f1mic, mcc, ck

# Augmentation pool used by aug_imgs: one entry is drawn at random per image.
blur = torchvision.transforms.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0))
erase = torchvision.transforms.RandomErasing(
    p=0.5,
    scale=(0.02, 0.1),
    ratio=(0.8, 1.2),
    value=0,
    inplace=False,
)
crop = torchvision.transforms.RandomResizedCrop(size=(140, 128), scale=(0.84, 1.0))
affine = torchvision.transforms.RandomAffine(
    degrees=0,
    translate=(0.2, 0),
    scale=(0.86, 1.14),
    shear=None,
    fill=0,
)
invert = torchvision.transforms.RandomInvert()
transforms = [blur, erase, crop, affine, invert]

# Sub-directory name for this project's TensorBoard logs (read by Trainer).
proj_name = 'Music_Classifier'


In [3]:
# Load the per-frame training samples from data/f3even; labels are looked up
# in data/train_labels by load_train.
audio_data = load_train('f3even')

In [4]:
# Build the model, report its trainable parameter count and run training.
train_session = 1
av_model = MusCls(step_1_ftrs=32, step_2_ftrs=64, step_3_ftrs=128, step_4_ftrs=128)
print(sum(param.numel() for param in av_model.parameters() if param.requires_grad))

# Optional warm start from an earlier checkpoint:
#av_model.load_state_dict(torch.load('models/mscls_x'))
trainer = Trainer(
    model=av_model,
    model_name=f'mscls_6_{train_session}',
    lrn_rate1=3e-4,
    lrn_rate2=2e-4,
    wdecay1=1e-5,
    wdecay2=2e-5,
    save_threshold=1.3,
)
trainer.train_model(
    dataset=audio_data,
    target_epochs=3,
    batch_size=32,
    ratio=0.09,
    splits=1,
    aug_epochs=[2],
)

2169251
Status at batch 4241,	TLoss: 1.66317	TPAccuracy 18.95367%	VLoss: 1.56698	VGAccuracy 46.94196%	VPAccuracy 20.86734%
Status at batch 4241,	TLoss: 1.45418	TPAccuracy 23.35914%	VLoss: 1.50979	VGAccuracy 49.00298%	VPAccuracy 22.09575%
Status at batch 4241,	TLoss: 1.41957	TPAccuracy 24.18168%	VLoss: 1.50163	VGAccuracy 50.66964%	VPAccuracy 22.27676%


In [23]:
# Manual snapshot of the current weights, independent of the checkpoints that
# Trainer.train_model writes when the validation loss improves.
torch.save(av_model.state_dict(), 'models/mscls_6_0')

In [None]:
# Drop the notebook's references to the model and trainer; the next cell
# rebuilds the model from the saved checkpoint.
del av_model
del trainer

In [None]:
# Rebuild the network, restore the saved weights and predict one class per
# song. vmode=True makes load_test return integer labels from
# data/train_labels, so each prediction is an (actual, guess) pair.
av_model = MusCls(32, 64, 128, 128)
av_model.load_state_dict(torch.load('models/mscls_6_0'))
test_data = load_test('f3eval', True)
predictions = group_predict(test_data, av_model)





In [None]:
# `metrics` only exists as a local inside print_metrics, so referencing it
# here raised NameError on a fresh kernel. Take the macro precision / recall
# from print_metrics' return value instead.
macro_precision, macro_recall = print_metrics(predictions)[:2]
macro_precision, macro_recall

In [19]:
# Predict the unlabeled test songs with the model from the previous cells and
# write the submission CSV. To use a different checkpoint, uncomment:
#av_model = MusCls(32, 64, 128, 128)
#av_model.load_state_dict(torch.load('models/mscls_5_1'))
test_data = load_test('f3test', False)
predictions = group_predict(test_data, av_model)
savename = 'f3'

with open(f'predictions/{savename}.csv', 'wt') as f:
    f.writelines(['song_id,genre_id\n'])
    for song_file, genre in predictions:
        # removesuffix drops the literal extension; rstrip(".ogg") strips any
        # trailing run of the characters '.', 'o' and 'g' instead.
        song_id = song_file.removesuffix('.ogg').lstrip('0')
        f.write(f'{song_id},{genre}\n')
    # NOTE(review): two hard-coded rows; presumably songs missing from the
    # pickled test set -- confirm where these ids and genres come from.
    f.writelines(['24013,0\n', '22612,1\n'])