<a href="https://colab.research.google.com/github/B0neh3ad/maicon-2023-rokafnet/blob/main/ishlove77/baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import cv2
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, datasets

def train_fn(model, train_loader, optimizer, criterion, epoch=None):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: Module to optimise; it is switched to training mode here.
        train_loader: Iterable yielding ``(audio, label)`` batches.
        optimizer: Optimizer; ``zero_grad()`` and ``step()`` are called once
            per batch.
        criterion: Callable ``criterion(output, label)`` returning a scalar
            loss tensor.
        epoch: Epoch number shown in the progress-bar description. When
            omitted, the module-level ``epoch`` variable is used if it
            exists (the behaviour existing four-argument callers rely on),
            otherwise ``'?'`` is shown instead of raising ``NameError``.

    Returns:
        The mean of the per-batch losses, or ``0.0`` when the loader yields
        no batches.
    """
    if epoch is None:
        # Backward compatibility: callers that do not pass ``epoch`` expect
        # the loop variable of the surrounding notebook cell to be displayed.
        epoch = globals().get('epoch', '?')

    model.train()

    running_loss = 0.0
    num_batches = 0
    tqdm_bar = tqdm(train_loader)
    for audio, label in tqdm_bar:
        audio = audio.to(CFG['device'])
        label = label.to(CFG['device'])

        output = model(audio)
        loss = criterion(output, label)

        # Clear gradients left over from the previous batch before backprop.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

        tqdm_bar.set_description("Epoch {} - train loss: {:.6f}".format(epoch, running_loss / num_batches))

    return running_loss / num_batches if num_batches else 0.0


def evaluate_fn(model, valid_loader, criterion, scheduler=None):
    """Evaluate ``model`` on ``valid_loader`` with gradient tracking disabled.

    Args:
        model: Module to evaluate; it is switched to eval mode here.
        valid_loader: Loader yielding ``(audio, label)`` batches. Its
            ``dataset`` attribute must support ``len()``.
        criterion: Callable ``criterion(output, label)`` returning a scalar
            loss tensor.
        scheduler: Optional scheduler; when given, ``scheduler.step`` is
            called once with the returned validation loss.

    Returns:
        ``(valid_loss, valid_accuracy)`` where ``valid_loss`` is the sum of
        the per-batch losses divided by the number of samples in the dataset
        and ``valid_accuracy`` is the percentage of matching arg-max classes.
    """
    model.eval()

    loss_sum = 0
    n_correct = 0
    progress = tqdm(valid_loader)
    with torch.no_grad():
        for audio, label in progress:
            audio, label = audio.to(CFG['device']), label.to(CFG['device'])

            output = model(audio)
            loss_sum += criterion(output, label).item()

            # Both tensors are reduced over dim 1, so labels presumably
            # arrive one-hot (or soft) encoded -- TODO confirm with the dataset.
            predicted_class = output.argmax(dim=1)
            target_class = label.argmax(dim=1)
            n_correct += predicted_class.eq(target_class).sum().item()

    n_samples = len(valid_loader.dataset)
    # NOTE(review): batch losses are summed and divided by the sample count.
    # If ``criterion`` already averages within a batch, this under-scales the
    # loss by roughly the batch size -- confirm the reduction of the criterion.
    valid_loss = loss_sum / n_samples
    valid_accuracy = 100. * n_correct / n_samples

    if scheduler is not None:
        scheduler.step(valid_loss)
    return valid_loss, valid_accuracy

In [None]:
import copy
import os

from sklearn.model_selection import KFold

# 5-fold cross-validated training. For every fold a fresh model is trained,
# the weights of the epoch with the highest validation accuracy are restored
# and exported, and the checkpoint is uploaded to Weights & Biases.

working_dir = '/kaggle/working'
kfold = KFold(n_splits=5, shuffle=True, random_state=CFG['seed'])

for fold, (train_idx, valid_idx) in enumerate(kfold.split(train_df)):
    # One W&B run per fold. The run is finished at the end of the fold, so it
    # has to be opened inside the loop as well; this also keeps ``step=epoch``
    # monotonically increasing within each run.
    wandb.init(
        project='speaker-recognition',
        name=f'fold-{fold}',
        config=CFG,
        settings=wandb.Settings(code_dir=working_dir),
    )

    model = get_model().to(CFG['device'])

    train = train_df.iloc[train_idx].reset_index(drop=True)
    valid = train_df.iloc[valid_idx].reset_index(drop=True)

    train_dataset = CustomDataset(df=train, train=True, augmentation=True)
    valid_dataset = CustomDataset(df=valid, train=True, augmentation=False)

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=CFG['batch_size'],
                              shuffle=True, num_workers=4)
    valid_loader = DataLoader(dataset=valid_dataset,
                              batch_size=CFG['batch_size'],
                              shuffle=False, num_workers=4)

    # Built once per fold: re-creating these every epoch would reset Adam's
    # moment estimates and the scheduler's plateau counter, so the learning
    # rate could never be reduced.
    optimizer = torch.optim.Adam(model.parameters(), lr=CFG['lr'])
    criterion = FocalLoss()
    # evaluate_fn steps the scheduler with the validation loss, which should
    # decrease, hence mode='min'.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.1, patience=1, verbose=True)

    best_valid_accuracy = 0
    # Snapshot by value: state_dict() returns references to the live
    # parameters, which keep changing as training continues.
    best_state = copy.deepcopy(model.state_dict())
    for epoch in range(1, CFG['epochs'] + 1):
        # train
        train_fn(model, train_loader, optimizer, criterion)

        # valid
        valid_loss, valid_accuracy = evaluate_fn(model, valid_loader, criterion, scheduler)

        print("\n[EPOCH: {}], \tModel: ConvNet, \tTest Loss: {:.4f}, \tTest Accuracy: {:.2f} % \n".format(
            epoch, valid_loss, valid_accuracy))
        wandb.log({'valid_loss': valid_loss, 'valid_accuracy': valid_accuracy}, step=epoch)

        # remember the weights of the best epoch so far
        if best_valid_accuracy < valid_accuracy:
            best_valid_accuracy = valid_accuracy
            best_state = copy.deepcopy(model.state_dict())

    # Restore the best epoch's weights so that ``best_model`` really is the
    # best model rather than the last one.
    model.load_state_dict(best_state)
    best_model = model

    # export best model
    # NOTE(review): every fold writes to the same local path, so only the last
    # fold's checkpoint survives on disk. The file name is kept as-is because
    # other cells may load it; consider adding the fold index.
    model_path = os.path.join(working_dir, 'best_model.pth')
    torch.save(best_model.state_dict(), model_path)

    # upload the checkpoint and close this fold's run
    wandb.save(model_path, base_path=working_dir)
    wandb.finish()