# Packages

In [1]:
# Mount Google Drive at /content/drive (Colab-only API). Later cells read the
# challenge data from, and write checkpoints to, paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd /content/drive/MyDrive/COSRMAL_CHALLENGE/CORSMAL-Challenge-2022-Squids

/content/drive/MyDrive/COSRMAL_CHALLENGE/CORSMAL-Challenge-2022-Squids


In [3]:
import scipy
import librosa
import pandas as pd
import os
import numpy as np
from tqdm.notebook import tqdm
import scipy.io.wavfile
import time
import IPython
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.dataset import Subset
import json
from utils import AudioProcessing, audioPreprocessing, voting
from dataset import audioDataSet
from models import *
from helper import train_audio, evaluate_audio


# Pick the compute device once: the GPU when CUDA is usable, otherwise the CPU.
# Every model in this notebook is moved to this device.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

Using device: cuda


# Data Preprocessing

In [None]:
# Load the ground-truth annotation table for the training recordings
# (path is relative to the project directory selected above).
train_csv = './files/train.csv'
gt = pd.read_csv(train_csv)

# Preview the first rows as the cell output.
gt.head()

Unnamed: 0,id,container id,scenario,background,illumination,width at the top,width at the bottom,height,depth,container capacity,container mass,filling type,filling level,filling density,filling mass,object mass,handover starting frame,handover start timestamp,handover hand,action,nframes,folder_num,file_name,num,subject,filling_type,filling_level,back,light,camera_id,start,end
0,0,2,2,1,0,69.0,42.0,72.0,-1.0,185.0,2.0,2,1,0.82,76.0,78.0,-1,-1,-1,1.0,291576,2,s2_fi2_fu1_b1_l0,70,2,2,1,1,0,2,0.75,3.5
1,1,7,0,0,0,193.0,193.0,241.0,69.0,3209.397,59.0,0,0,0.0,0.0,59.0,-1,-1,-1,0.0,118483,7,s0_fi0_fu0_b0_l0,0,0,0,0,0,0,2,-1.0,-1.0
2,2,2,0,1,0,69.0,42.0,72.0,-1.0,185.0,2.0,3,1,1.0,93.0,95.0,-1,-1,-1,1.0,572008,2,s0_fi3_fu1_b1_l0,22,0,3,1,1,0,2,3.4,6.5
3,3,8,0,1,0,135.0,135.0,164.0,56.0,1239.84,31.0,0,0,0.0,0.0,31.0,-1,-1,-1,0.0,141680,8,s0_fi0_fu0_b1_l0,2,0,0,0,1,0,2,-1.0,-1.0
4,4,4,1,1,0,88.0,56.0,91.0,-1.0,296.0,86.0,1,1,0.34,45.0,131.0,-1,-1,-1,1.0,138681,4,s1_fi1_fu1_b1_l0,34,1,1,1,1,0,2,0.75,1.8


In [None]:
# Inputs live on Drive; derived files are written under the Colab-local
# /content/ directory.
base_path = '/content/'
audio_folder = '/content/drive/MyDrive/COSRMAL_CHALLENGE/train/audio'
mfcc_path = os.path.join(base_path, 'audios', 'mfcc')

# Make sure both output directories exist (safe to re-run).
for out_dir in (os.path.join(base_path, 'audios'), mfcc_path):
    os.makedirs(out_dir, exist_ok=True)

# Project helper from utils; NOTE(review): presumably converts the raw audio
# into MFCC features stored under mfcc_path, using `gt` for labels - confirm
# against utils.audioPreprocessing.
audioPreprocessing(audio_folder, gt, base_path, mfcc_path)

  0%|          | 0/684 [00:00<?, ?it/s]

# Train

In [None]:
# Build the dataset object shared by every training cell below.
# NOTE(review): presumably loads the features that audioPreprocessing wrote
# under base_path - confirm against dataset.audioDataSet.
mydataset = audioDataSet(base_path)

Dataset initializing...


  0%|          | 0/31812 [00:00<?, ?it/s]

## Net

In [None]:
# Train the baseline `Net` classifier on an 80/20 random split of `mydataset`,
# keeping the checkpoint with the lowest validation loss and the one with the
# highest validation accuracy.

# --- Hyperparameters ---
bs = 100
train_split = 0.8
lr = 1e-5
epochs = 200
n_samples = len(mydataset)
model = Net().to(device)
optimizer = optim.Adam(model.parameters(), lr=lr,  weight_decay=1e-5)
# Built once rather than on every epoch; only passed to the validation helper.
criterion = nn.CrossEntropyLoss()

best_loss = float('inf')
best_acc = 0

num_train = int(train_split * n_samples)
num_val = n_samples - num_train

# A seeded generator makes the train/val partition identical across runs, so
# results are reproducible and comparable between experiments.
# NOTE(review): the split is per dataset item; if several items come from the
# same recording, train and validation may overlap at recording level - verify.
train_set, val_set = torch.utils.data.random_split(
    mydataset, [num_train, num_val],
    generator=torch.Generator().manual_seed(0))

assert len(train_set) == num_train, "unexpected training split size"
assert len(val_set) == num_val, "unexpected validation split size"

train_loader = DataLoader(train_set, batch_size=bs, shuffle=True)
# Validation only aggregates metrics, so a fixed order is sufficient.
val_loader = DataLoader(val_set, batch_size=bs, shuffle=False)

# torch.save does not create missing directories.
ckpt_dir = os.path.join(base_path, 'audios')
os.makedirs(ckpt_dir, exist_ok=True)

for epoch in range(epochs):
  loss_train, correct_train = train_audio(model, train_loader, optimizer, device)
  loss_val, correct_val = evaluate_audio(model, val_loader, device, criterion = criterion)
  print("Epoch {}/{} train loss:{:.4f} train acc:{:.2f}% ".format(epoch+1,epochs, loss_train, 100 * correct_train/num_train))
  print("Epoch {}/{} val loss:{:.4f} val acc:{:.2f}% ".format(epoch+1,epochs, loss_val, 100 * correct_val/num_val))

  # NOTE(review): these checkpoints pickle the whole module, whereas the other
  # training cells store state_dicts; format kept so existing loaders still work.
  if loss_val < best_loss:
    best_loss = loss_val
    torch.save(model, os.path.join(ckpt_dir, "best_loss.pth"))

  if correct_val > best_acc:
    best_acc = correct_val
    torch.save(model, os.path.join(ckpt_dir, "best_val.pth"))


Epoch 1/200 train loss:1.0360 train acc:73.44% 
Epoch 1/200 val loss:0.9534 val acc:79.19% 
Epoch 2/200 train loss:0.9431 train acc:80.36% 
Epoch 2/200 val loss:0.9258 val acc:81.64% 
Epoch 3/200 train loss:0.9241 train acc:81.98% 
Epoch 3/200 val loss:0.9357 val acc:80.56% 
Epoch 4/200 train loss:0.9132 train acc:82.80% 
Epoch 4/200 val loss:0.9041 val acc:84.02% 
Epoch 5/200 train loss:0.8858 train acc:86.28% 
Epoch 5/200 val loss:1.0089 val acc:72.62% 
Epoch 6/200 train loss:0.8672 train acc:87.78% 
Epoch 6/200 val loss:0.8978 val acc:84.50% 
Epoch 7/200 train loss:0.8605 train acc:88.34% 
Epoch 7/200 val loss:0.9934 val acc:74.57% 
Epoch 8/200 train loss:0.8526 train acc:89.10% 
Epoch 8/200 val loss:0.8889 val acc:85.34% 
Epoch 9/200 train loss:0.8470 train acc:89.68% 
Epoch 9/200 val loss:0.8650 val acc:87.79% 
Epoch 10/200 train loss:0.8426 train acc:90.15% 
Epoch 10/200 val loss:0.8547 val acc:88.86% 
Epoch 11/200 train loss:0.8390 train acc:90.55% 
Epoch 11/200 val loss:0.8496 

## MobileNet

In [None]:
# Train MobileNetV3-Large (8 input channels, 4 classes) on an 80/20 random
# split of `mydataset`; a state_dict is saved each time validation accuracy
# improves, with the accuracy encoded in the file name.
from models import MobileNetV3_Large

mobile_save = '/content/drive/MyDrive/COSRMAL_CHALLENGE/task2'
# torch.save does not create missing directories.
os.makedirs(mobile_save, exist_ok=True)

# --- Hyperparameters ---
bs = 100
train_split = 0.8
lr = 1e-3
epochs = 200
n_samples = len(mydataset)
model = MobileNetV3_Large(input_channel=8, num_classes=4).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr,  weight_decay=1e-5)
# Built once rather than on every epoch; only passed to the validation helper.
criterion = nn.CrossEntropyLoss()

best_acc = 0

num_train = int(train_split * n_samples)
num_val = n_samples - num_train

# A seeded generator makes the train/val partition identical across runs.
train_set, val_set = torch.utils.data.random_split(
    mydataset, [num_train, num_val],
    generator=torch.Generator().manual_seed(0))

assert len(train_set) == num_train, "unexpected training split size"
assert len(val_set) == num_val, "unexpected validation split size"

train_loader = DataLoader(train_set, batch_size=bs, shuffle=True)
# Validation only aggregates metrics, so a fixed order is sufficient.
val_loader = DataLoader(val_set, batch_size=bs, shuffle=False)

for epoch in range(epochs):
  loss_train, correct_train = train_audio(model, train_loader, optimizer, device)
  loss_val, correct_val = evaluate_audio(model, val_loader, device, criterion = criterion)
  val_acc = 100 * correct_val/num_val
  print("{}/{} train loss:{:.4f} train acc:{:.2f}% val loss:{:.4f} val acc:{:.2f}%".format(
      epoch+1,epochs, loss_train, 100 * correct_train/num_train,
      loss_val, val_acc))

  # Keep a checkpoint for every new best validation accuracy.
  if correct_val > best_acc:
    best_acc = correct_val
    torch.save(model.state_dict(), os.path.join(mobile_save,
                                              'mobile{:.2f}.pth'.format(val_acc)))
  




1/200 train loss:0.6436 train acc:76.81% val loss:0.6356 val acc:75.44%
2/200 train loss:0.3759 train acc:87.12% val loss:0.8538 val acc:69.65%
3/200 train loss:0.3205 train acc:89.04% val loss:0.3209 val acc:88.75%
4/200 train loss:0.2887 train acc:89.76% val loss:0.3830 val acc:88.10%
5/200 train loss:0.2623 train acc:90.82% val loss:4.0947 val acc:37.92%
6/200 train loss:0.2388 train acc:91.45% val loss:0.5306 val acc:85.60%
7/200 train loss:0.2206 train acc:92.23% val loss:0.4271 val acc:85.93%
8/200 train loss:0.2042 train acc:92.79% val loss:0.3086 val acc:89.66%
9/200 train loss:0.1900 train acc:93.00% val loss:0.3779 val acc:87.02%
10/200 train loss:0.1671 train acc:93.95% val loss:0.4665 val acc:86.23%
11/200 train loss:0.1490 train acc:94.59% val loss:0.4374 val acc:84.21%
12/200 train loss:0.1354 train acc:95.16% val loss:0.4699 val acc:84.11%
13/200 train loss:0.1264 train acc:95.49% val loss:0.5517 val acc:86.75%
14/200 train loss:0.1105 train acc:95.83% val loss:0.5975 va

In [None]:
# Train the `mbv2_ca` model (8 input channels, 4 classes) on an 80/20 random
# split of `mydataset`; a state_dict is saved each time validation accuracy
# improves, with the accuracy encoded in the file name.
from models import MobileNetV3_Large, mbv2_ca

mobile_save = '/content/drive/MyDrive/COSRMAL_CHALLENGE/task2/mobileCA'
# torch.save does not create missing directories.
os.makedirs(mobile_save, exist_ok=True)

# --- Hyperparameters ---
bs = 100
train_split = 0.8
lr = 1e-3
epochs = 200
n_samples = len(mydataset)
model = mbv2_ca(in_c=8, num_classes=4).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr,  weight_decay=1e-5)
# Built once rather than on every epoch; only passed to the validation helper.
criterion = nn.CrossEntropyLoss()

best_acc = 0

num_train = int(train_split * n_samples)
num_val = n_samples - num_train

# A seeded generator makes the train/val partition identical across runs.
train_set, val_set = torch.utils.data.random_split(
    mydataset, [num_train, num_val],
    generator=torch.Generator().manual_seed(0))

assert len(train_set) == num_train, "unexpected training split size"
assert len(val_set) == num_val, "unexpected validation split size"

train_loader = DataLoader(train_set, batch_size=bs, shuffle=True)
# Validation only aggregates metrics, so a fixed order is sufficient.
val_loader = DataLoader(val_set, batch_size=bs, shuffle=False)

for epoch in range(epochs):
  loss_train, correct_train = train_audio(model, train_loader, optimizer, device)
  loss_val, correct_val = evaluate_audio(model, val_loader, device, criterion = criterion)
  val_acc = 100 * correct_val/num_val
  print("{}/{} train loss:{:.4f} train acc:{:.2f}% val loss:{:.4f} val acc:{:.2f}%".format(
      epoch+1,epochs, loss_train, 100 * correct_train/num_train,
      loss_val, val_acc))

  # Keep a checkpoint for every new best validation accuracy.
  if correct_val > best_acc:
    best_acc = correct_val
    torch.save(model.state_dict(), os.path.join(mobile_save,
                                              'mobile-ca{:.2f}.pth'.format(val_acc)))
  




1/200 train loss:0.6129 train acc:77.83% val loss:0.5256 val acc:82.24%
2/200 train loss:0.3848 train acc:86.98% val loss:0.3936 val acc:87.07%
3/200 train loss:0.3113 train acc:89.09% val loss:0.3765 val acc:87.47%
4/200 train loss:0.2804 train acc:90.13% val loss:0.4528 val acc:86.00%
5/200 train loss:0.2646 train acc:90.46% val loss:0.3243 val acc:88.97%
6/200 train loss:0.2361 train acc:91.46% val loss:0.3081 val acc:89.17%
7/200 train loss:0.2193 train acc:92.07% val loss:0.3885 val acc:86.74%
8/200 train loss:0.2048 train acc:92.39% val loss:0.5264 val acc:88.23%
9/200 train loss:0.1860 train acc:93.35% val loss:0.3215 val acc:90.21%
10/200 train loss:0.1710 train acc:93.88% val loss:0.2684 val acc:90.33%
11/200 train loss:0.1490 train acc:94.54% val loss:0.5264 val acc:84.47%
12/200 train loss:0.1292 train acc:95.16% val loss:0.4551 val acc:88.40%
13/200 train loss:0.1265 train acc:95.33% val loss:0.3580 val acc:90.85%
14/200 train loss:0.1154 train acc:95.78% val loss:0.5583 va

## EfficientNet

In [None]:
# Train `effnetv2_xl` on an 80/20 random split of `mydataset`; a state_dict is
# saved each time validation accuracy improves, with the accuracy encoded in
# the file name.
my_save_path = '/content/drive/MyDrive/COSRMAL_CHALLENGE'
# torch.save does not create missing directories.
effnet_ckpt_dir = os.path.join(my_save_path, 'audios', 'efficient')
os.makedirs(effnet_ckpt_dir, exist_ok=True)

# --- Hyperparameters ---
bs = 100
train_split = 0.8
lr = 1e-4
epochs = 200
n_samples = len(mydataset)
# NOTE(review): unlike the MobileNet cells, no channel/class counts are passed
# here - confirm the defaults of effnetv2_xl match the dataset.
model = effnetv2_xl().to(device)
optimizer = optim.Adam(model.parameters(), lr=lr,  weight_decay=1e-5)
# Built once rather than on every epoch; only passed to the validation helper.
criterion = nn.CrossEntropyLoss()

best_acc = 0

num_train = int(train_split * n_samples)
num_val = n_samples - num_train

# A seeded generator makes the train/val partition identical across runs.
train_set, val_set = torch.utils.data.random_split(
    mydataset, [num_train, num_val],
    generator=torch.Generator().manual_seed(0))

assert len(train_set) == num_train, "unexpected training split size"
assert len(val_set) == num_val, "unexpected validation split size"

train_loader = DataLoader(train_set, batch_size=bs, shuffle=True, num_workers=1)
# Validation only aggregates metrics, so a fixed order is sufficient.
val_loader = DataLoader(val_set, batch_size=bs, shuffle=False, num_workers=1)

for epoch in range(epochs):
  loss_train, correct_train = train_audio(model, train_loader, optimizer, device)
  loss_val, correct_val = evaluate_audio(model, val_loader, device, criterion = criterion)
  val_acc = 100 * correct_val/num_val

  print("{}/{} train loss:{:.4f} train acc:{:.2f}% val loss:{:.4f} val acc:{:.2f}%".format(
      epoch+1,epochs, loss_train, 100 * correct_train/num_train,
      loss_val, val_acc))

  # Keep a checkpoint for every new best validation accuracy.
  if correct_val > best_acc:
    best_acc = correct_val
    torch.save(model.state_dict(), os.path.join(effnet_ckpt_dir,
                                              "XL-{:.2f}.pth".format(val_acc)))

# Evaluation

In [6]:
# Restore a trained mbv2_ca checkpoint and run the project `voting` helper
# over the public test audio, writing its results into voting_dir.
model_pth = '/content/drive/MyDrive/COSRMAL_CHALLENGE/task2/mobileCA/mobile-ca96.35.pth'

model_pretrained = mbv2_ca(in_c=8, num_classes=4)
# map_location remaps the stored tensors onto the active device, so a
# checkpoint saved from a GPU run also loads on a CPU-only runtime.
model_pretrained.load_state_dict(torch.load(model_pth, map_location=device))
model_pretrained.to(device)
# Inference mode for layers that behave differently in training.
model_pretrained.eval()

voting_dir = '/content/drive/MyDrive/COSRMAL_CHALLENGE/task2/results'
# Note: this rebinds audio_folder, which earlier pointed at the training audio.
audio_folder = '/content/drive/MyDrive/COSRMAL_CHALLENGE/test_pub/audio'

voting(audio_folder, voting_dir, model_pretrained, device, save_size=64)

elapsed_time:185.6604723930359sec


In [7]:
# Read the voting results from voting_dir (presumably written by the voting()
# call above). The context manager closes the file handle even if parsing fails.
with open(os.path.join(voting_dir, "voting.json")) as f:
    vote_js = json.load(f)

# Transpose so that each top-level JSON key becomes one row.
vote = pd.DataFrame(vote_js).T
vote

Unnamed: 0,data_num,file,count_pred,final_pred,pred
000000,0,000000,"[6, 0, 0, 0]",0,"[0, 0, 0, 0, 0, 0]"
000001,1,000001,"[10, 17, 2, 0]",1,"[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
000002,2,000002,"[24, 41, 0, 0]",1,"[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, ..."
000003,3,000003,"[46, 0, 0, 43]",3,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, ..."
000004,4,000004,"[13, 0, 0, 24]",3,"[0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ..."
...,...,...,...,...,...
000223,223,000223,"[67, 0, 0, 27]",3,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, ..."
000224,224,000224,"[12, 0, 0, 0]",0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]"
000225,225,000225,"[56, 0, 0, 23]",3,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
000226,226,000226,"[19, 0, 0, 0]",0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [9]:
# Write the voting table to vote.csv in the working directory; the index is
# dropped from the output.
vote.to_csv(path_or_buf='vote.csv', index=False)

In [None]:
# Score the voted predictions against the filling-type labels in train.csv.
# NOTE(review): `vote` in the evaluation cell above is built from the
# `test_pub` audio folder, while these labels describe the training
# recordings. Confirm that the predictions being scored were produced on the
# training audio and that rows are in the same order as train.csv.
gt = pd.read_csv('files/train.csv')
labels_true = gt['filling_type'].to_numpy()
labels_pred = vote['final_pred'].to_numpy()

# Fail loudly on a size mismatch instead of relying on NumPy's handling of
# non-broadcastable comparisons, which yields a meaningless score or an
# opaque error depending on the NumPy version.
if labels_true.shape != labels_pred.shape:
    raise ValueError(
        'Cannot compute accuracy: {} ground-truth labels vs {} predictions'.format(
            len(labels_true), len(labels_pred)))

acc = np.sum(labels_true == labels_pred) / len(labels_true)
print('Acc: {:.2f}%'.format(100 * acc))

Acc: 100.00%
