## Data preprocessing

In [1]:
import scipy
import librosa
import pandas as pd
import os
import numpy as np
from tqdm.notebook import tqdm
import scipy.io.wavfile
import time
import IPython
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.dataset import Subset
import json
from PIL import Image
from torchvision import datasets, transforms
from dataset import MiniDataset
from loss import computeScoreType1, myLoss
from models import Net, effnetv2_xl, MobileNetV3_Large
from helper import train_image, evaluate_image

# Prefer the GPU when the runtime exposes one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('Using device:', device)

In [2]:
!cp /content/drive/MyDrive/COSRMAL_CHALLENGE/RGBD/crops_rgb_train.zip /content/crops_rgb_train.zip
!cp /content/drive/MyDrive/COSRMAL_CHALLENGE/RGBD/crops_rgb_test.zip /content/crops_rgb_test.zip
!cp /content/drive/MyDrive/COSRMAL_CHALLENGE/RGBD/crops_depth_train.zip /content/crops_depth_train.zip
!cp /content/drive/MyDrive/COSRMAL_CHALLENGE/RGBD/crops_depth_test.zip /content/crops_depth_test.zip
!cp /content/drive/MyDrive/COSRMAL_CHALLENGE/RGBD/labels_test.zip /content/labels_test.zip
!cp /content/drive/MyDrive/COSRMAL_CHALLENGE/RGBD/labels_train.zip /content/labels_train.zip


In [None]:
os.makedirs('/content/train',exist_ok=True)
os.makedirs('/content/test',exist_ok=True)
!unzip /content/crops_rgb_train.zip -d /content/train/
!unzip /content/crops_rgb_test.zip -d /content/test/
!unzip /content/crops_depth_train.zip -d /content/train/
!unzip /content/crops_depth_test.zip -d /content/test/
!unzip /content/labels_train.zip -d /content/train/
!unzip /content/labels_test.zip -d /content/test

# Train

## Capacity

In [127]:
# Train an EfficientNetV2-XL model with a single output on the RGB-D crops and
# save its weights every time the validation score beats the best one so far.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

my_save_path = '/content/drive/MyDrive/COSRMAL_CHALLENGE'
bs = 2
train_split = 0.8  # NOTE(review): not referenced in this cell
lr = 1e-4
epochs = 50

# No target list is passed, so MiniDataset uses whatever its default target is
# (presumably the container capacity, given the section heading -- TODO confirm).
train_set = MiniDataset('/content/train/',
                        '/content/train/labels',
                        '/content/train/crops_depth',
                        '/content/train/crops_rgb')
# NOTE(review): the '/content/test' split doubles as the validation set, so the
# checkpoint selection below is tuned on it.
val_set = MiniDataset('/content/test/',
                      '/content/test/labels',
                      '/content/test/crops_depth',
                      '/content/test/crops_rgb')
model = effnetv2_xl(num_classes=1).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)

best_loss = float('inf')  # NOTE(review): never updated; selection is by best_acc only
best_acc = 0

num_train = len(train_set)
num_val = len(val_set)

train_loader = DataLoader(train_set,
                          batch_size=bs,
                          shuffle=True,
                          num_workers=1)
# Shuffling buys nothing for evaluation; keep the validation order fixed.
val_loader = DataLoader(val_set,
                        batch_size=bs,
                        shuffle=False,
                        num_workers=1)

# Create the checkpoint folder up front: torch.save does not create missing
# parent directories, and failing after a full training epoch is expensive.
ckpt_dir = os.path.join(my_save_path, 'audios', 'RGBD')
os.makedirs(ckpt_dir, exist_ok=True)

for epoch in range(epochs):
  loss_train, correct_train = train_image(model, train_loader, optimizer, device, criterion = myLoss)
  loss_val, correct_val = evaluate_image(model, val_loader, device, myLoss)
  # The returned totals are normalised by the dataset sizes for reporting.
  print("{}/{} train loss:{:.4f} train acc:{:.2f}% val loss:{:.4f} val acc:{:.2f}%".format(
      epoch+1,epochs, loss_train / num_train, 100 * correct_train/num_train,
      loss_val /num_val, 100 * correct_val/num_val))

  # Keep the weights of every epoch that improves on the best validation score;
  # the score is embedded in the file name, so earlier bests are not overwritten.
  if correct_val > best_acc:
    best_acc = correct_val
    torch.save(model.state_dict(),
               os.path.join(ckpt_dir,
                            "XL-my{:.2f}.pth".format(100 * correct_val/num_val)))



Using device: cuda
1/50 train loss:0.5427 train acc:31.11% val loss:0.5720 val acc:26.78%
2/50 train loss:0.4562 train acc:48.15% val loss:0.4846 val acc:38.49%
3/50 train loss:0.3094 train acc:69.06% val loss:0.3300 val acc:67.00%
4/50 train loss:0.2892 train acc:71.08% val loss:0.3078 val acc:69.22%
5/50 train loss:0.2902 train acc:70.98% val loss:0.3136 val acc:68.64%
6/50 train loss:0.2866 train acc:71.34% val loss:0.3190 val acc:68.10%
7/50 train loss:0.2842 train acc:71.58% val loss:0.3055 val acc:69.45%
8/50 train loss:0.2883 train acc:71.17% val loss:0.3106 val acc:68.94%
9/50 train loss:0.2878 train acc:71.22% val loss:0.3123 val acc:68.77%
10/50 train loss:0.2875 train acc:71.25% val loss:0.3227 val acc:67.73%
11/50 train loss:0.2860 train acc:71.40% val loss:0.3280 val acc:67.20%
12/50 train loss:0.2822 train acc:71.78% val loss:0.3241 val acc:67.59%
13/50 train loss:0.2841 train acc:71.59% val loss:0.3093 val acc:69.07%
14/50 train loss:0.2835 train acc:71.65% val loss:0.31

In [124]:
# Train a MobileNetV3-Large model with a single output on the RGB-D crops and
# save its weights every time the validation score beats the best one so far.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

my_save_path = '/content/drive/MyDrive/COSRMAL_CHALLENGE'
bs = 30
train_split = 0.8  # NOTE(review): not referenced in this cell
lr = 1e-3
epochs = 100

# No target list is passed, so MiniDataset uses whatever its default target is
# (presumably the container capacity, given the section heading -- TODO confirm).
train_set = MiniDataset('/content/train/',
                        '/content/train/labels',
                        '/content/train/crops_depth',
                        '/content/train/crops_rgb')
# NOTE(review): the '/content/test' split doubles as the validation set, so the
# checkpoint selection below is tuned on it.
val_set = MiniDataset('/content/test/',
                      '/content/test/labels',
                      '/content/test/crops_depth',
                      '/content/test/crops_rgb')
model = MobileNetV3_Large(num_classes=1).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)

best_loss = float('inf')  # NOTE(review): never updated; selection is by best_acc only
best_acc = 0

num_train = len(train_set)
num_val = len(val_set)

# NOTE(review): drop_last=True skips the final partial batch each epoch, while the
# printed train metrics below still divide by len(train_set).
train_loader = DataLoader(train_set,
                          batch_size=bs,
                          shuffle=True,
                          num_workers=1,
                          drop_last=True)
# Shuffling buys nothing for evaluation; keep the validation order fixed.
val_loader = DataLoader(val_set,
                        batch_size=bs,
                        shuffle=False,
                        num_workers=1)

# Create the checkpoint folder up front: torch.save does not create missing
# parent directories, and failing after a full training epoch is expensive.
ckpt_dir = os.path.join(my_save_path, 'audios', 'RGBD')
os.makedirs(ckpt_dir, exist_ok=True)

for epoch in range(epochs):
  loss_train, correct_train = train_image(model, train_loader, optimizer, device, criterion = myLoss)
  loss_val, correct_val = evaluate_image(model, val_loader, device, myLoss)
  # The returned totals are normalised by the dataset sizes for reporting.
  print("{}/{} train loss:{:.4f} train acc:{:.2f}% val loss:{:.4f} val acc:{:.2f}%".format(
      epoch+1,epochs, loss_train/num_train, 100 * correct_train/num_train,
      loss_val/num_val, 100 * correct_val/num_val))

  # Keep the weights of every epoch that improves on the best validation score;
  # the score is embedded in the file name, so earlier bests are not overwritten.
  if correct_val > best_acc:
    best_acc = correct_val
    torch.save(model.state_dict(),
               os.path.join(ckpt_dir,
                            "mobile{:.2f}.pth".format(100 * correct_val/num_val)))



Using device: cuda
1/100 train loss:0.5963 train acc:24.71% val loss:0.5315 val acc:35.67%
2/100 train loss:0.4706 train acc:38.27% val loss:0.7242 val acc:0.60%
3/100 train loss:0.3704 train acc:55.41% val loss:0.3444 val acc:60.08%
4/100 train loss:0.3108 train acc:65.45% val loss:0.4562 val acc:53.52%
5/100 train loss:0.2637 train acc:72.20% val loss:0.2807 val acc:71.67%
6/100 train loss:0.2341 train acc:75.72% val loss:0.4366 val acc:55.67%
7/100 train loss:0.2190 train acc:77.07% val loss:0.2541 val acc:74.59%
8/100 train loss:0.1995 train acc:79.21% val loss:0.2256 val acc:77.44%
9/100 train loss:0.1982 train acc:79.42% val loss:0.3874 val acc:61.26%
10/100 train loss:0.2175 train acc:77.48% val loss:0.5762 val acc:41.36%
11/100 train loss:0.2191 train acc:77.38% val loss:0.2387 val acc:76.13%
12/100 train loss:0.2662 train acc:72.18% val loss:0.2828 val acc:71.19%
13/100 train loss:0.2357 train acc:75.66% val loss:0.5781 val acc:41.99%
14/100 train loss:0.2013 train acc:79.18% 

## Mass estimation

In [15]:
# Train a MobileNetV3-Large model with a single output for the 'container mass'
# label and save its weights every time the validation score improves.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

my_save_path = '/content/drive/MyDrive/COSRMAL_CHALLENGE'
bs = 30
train_split = 0.8  # NOTE(review): not referenced in this cell
lr = 1e-4
epochs = 100

# The last argument selects which label(s) the dataset yields as the target.
train_set = MiniDataset('/content/train/',
                        '/content/train/labels',
                        '/content/train/crops_depth',
                        '/content/train/crops_rgb',
                        ['container mass'])
# NOTE(review): the '/content/test' split doubles as the validation set, so the
# checkpoint selection below is tuned on it.
val_set = MiniDataset('/content/test/',
                      '/content/test/labels',
                      '/content/test/crops_depth',
                      '/content/test/crops_rgb',
                      ['container mass'])
model = MobileNetV3_Large(num_classes=1).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)

best_loss = float('inf')  # NOTE(review): never updated; selection is by best_acc only
best_acc = 0

num_train = len(train_set)
num_val = len(val_set)

# NOTE(review): drop_last=True skips the final partial batch each epoch, while the
# printed train metrics below still divide by len(train_set).
train_loader = DataLoader(train_set,
                          batch_size=bs,
                          shuffle=True,
                          num_workers=1,
                          drop_last=True)
# Shuffling buys nothing for evaluation; keep the validation order fixed.
val_loader = DataLoader(val_set,
                        batch_size=bs,
                        shuffle=False,
                        num_workers=1)

# Create the checkpoint folder up front: torch.save does not create missing
# parent directories, and failing after a full training epoch is expensive.
ckpt_dir = os.path.join(my_save_path, 'audios', 'RGBD')
os.makedirs(ckpt_dir, exist_ok=True)

for epoch in range(epochs):
  loss_train, correct_train = train_image(model, train_loader, optimizer, device, criterion = myLoss)
  loss_val, correct_val = evaluate_image(model, val_loader, device, myLoss)
  # The returned totals are normalised by the dataset sizes for reporting.
  print("{}/{} train loss:{:.4f} train acc:{:.2f}% val loss:{:.4f} val acc:{:.2f}%".format(
      epoch+1,epochs, loss_train/num_train, 100 * correct_train/num_train,
      loss_val/num_val, 100 * correct_val/num_val))

  # Keep the weights of every epoch that improves on the best validation score;
  # the score is embedded in the file name, so earlier bests are not overwritten.
  if correct_val > best_acc:
    best_acc = correct_val
    torch.save(model.state_dict(),
               os.path.join(ckpt_dir,
                            "mobile-mass{:.2f}.pth".format(100 * correct_val/num_val)))



Using device: cuda
1/100 train loss:0.5667 train acc:37.74% val loss:0.5231 val acc:44.64%
2/100 train loss:0.5083 train acc:43.19% val loss:0.5097 val acc:47.50%
3/100 train loss:0.4908 train acc:46.11% val loss:0.5032 val acc:48.44%
4/100 train loss:0.4784 train acc:47.61% val loss:0.4909 val acc:49.17%
5/100 train loss:0.4683 train acc:49.29% val loss:0.4888 val acc:50.77%
6/100 train loss:0.4605 train acc:51.14% val loss:0.4803 val acc:51.59%
7/100 train loss:0.4529 train acc:52.43% val loss:0.4681 val acc:52.47%
8/100 train loss:0.4413 train acc:53.27% val loss:0.4599 val acc:52.16%
9/100 train loss:0.4282 train acc:54.98% val loss:0.4517 val acc:53.61%
10/100 train loss:0.4172 train acc:55.60% val loss:0.4335 val acc:56.07%
11/100 train loss:0.4110 train acc:56.66% val loss:0.4307 val acc:56.48%
12/100 train loss:0.3991 train acc:58.20% val loss:0.4280 val acc:56.90%
13/100 train loss:0.3940 train acc:59.06% val loss:0.4195 val acc:57.86%
14/100 train loss:0.3837 train acc:60.33%

## Height

In [17]:
# Train a MobileNetV3-Large model with a single output for the 'height' label
# and save its weights every time the validation score improves.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

my_save_path = '/content/drive/MyDrive/COSRMAL_CHALLENGE'
bs = 30
train_split = 0.8  # NOTE(review): not referenced in this cell
lr = 1e-3
epochs = 100

# The last argument selects which label(s) the dataset yields as the target.
train_set = MiniDataset('/content/train/',
                        '/content/train/labels',
                        '/content/train/crops_depth',
                        '/content/train/crops_rgb',
                        ['height'])
# NOTE(review): the '/content/test' split doubles as the validation set, so the
# checkpoint selection below is tuned on it.
val_set = MiniDataset('/content/test/',
                      '/content/test/labels',
                      '/content/test/crops_depth',
                      '/content/test/crops_rgb',
                      ['height'])
model = MobileNetV3_Large(num_classes=1).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)

best_loss = float('inf')  # NOTE(review): never updated; selection is by best_acc only
best_acc = 0

num_train = len(train_set)
num_val = len(val_set)

# NOTE(review): drop_last=True skips the final partial batch each epoch, while the
# printed train metrics below still divide by len(train_set).
train_loader = DataLoader(train_set,
                          batch_size=bs,
                          shuffle=True,
                          num_workers=1,
                          drop_last=True)
# Shuffling buys nothing for evaluation; keep the validation order fixed.
val_loader = DataLoader(val_set,
                        batch_size=bs,
                        shuffle=False,
                        num_workers=1)

# Create the checkpoint folder up front: torch.save does not create missing
# parent directories, and failing after a full training epoch is expensive.
ckpt_dir = os.path.join(my_save_path, 'audios', 'RGBD')
os.makedirs(ckpt_dir, exist_ok=True)

for epoch in range(epochs):
  loss_train, correct_train = train_image(model, train_loader, optimizer, device, criterion = myLoss)
  loss_val, correct_val = evaluate_image(model, val_loader, device, myLoss)
  # The returned totals are normalised by the dataset sizes for reporting.
  print("{}/{} train loss:{:.4f} train acc:{:.2f}% val loss:{:.4f} val acc:{:.2f}%".format(
      epoch+1,epochs, loss_train/num_train, 100 * correct_train/num_train,
      loss_val/num_val, 100 * correct_val/num_val))

  # Keep the weights of every epoch that improves on the best validation score;
  # the score is embedded in the file name, so earlier bests are not overwritten.
  if correct_val > best_acc:
    best_acc = correct_val
    torch.save(model.state_dict(),
               os.path.join(ckpt_dir,
                            "mobile-height{:.2f}.pth".format(100 * correct_val/num_val)))



Using device: cuda
1/100 train loss:0.5241 train acc:34.56% val loss:0.6423 val acc:14.70%
2/100 train loss:0.3415 train acc:58.34% val loss:0.7800 val acc:0.76%
3/100 train loss:0.2358 train acc:74.05% val loss:0.2700 val acc:72.25%
4/100 train loss:0.1861 train acc:80.48% val loss:0.1998 val acc:80.02%
5/100 train loss:0.1459 train acc:84.69% val loss:0.2061 val acc:79.39%
6/100 train loss:0.1383 train acc:85.53% val loss:0.1615 val acc:83.85%
7/100 train loss:0.1295 train acc:86.39% val loss:0.1569 val acc:84.31%
8/100 train loss:0.1238 train acc:86.97% val loss:0.2292 val acc:77.08%
9/100 train loss:0.1379 train acc:85.56% val loss:0.1402 val acc:85.98%
10/100 train loss:0.1278 train acc:86.59% val loss:0.1362 val acc:86.38%
11/100 train loss:0.1339 train acc:85.93% val loss:0.2093 val acc:78.79%
12/100 train loss:0.1228 train acc:87.07% val loss:0.1471 val acc:85.29%
13/100 train loss:0.1266 train acc:86.69% val loss:0.1381 val acc:86.19%
14/100 train loss:0.1209 train acc:87.27% 

KeyboardInterrupt: ignored

In [None]:
# Diagnostic only: print the GPU status reported by nvidia-smi. The cell assigns
# nothing, so no other cell depends on it.
!nvidia-smi

## Width at the top

In [None]:
# Train a MobileNetV3-Large model with a single output for the 'width at the top'
# label and save its weights every time the validation score improves.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

my_save_path = '/content/drive/MyDrive/COSRMAL_CHALLENGE'
bs = 30
train_split = 0.8  # NOTE(review): not referenced in this cell
lr = 1e-3
epochs = 100

# The last argument selects which label(s) the dataset yields as the target.
train_set = MiniDataset('/content/train/',
                        '/content/train/labels',
                        '/content/train/crops_depth',
                        '/content/train/crops_rgb',
                        ['width at the top'])
# NOTE(review): the '/content/test' split doubles as the validation set, so the
# checkpoint selection below is tuned on it.
val_set = MiniDataset('/content/test/',
                      '/content/test/labels',
                      '/content/test/crops_depth',
                      '/content/test/crops_rgb',
                      ['width at the top'])
model = MobileNetV3_Large(num_classes=1).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)

best_loss = float('inf')  # NOTE(review): never updated; selection is by best_acc only
best_acc = 0

num_train = len(train_set)
num_val = len(val_set)

# NOTE(review): drop_last=True skips the final partial batch each epoch, while the
# printed train metrics below still divide by len(train_set).
train_loader = DataLoader(train_set,
                          batch_size=bs,
                          shuffle=True,
                          num_workers=1,
                          drop_last=True)
# Shuffling buys nothing for evaluation; keep the validation order fixed.
val_loader = DataLoader(val_set,
                        batch_size=bs,
                        shuffle=False,
                        num_workers=1)

# Create the checkpoint folder up front: torch.save does not create missing
# parent directories, and failing after a full training epoch is expensive.
ckpt_dir = os.path.join(my_save_path, 'audios', 'RGBD')
os.makedirs(ckpt_dir, exist_ok=True)

for epoch in range(epochs):
  loss_train, correct_train = train_image(model, train_loader, optimizer, device, criterion = myLoss)
  loss_val, correct_val = evaluate_image(model, val_loader, device, myLoss)
  # The returned totals are normalised by the dataset sizes for reporting.
  print("{}/{} train loss:{:.4f} train acc:{:.2f}% val loss:{:.4f} val acc:{:.2f}%".format(
      epoch+1,epochs, loss_train/num_train, 100 * correct_train/num_train,
      loss_val/num_val, 100 * correct_val/num_val))

  # Keep the weights of every epoch that improves on the best validation score;
  # the score is embedded in the file name, so earlier bests are not overwritten.
  if correct_val > best_acc:
    best_acc = correct_val
    torch.save(model.state_dict(),
               os.path.join(ckpt_dir,
                            "mobile-wt{:.2f}.pth".format(100 * correct_val/num_val)))



Using device: cuda
1/100 train loss:0.5314 train acc:39.50% val loss:0.6005 val acc:23.41%
2/100 train loss:0.2665 train acc:69.09% val loss:0.6406 val acc:34.77%
3/100 train loss:0.1906 train acc:79.53% val loss:0.5340 val acc:44.57%
4/100 train loss:0.1843 train acc:80.41% val loss:0.2215 val acc:77.22%
5/100 train loss:0.1499 train acc:84.31% val loss:0.3014 val acc:69.86%
6/100 train loss:0.1405 train acc:85.28% val loss:0.1717 val acc:82.83%
7/100 train loss:0.1491 train acc:84.38% val loss:0.5779 val acc:42.21%
8/100 train loss:0.1401 train acc:85.34% val loss:0.1335 val acc:86.65%
9/100 train loss:0.1587 train acc:83.36% val loss:0.1406 val acc:85.94%
10/100 train loss:0.1332 train acc:86.04% val loss:0.1495 val acc:85.05%
11/100 train loss:0.1254 train acc:86.82% val loss:0.1280 val acc:87.20%
12/100 train loss:0.1159 train acc:87.76% val loss:0.1435 val acc:85.65%
13/100 train loss:0.1275 train acc:86.57% val loss:0.3790 val acc:62.10%
14/100 train loss:0.1245 train acc:86.88%

## Width at the bottom

In [12]:
# Train a MobileNetV3-Large model with a single output for the 'width at the bottom'
# label and save its weights every time the validation score improves.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

my_save_path = '/content/drive/MyDrive/COSRMAL_CHALLENGE'
bs = 30
train_split = 0.8  # NOTE(review): not referenced in this cell
lr = 1e-3
epochs = 100

# The last argument selects which label(s) the dataset yields as the target.
train_set = MiniDataset('/content/train/',
                        '/content/train/labels',
                        '/content/train/crops_depth',
                        '/content/train/crops_rgb',
                        ['width at the bottom'])
# NOTE(review): the '/content/test' split doubles as the validation set, so the
# checkpoint selection below is tuned on it.
val_set = MiniDataset('/content/test/',
                      '/content/test/labels',
                      '/content/test/crops_depth',
                      '/content/test/crops_rgb',
                      ['width at the bottom'])
model = MobileNetV3_Large(num_classes=1).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)

best_loss = float('inf')  # NOTE(review): never updated; selection is by best_acc only
best_acc = 0

num_train = len(train_set)
num_val = len(val_set)

# NOTE(review): drop_last=True skips the final partial batch each epoch, while the
# printed train metrics below still divide by len(train_set).
train_loader = DataLoader(train_set,
                          batch_size=bs,
                          shuffle=True,
                          num_workers=1,
                          drop_last=True)
# Shuffling buys nothing for evaluation; keep the validation order fixed.
val_loader = DataLoader(val_set,
                        batch_size=bs,
                        shuffle=False,
                        num_workers=1)

# Create the checkpoint folder up front: torch.save does not create missing
# parent directories, and failing after a full training epoch is expensive.
ckpt_dir = os.path.join(my_save_path, 'audios', 'RGBD')
os.makedirs(ckpt_dir, exist_ok=True)

for epoch in range(epochs):
  loss_train, correct_train = train_image(model, train_loader, optimizer, device, criterion = myLoss)
  loss_val, correct_val = evaluate_image(model, val_loader, device, myLoss)
  # The returned totals are normalised by the dataset sizes for reporting.
  print("{}/{} train loss:{:.4f} train acc:{:.2f}% val loss:{:.4f} val acc:{:.2f}%".format(
      epoch+1,epochs, loss_train/num_train, 100 * correct_train/num_train,
      loss_val/num_val, 100 * correct_val/num_val))

  # Keep the weights of every epoch that improves on the best validation score;
  # the score is embedded in the file name, so earlier bests are not overwritten.
  if correct_val > best_acc:
    best_acc = correct_val
    torch.save(model.state_dict(),
               os.path.join(ckpt_dir,
                            "mobile-wb{:.2f}.pth".format(100 * correct_val/num_val)))



Using device: cuda
1/100 train loss:0.4981 train acc:40.20% val loss:0.3756 val acc:55.33%
2/100 train loss:0.2650 train acc:69.72% val loss:0.2494 val acc:73.34%
3/100 train loss:0.2077 train acc:77.56% val loss:0.8076 val acc:1.79%
4/100 train loss:0.1845 train acc:80.56% val loss:0.6332 val acc:34.38%
5/100 train loss:0.1622 train acc:82.80% val loss:0.1551 val acc:84.49%
6/100 train loss:0.1621 train acc:82.82% val loss:0.1393 val acc:85.95%
7/100 train loss:0.1503 train acc:83.98% val loss:0.3631 val acc:61.43%
8/100 train loss:0.1444 train acc:84.65% val loss:0.1260 val acc:87.15%
9/100 train loss:0.1362 train acc:85.55% val loss:0.1376 val acc:86.24%
10/100 train loss:0.1363 train acc:85.52% val loss:0.1353 val acc:86.47%
11/100 train loss:0.1459 train acc:84.48% val loss:0.1963 val acc:80.37%
12/100 train loss:0.1396 train acc:85.22% val loss:0.1243 val acc:87.30%
13/100 train loss:0.1306 train acc:86.06% val loss:0.1328 val acc:86.59%
14/100 train loss:0.1272 train acc:86.48% 