In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torchvision import transforms
from torchsummary import summary
import torch.nn.functional as F
import torchaudio
from torch.utils.data.sampler import SubsetRandomSampler
import os
import datetime
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import utils.utils_base as uBase
from models.KSL_EFFI_v3 import EfficientNet
from dataset_MT_KWS import MC_MT_KWS_Dataset, Overlap_interference
from IPython.display import Audio
from dataset_MT_newData import new_Dataset

random_seed = 42
torch.manual_seed(random_seed)
np.random.seed(random_seed)
random.seed(random_seed)

encoder_ch = [[6,64],[64,128],[128,256],[256,256]]
net = EfficientNet().cuda()
learning_rate = 0.0005


optimizer = optim.Adam(net.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[14,18,23], gamma=0.1)

coarse_loss = nn.CrossEntropyLoss(reduction="sum")
fine_loss = nn.MSELoss(reduction="sum")
cls_loss = nn.BCELoss(reduction="sum")
ext_loss = nn.L1Loss(reduction="sum")
b = 0

annotation = "newData3.csv"

train_annotation = 'newData3_train.csv'
valid_annotation = 'newData3_valid.csv'
test_annotation = 'newData3_test.csv'

train_dataset = new_Dataset(annotations_file=train_annotation)
valid_dataset = new_Dataset(annotations_file=valid_annotation)
test_dataset = new_Dataset(annotations_file=test_annotation)


batch_size = 32
train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size)
valid_dataloader = torch.utils.data.DataLoader(test_dataset,
                                               batch_size=10)

for i in range(27, 36):
    net = EfficientNet().cuda()
    print(i)
    path = "weight/20230530-133newData/" + str(i) + ".pt"
    net.load_state_dict(torch.load(path))

    net.eval()
    accuracy = 0.0
    loss = 0
    CoarW = 0. 
    FineW = 0.
    ExtW = 0.
    ClsW = 1.
    b1 = 0
    predicts = []
    gts = []

    coeff_total = CoarW + FineW + ExtW + ClsW
    CoarW = CoarW / coeff_total
    FineW = FineW / coeff_total
    ExtW = ExtW / coeff_total
    ClsW = ClsW / coeff_total
    class_acc = 0.0
    doa_err = 0.0
    doa_acc = 0.0
    count = 0.0
    tloss_ext = 0.
    tloss_cls = 0.
    tloss_coar = 0.
    tloss_fine = 0.
    total_loss = 0.0

    for batch_index, (x_sg, y_region, y_angle, y_class, bce, _) in enumerate(valid_dataloader):
        x_sg = x_sg.cuda()
        y_region = y_region.cuda()
        y_angle = y_angle.cuda()
        y_class = y_class.cuda()
        bce = bce.cuda()

        out_localizer1, out_localizer2, out_detector = net(x_sg)

        #loss_ext = ext_loss(y_class.unsqueeze(1).unsqueeze(1) * torch.tanh(out_extractor), y_sg)
        loss_cls = cls_loss(torch.sigmoid(out_detector), y_class.unsqueeze(1))
        loss_coarse = coarse_loss(y_class.unsqueeze(1) * out_localizer1, (y_class * y_region).long())
        loss_fine = fine_loss(y_class.unsqueeze(1) * bce * torch.sigmoid(out_localizer2), y_class.unsqueeze(1) * y_angle)

        #tloss_ext += loss_ext.item()
        tloss_cls += loss_cls.item()
        tloss_coar += loss_coarse.item()
        tloss_fine += loss_fine.item()

        loss = ClsW * loss_cls + \
            CoarW * loss_coarse + \
            FineW * loss_fine 

        total_loss += loss.item()
        _, region_pidx = out_localizer1.max(1)
        accuracy += region_pidx.eq(y_region).sum()

        class_acc += (torch.sigmoid(out_detector) > 0.5).squeeze(1).eq(y_class).sum()
        a, b, c = uBase.DOA(y_region, y_angle, out_localizer1, out_localizer2, 20, y_class, torch.sigmoid(out_detector))

        doa_err += a
        doa_acc += b
        count += c
        predicts.append(torch.sigmoid(out_detector).detach().cpu()[0])
        gts.append(y_class.detach().cpu())
    print('Val Result: Acc: {:0.4f}, C_ACC: {:0.4f}, DOA: {:0.4f}, ACC_k: {:0.4f}'.format(
            accuracy.float() / (len(test_dataset)),
            class_acc.float() / (len(test_dataset)),
            doa_err / count,
            doa_acc / count
        ))

27
Val Result: Acc: 0.7497, C_ACC: 0.9430, DOA: 12.5635, ACC_k: 0.8972
28
Val Result: Acc: 0.7488, C_ACC: 0.9451, DOA: 12.3970, ACC_k: 0.8986
29
Val Result: Acc: 0.7487, C_ACC: 0.9435, DOA: 12.5144, ACC_k: 0.8978
30
Val Result: Acc: 0.7472, C_ACC: 0.9442, DOA: 12.6404, ACC_k: 0.8958
31
Val Result: Acc: 0.7493, C_ACC: 0.9430, DOA: 12.5172, ACC_k: 0.8978
32
Val Result: Acc: 0.7510, C_ACC: 0.9447, DOA: 12.3919, ACC_k: 0.9002
33
Val Result: Acc: 0.7477, C_ACC: 0.9440, DOA: 12.5627, ACC_k: 0.8956
34
Val Result: Acc: 0.7497, C_ACC: 0.9440, DOA: 12.4957, ACC_k: 0.8998
35
Val Result: Acc: 0.7503, C_ACC: 0.9435, DOA: 12.3818, ACC_k: 0.9006


In [5]:
for threshold in [0.5]:
    predict2 = []
    gt2 = []
    for i, j in enumerate(predicts):
        predict2.append(float(j[0]))
        gt2.append(float(gts[i]))

    predict2 = np.array(predict2)
    gt2 = np.array(gt2)

    predict2[predict2 > 0.5] = 1
    predict2[predict2 <= 0.5] = 0
    gt2[gt2==0] = 0


    confusion_dictionary = {'TP':0, 'TN':0, 'FP':0, 'FN':0}
    for i in range(len(predict2)):
        #print(predict2[i], gt2[i])
        if predict2[i] and gt2[i]:
            confusion_dictionary['TP'] += 1
        elif not predict2[i] and not gt2[i]:
            confusion_dictionary['TN'] += 1
        elif predict2[i] and not gt2[i]:
            confusion_dictionary['FP'] += 1
        elif not predict2[i] and gt2[i]:
            confusion_dictionary['FN'] += 1

    predict2 = predict2 > 0.3
    correct = sum(predict2 == gt2)
    accuracy = correct / len(gt2)
    recall = confusion_dictionary['TP'] / (confusion_dictionary['TP'] + confusion_dictionary['FN'])
    precision = confusion_dictionary['TP'] / (confusion_dictionary['TP'] + confusion_dictionary['FP'])
    print(f'accuracy: {accuracy*100}, recall: {recall}, precision: {precision}')

accuracy: 93.86, recall: 0.9221035982601818, precision: 0.954954954954955


In [None]:
2/8
35: C_ACC: 0.9482, DOA: 15.3921, ACC_k: 0.8675
34: C_ACC: 0.9475, DOA: 15.5010, ACC_k: 0.8681
33: C_ACC: 0.9484, DOA: 15.4654, ACC_k: 0.8667
32: C_ACC: 0.9488, DOA: 15.5006, ACC_k: 0.8681
31: C_ACC: 0.9487, DOA: 15.4353, ACC_k: 0.8689
30: C_ACC: 0.9499, DOA: 15.3966, ACC_k: 0.8673

In [None]:
3/7
35: C_ACC: 0.9479, DOA: 14.4433, ACC_k: 0.8780
34: C_ACC: 0.9468, DOA: 14.5302, ACC_k: 0.8774
33: C_ACC: 0.9471, DOA: 14.3725, ACC_k: 0.8774
32: C_ACC: 0.9456, DOA: 14.3308, ACC_k: 0.8796
31: C_ACC: 0.9470, DOA: 14.5621, ACC_k: 0.8764
30: C_ACC: 0.9468, DOA: 14.6805, ACC_k: 0.8749

In [None]:
4/6
35: C_ACC: 0.9448, DOA: 13.9235, ACC_k: 0.8808
34: C_ACC: 0.9449, DOA: 13.8223, ACC_k: 0.8810
33: C_ACC: 0.9452, DOA: 13.8171, ACC_k: 0.8820
32: C_ACC: 0.9441, DOA: 13.9488, ACC_k: 0.8812
31: C_ACC: 0.9453, DOA: 14.0688, ACC_k: 0.8808
30: C_ACC: 0.9450, DOA: 13.9199, ACC_k: 0.8832

In [None]:
5/5
35: C_ACC: 0.9441, DOA: 13.2147, ACC_k: 0.8911
34: C_ACC: 0.9440, DOA: 13.2691, ACC_k: 0.8889
33: C_ACC: 0.9440, DOA: 13.2918, ACC_k: 0.8891
32: C_ACC: 0.9448, DOA: 13.4858, ACC_k: 0.8881
31: C_ACC: 0.9435, DOA: 13.2985, ACC_k: 0.8883
30: C_ACC: 0.9445, DOA: 13.2183, ACC_k: 0.8911

In [None]:
6/4
35: C_ACC: 0.9445, DOA: 13.1932, ACC_k: 0.8899
34: C_ACC: 0.9444, DOA: 13.0334, ACC_k: 0.8915
33: C_ACC: 0.9445, DOA: 13.1024, ACC_k: 0.8924
32: C_ACC: 0.9440, DOA: 13.1143, ACC_k: 0.8903
31: C_ACC: 0.9452, DOA: 13.1621, ACC_k: 0.8924
30: C_ACC: 0.9444, DOA: 13.2463, ACC_k: 0.8893

In [None]:
7/3
35:
34:
33:
32:
31:
30:

In [None]:
8/2
35: C_ACC: 0.9413, DOA: 12.3727, ACC_k: 0.8954
34: C_ACC: 0.9409, DOA: 12.5223, ACC_k: 0.8956
33: C_ACC: 0.9410, DOA: 12.5815, ACC_k: 0.8934
32: C_ACC: 0.9403, DOA: 12.3944, ACC_k: 0.8948
31: C_ACC: 0.9404, DOA: 12.5520, ACC_k: 0.8944
30: C_ACC: 0.9401, DOA: 12.6457, ACC_k: 0.8926

In [5]:
print('Val Result: Acc: {:0.4f}, C_ACC: {:0.4f}, DOA: {:0.4f}, ACC_k: {:0.4f}'.format(
        accuracy.float() / (len(train_dataset)),
        class_acc.float() / (len(train_dataset)),
        doa_err / count,
        doa_acc / count
    ))

Val Result: Acc: 0.1650, C_ACC: 0.9760, DOA: 89.7939, ACC_k: 0.1164


In [8]:
dic = {0:0, 1:0, 2:0, 3:0, 4:0, 5:0}
for batch_index, (x_sg, y_sg, y_region, y_angle, y_class, bce, _) in enumerate(train_dataloader):
    #print(y_class, y_region)
    for yc, yr in zip(y_class, y_region):
        if yc == 1:
            dic[yr.item()] += 1
print(dic)

{0: 6892, 1: 5282, 2: 6246, 3: 6444, 4: 4343, 5: 7123}


In [None]:
SGS-15: Val Result: Acc: 0.1558, C_ACC: 0.8310, DOA: 93.5251, ACC_k: 0.0997
SGL-14: Val Result: Acc: 0.9217, C_ACC: 0.4947, DOA: 8.4955, ACC_k: 0.9548
MT-12: Val Result: Acc: 0.8600, C_ACC: 0.8018, DOA: 10.1661, ACC_k: 0.9251
MT-13: Val Result: Acc: 0.8675, C_ACC: 0.8052, DOA: 9.1426, ACC_k: 0.9342
MT-14: Val Result: Acc: 0.8678, C_ACC: 0.7950, DOA: 7.8524, ACC_k: 0.9563

In [None]:
MT[4,6]-13: Val Result: Acc: 0.9076, C_ACC: 0.8227, DOA: 7.8645, ACC_k: 0.9557
MT[4,6]-14: Val Result: Acc: 0.9240, C_ACC: 0.8152, DOA: 6.5563, ACC_k: 0.9727
MT[4,6]-15: Val Result: Acc: 0.9253, C_ACC: 0.8220, DOA: 7.5267, ACC_k: 0.9669
                        
MT[3,7]-13: Val Result: Acc: 0.8920, C_ACC: 0.8188, DOA: 8.9324, ACC_k: 0.9430
MT[3,7]-14: Val Result: Acc: 0.9218, C_ACC: 0.8205, DOA: 7.4889, ACC_k: 0.9600
MT[3,7]-15: Val Result: Acc: 0.8787, C_ACC: 0.8222, DOA: 11.1863, ACC_k: 0.9215

MT[3,7]123-13: Val Result: Acc: 0.9161, C_ACC: 0.8183, DOA: 7.7130, ACC_k: 0.9565
MT[3,7]123-14: Val Result: Acc: 0.9283, C_ACC: 0.8204, DOA: 7.4296, ACC_k: 0.9630
MT[3,7]123-15: Val Result: Acc: 0.9176, C_ACC: 0.8278, DOA: 7.0289, ACC_k: 0.9682
                        
MT[2,8]-13: Val Result: Acc: 0.8741, C_ACC: 0.8255, DOA: 9.8845, ACC_k: 0.9240
MT[2,8]-14: Val Result: Acc: 0.9239, C_ACC: 0.8163, DOA: 7.6689, ACC_k: 0.9602
MT[2,8]-15: Val Result: Acc: 0.9051, C_ACC: 0.8223, DOA: 8.0383, ACC_k: 0.9546

In [24]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import models.KSL_base as KSL_base
from torchsummary import summary

import os
import sys
import math


def relu_fn(x):
    """ Swish activation function """
    return x * torch.sigmoid(x)


class Conv1dSamePadding(nn.Conv1d):
    """ 1D Convolutions like TensorFlow """
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True):
        super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)

    def forward(self, x):
        iw = x.size()[-1]
        kw = self.weight.size()[-1]
        sw = self.stride[-1]
        #print(iw, sw, kw)
        ow = math.ceil(iw / sw)
        pad = max((ow - 1) * self.stride[0] + (kw - 1) * self.dilation[0] + 1 - iw, 0)
        if pad > 0:
            x = F.pad(x, [pad//2, pad - pad //2])
        return F.conv1d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
    

def drop_connect(inputs, p, training):
    """ Drop connect. """
    if not training: return inputs
    batch_size = inputs.shape[0]
    keep_prob = 1 - p
    random_tensor = keep_prob
    #print(type(keep_prob), type(inputs))
    random_tensor += torch.rand([batch_size, 1, 1], dtype=inputs.dtype)  # uniform [0,1)
    binary_tensor = torch.floor(random_tensor).cuda()
    output = inputs / keep_prob * binary_tensor
    return output


class MBConvBlock(nn.Module):
    """
    Mobile Inverted Residual Bottleneck Block
    """

    def __init__(self, kernel_size, stride, expand_ratio, input_filters, output_filters, se_ratio, drop_n_add):
        super().__init__()
        
        self._bn_mom = 0.1
        self._bn_eps = 1e-03
        self.has_se = (se_ratio is not None) and (0 < se_ratio <= 1)
        self.expand_ratio = expand_ratio
        self.drop_n_add = drop_n_add

        # Filter Expansion phase
        inp = input_filters  # number of input channels
        oup = input_filters * expand_ratio  # number of output channels
        if expand_ratio != 1: # add it except at first block 
            self._expand_conv = Conv1dSamePadding(in_channels=inp, out_channels=oup, kernel_size=1, bias=False)
            self._bn0 = nn.BatchNorm1d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)

        # Depthwise convolution phase
        k = kernel_size
        s = stride
        self._depthwise_conv = Conv1dSamePadding(
            in_channels=oup, out_channels=oup, groups=oup,  # groups makes it depthwise(conv filter by filter)
            kernel_size=k, stride=s, bias=False)
        self._bn1 = nn.BatchNorm1d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)

        # Squeeze and Excitation layer, if desired
        if self.has_se:
            num_squeezed_channels = max(1,int(input_filters * se_ratio))  # input channel * 0.25 ex) block2 => 16 * 0.25 = 4
            self._se_reduce = Conv1dSamePadding(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1)
            self._se_expand = Conv1dSamePadding(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1)

        # Output phase
        final_oup = output_filters
        self._project_conv = Conv1dSamePadding(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False)
        self._bn2 = nn.BatchNorm1d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps)
    
    def forward(self, inputs, drop_connect_rate=0.2):
    
        # Expansion and Depthwise Convolution
        x = inputs
        if self.expand_ratio != 1:
            x = relu_fn(self._bn0(self._expand_conv(inputs)))
        x = relu_fn(self._bn1(self._depthwise_conv(x)))

        # Squeeze and Excitation
        if self.has_se:
            x_squeezed = F.adaptive_avg_pool1d(x, 1)
            x_squeezed = self._se_expand(relu_fn(self._se_reduce(x_squeezed)))
            x = torch.sigmoid(x_squeezed) * x
            
        # Output phase
        x = self._bn2(self._project_conv(x))

        # Skip connection and drop connect
        if self.drop_n_add == True:
            if drop_connect_rate:
                x = drop_connect(x, p=drop_connect_rate, training=self.training)
            x = x + inputs  # skip connection
        return x

class testNet(nn.Module):
    def __init__(self):
        super().__init__()

        # Batch norm parameters
        bn_mom = 0.1
        bn_eps = 1e-03

        # stem
        in_channels = 6
        out_channels = 32
        self._conv_stem = Conv1dSamePadding(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
        self._bn0 = nn.BatchNorm1d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)

        # Build blocks
        self._blocks = nn.ModuleList([]) # list 형태로 model 구성할 때
        # stage2 r1_k3_s11_e1_i32_o16_se0.25
        self._blocks.append(MBConvBlock(kernel_size=3, stride=1, expand_ratio=1, input_filters=32, output_filters=16, se_ratio=0.25, drop_n_add=False))
        self._blocks.append(MBConvBlock(3, 2, 6, 16, 24, 0.25, False))
        #self._blocks.append(MBConvBlock(3, 1, 6, 24, 24, 0.25, True))
        # stage3 r2_k3_s22_e6_i16_o24_se0.25
        
        
        self._blocks_ssl = nn.ModuleList([])
        self._blocks_kws = nn.ModuleList([])
        
        #self._blocks_ssl.append(MBConvBlock(3, 2, 6, 16, 24, 0.25, False))
        self._blocks_ssl.append(MBConvBlock(3, 1, 6, 24, 24, 0.25, True))
        self._blocks_ssl.append(MBConvBlock(3, 2, 6, 24, 40, 0.25, False))
        self._blocks_ssl.append(MBConvBlock(3, 1, 6, 40, 40, 0.25, True))
        self._blocks_ssl.append(MBConvBlock(3, 2, 6, 40, 80, 0.25, False))
        self._blocks_ssl.append(MBConvBlock(3, 1, 6, 80, 80, 0.25, True))
        self._blocks_ssl.append(MBConvBlock(3, 1, 6, 80, 80, 0.25, True))
        
        #self._blocks_kws.append(MBConvBlock(3, 2, 6, 16, 24, 0.25, False))
        self._blocks_kws.append(MBConvBlock(3, 1, 6, 24, 24, 0.25, True))
        self._blocks_kws.append(MBConvBlock(3, 2, 6, 24, 40, 0.25, False))
        self._blocks_kws.append(MBConvBlock(3, 1, 6, 40, 40, 0.25, True))
        self._blocks_kws.append(MBConvBlock(3, 2, 6, 40, 80, 0.25, False))
        self._blocks_kws.append(MBConvBlock(3, 1, 6, 80, 80, 0.25, True))
        self._blocks_kws.append(MBConvBlock(3, 1, 6, 80, 80, 0.25, True))
        
        
        # Head 
        in_channels = 80
        out_channels = 112
        self._conv_head_ssl = Conv1dSamePadding(in_channels, out_channels, kernel_size=1, bias=False)
        self._bn1_ssl = nn.BatchNorm1d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
        
        self._conv_head_kws = Conv1dSamePadding(in_channels, out_channels, kernel_size=1, bias=False)
        self._bn1_kws = nn.BatchNorm1d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)

        # Final linear layer
        self._dropout = 0.2
        self._num_classes = 12
        self._localizer = nn.Linear(out_channels, self._num_classes)
        self._spotter = nn.Linear(out_channels, 1)
  

    def extract_features(self, inputs):
        """ Returns output of the final convolution layer """

        # Stem
        x = relu_fn(self._bn0(self._conv_stem(inputs)))

        # Blocks
        for idx, block in enumerate(self._blocks):          
            x = block(x)
        return x
    

    def ssl_features(self, x):
        for idx, block in enumerate(self._blocks_ssl):          
            x = block(x)
        return x
    
    def kws_features(self, x):
        # Blocks
        for idx, block in enumerate(self._blocks_kws):          
            x = block(x)
        return x

    def forward(self, inputs):
        """ Calls extract_features to extract features, applies final linear layer, and returns logits. """

        # Convolution layers
        x = self.extract_features(inputs)

        x_ssl = self.ssl_features(x)
        x_kws = self.kws_features(x)
        
        # Head
        x_ssl = relu_fn(self._bn1_ssl(self._conv_head_ssl(x_ssl)))
        x_ssl = F.adaptive_max_pool1d(x_ssl, 1).squeeze(-1)
        
        x_kws = relu_fn(self._bn1_kws(self._conv_head_kws(x_kws)))
        x_kws = F.adaptive_max_pool1d(x_kws, 1).squeeze(-1)

        if self._dropout:
            x_ssl = F.dropout(x_ssl, p=self._dropout, training=self.training)
            x_kws = F.dropout(x_kws, p=self._dropout, training=self.training)

        x_local= self._localizer(x_ssl).view(-1, 2, 6)
        x_kws = self._spotter(x_kws)
        
        return x_local[:, 0, :], x_local[:, 1, :], x_kws