In [1]:
import os
import glob
import random

from collections import namedtuple

import librosa

import matplotlib.pyplot as plt

import IPython.display as ipd

import numpy as np

In [2]:
import queue

import time

import threading

from tqdm import tqdm

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [4]:
FileInfo = namedtuple('file_info', 'file_path mode equipment status equip_id file_id')

BatchData = namedtuple('batch_data', 'mel equipment status')

In [5]:
dataset_path = 'dev_data/*'

dataset_direc_list = [path for path in glob.glob(dataset_path) if os.path.isdir(path)]
dataset_direc_list.sort()

print(dataset_direc_list)

['dev_data/ToyCar', 'dev_data/ToyConveyor', 'dev_data/fan', 'dev_data/pump', 'dev_data/slider', 'dev_data/valve']


In [6]:
equipments = ['ToyCar', 'ToyConveyor', 'fan', 'pump', 'slider', 'valve']

EQUIPMENT_DICT = {
    equip: i for i, equip in enumerate(equipments)
}

status = ['normal', 'anomaly']

STATUS_DICT = {
    stat: i for i, stat in enumerate(status)
}

In [7]:
STATUS_DICT

{'normal': 0, 'anomaly': 1}

In [8]:
def get_metadata(dataset_dir, mode='train'):
    
    file_path_list = glob.glob(direc + '/' + mode + '/*.wav')
    file_path_list.sort()
    metadata = [FileInfo(file_path, mode, os.path.basename(direc), *path_to_file_info(file_path)) for file_path in file_path_list]

    return metadata

In [9]:
def path_to_file_info(path):
    
    '''
    return status, equip_id, file_num
    '''
    
    segments = os.path.basename(path).split('_')
    
    return segments[0], segments[2], segments[3]
    

In [10]:
def audio_visual_inspection(metadatum):
    file = getattr(metadatum, 'file_path')

    print(file)

    y, sr = librosa.core.load(file, sr=None)
    
    mel = librosa.feature.melspectrogram(y, sr=sr, n_fft=int(sr * 0.1), hop_length=int(sr * 0.05), power=1, n_mels=160)
    mel = 20 * np.log10(np.maximum(mel, 1e-8))

    fig, axes = plt.subplots(2, 1, figsize=(15, 6))
    axes[0].plot(y)
    axes[0].set_xlim([0, len(y)])
    axes[1].imshow(mel, origin='reversed', aspect='auto')
    plt.tight_layout()
    plt.show()
    
    print(mel.shape)

    return ipd.Audio(y, rate=sr)

In [11]:
metadata_train = list()
metadata_test = list()

for direc in dataset_direc_list:
# direc = random.choice(dataset_direc_list)

# print(direc)

#     metadata_train.append(get_metadata(direc, 'train'))
#     metadata_test.append(get_metadata(direc, 'test'))
    metadata_train += get_metadata(direc, 'train')
    metadata_test += get_metadata(direc, 'test')
    
# print(list(map(len, metadata_train)))
# print(list(map(len, metadata_test)))

print(len(metadata_train))
print(len(metadata_test))

20119
10868


In [12]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [13]:
def load_mel(file_path):

    y, sr = librosa.core.load(file_path, sr=None)
    
    y = y[:sr * 10]
    
    mel = librosa.feature.melspectrogram(y, sr=sr, n_fft=int(sr * 0.1), hop_length=int(sr * 0.05), power=1, n_mels=160)
    mel = (20 * np.log10(np.maximum(mel, 1e-8)) + 160 ) / 160
    
    return mel

def batch_list_to_batch(batch_list):
    
    mel_batch_list = list()
    equip_list = list()
    status_list = list()
    
    for mel, equip, status in batch_list:
        mel_batch_list.append(mel)
        equip_list.append(EQUIPMENT_DICT[equip])
        status_list.append(STATUS_DICT[status])
        
    # print(mel.shape) # (16, 160, 201)
    
    return BatchData(np.stack(mel_batch_list), np.array(equip_list, dtype=int), np.array(status_list, dtype=int))

class DatasetFeeder:
    
    def __init__(self, metadata_list):
        self.batch_queue = queue.Queue(maxsize=100)
        self.batch_size = 16
        self.metadata_list = metadata_list
        self.batching_finished = False
        self.max_batch_num = int(np.ceil(len(self.metadata_list) / self.batch_size))
        
    def start_batching(self):
        
        random.shuffle(self.metadata_list)
        
        batch_data_list = list()
        
        for metadata in self.metadata_list:
            
            file_path = getattr(metadata, 'file_path')
            equipment = getattr(metadata, 'equipment')
            status = getattr(metadata, 'status')
            
            mel = load_mel(file_path)
            
            batch_data_list.append((mel, equipment, status))
            
            if len(batch_data_list) >= self.batch_size:
                self.batch_queue.put(batch_list_to_batch(batch_data_list))
                batch_data_list = list()
        
        if len(batch_data_list) > 0:
            self.batch_queue.put(batch_list_to_batch(batch_data_list))
            batch_data_list = list()
            
        self.batching_finished = True
    
    def batch_generator(self):
        
        self.batching_finished = False
        t = threading.Thread(target=self.start_batching, args=())
        t.start()
        
        while not (self.batching_finished and self.batch_queue.empty()):
            try : 
                batch = self.batch_queue.get_nowait()
                yield batch
                
                break
                
            except:
                time.sleep(1)
                
        t.join()
            
        return 0
        

In [14]:
class CRNN_Model(nn.Module):
    
    def __init__(self):
        super(CRNN_Model, self).__init__()
        self.cnn_layers_1 = nn.Sequential(nn.Conv2d(1, 64, (9, 3), dilation=2), 
                                        nn.BatchNorm2d(64), 
                                        nn.ReLU(),
                                        nn.Conv2d(64, 128, (9, 3), dilation=2), 
                                        nn.BatchNorm2d(128),
                                        nn.ReLU())
        
        self.cnn_layers_2 = nn.Sequential(nn.Conv2d(128, 128, (9, 3), dilation=2), 
                                nn.BatchNorm2d(128), 
                                nn.ReLU(),
                                nn.Conv2d(128, 128, (9, 3), dilation=2), 
                                nn.BatchNorm2d(128),
                                nn.ReLU())
        
        self.cnn_layers_3 = nn.Sequential(nn.Conv2d(128, 256, (9, 3), dilation=2), 
                        nn.BatchNorm2d(256), 
                        nn.ReLU(),
                        nn.Conv2d(256, 256, (9, 3), dilation=2), 
                        nn.BatchNorm2d(256),
                        nn.ReLU())
        
        self.cnn_layers_4 = nn.Sequential(nn.Conv2d(256, 256, (9, 3), dilation=2), 
                nn.BatchNorm2d(256), 
                nn.ReLU(),
                nn.Conv2d(256, 256, (9, 3), dilation=2), 
                nn.BatchNorm2d(256),
                nn.ReLU())
        
        self.cnn_layers_5 = nn.Sequential(nn.Conv2d(256, 512, (32, 3)), 
        nn.BatchNorm2d(512), 
        nn.ReLU())

        self.rnn_layers = nn.ModuleList((nn.GRU(512, 256, batch_first=True),
                                        nn.GRU(256, 128, batch_first=True),
                                        nn.GRU(128, 128, batch_first=True)))
        
        
        
    def forward(self, input_tensor):
        
        print(input_tensor.shape)
        
        tensor = self.cnn_layers_1(input_tensor)
        
        print(tensor.shape)
        
        tensor = self.cnn_layers_2(tensor)
        
        print(tensor.shape)
        
        tensor = self.cnn_layers_3(tensor)
        
        print(tensor.shape)
        
        tensor = self.cnn_layers_4(tensor)
        
        print(tensor.shape)
        
        tensor = self.cnn_layers_5(tensor)
        
        print(tensor.shape)
        
        tensor = torch.squeeze(tensor, 2)
        
        tensor.transpose_(1, 2)
        
        for rnn_layer in self.rnn_layers:
            tensor, _ = rnn_layer(tensor)
        
        return tensor

In [15]:
net = CRNN_Model().to(device)

In [21]:
mel_batch = np.expand_dims(batch[0], 1)

In [22]:
output_tensor = net(torch.tensor(mel_batch).to(device))

torch.Size([16, 1, 160, 201])
torch.Size([16, 128, 128, 193])
torch.Size([16, 128, 96, 185])
torch.Size([16, 256, 64, 177])
torch.Size([16, 256, 32, 169])
torch.Size([16, 512, 1, 167])


In [24]:
torch.squeeze(output_tensor).shape

torch.Size([16, 167, 128])

In [19]:
train_dataset_feeder = DatasetFeeder(metadata_train)

In [20]:
for i, batch in tqdm(enumerate(train_dataset_feeder.batch_generator()), total=train_dataset_feeder.max_batch_num):
    
    mel_batch = np.expand_dims(batch[0], 1)
    
    print(batch[0].shape)
#     print(batch[1])
#     print(batch[2])
    print('###############')
    pass

  0%|          | 1/1258 [00:01<21:23,  1.02s/it]

(16, 160, 201)
###############


KeyboardInterrupt: 

In [None]:
metadatum = random.choice(metadata_train)
audio_visual_inspection(metadatum)

In [None]:
while True:
    metadatum = random.choice(metadata_test)
    
    if 'anom' in metadatum[0]: break
    
audio_visual_inspection(metadatum)