In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchaudio.transforms as T
import torchaudio
import librosa
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

class AudioDataset(Dataset):
    """Dataset yielding ``(log-mel spectrogram, label)`` pairs from audio files.

    Args:
        file_paths: Sequence of audio file paths readable by ``librosa.load``.
        labels: Sequence of labels, index-aligned with ``file_paths``.
        n_mels: Number of mel bands computed for each spectrogram.
    """

    def __init__(self, file_paths, labels, n_mels=128):
        self.file_paths = file_paths
        self.labels = labels
        self.n_mels = n_mels

    def __len__(self):
        """Return the number of audio files in the dataset."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load one file and return ``(spectrogram, label)``.

        The spectrogram is a float32 tensor of shape ``(1, n_mels, frames)``;
        ``frames`` depends on the length of the clip, so items are not
        guaranteed to share a shape.
        """
        # sr=None keeps the file's native sample rate, so that rate must be
        # forwarded to the mel filterbank instead of letting it assume its
        # own default.
        audio, sample_rate = librosa.load(self.file_paths[idx], sr=None, mono=True)
        # The signal is passed by keyword: recent librosa releases reject a
        # positional audio argument here.
        mel_spec = librosa.feature.melspectrogram(
            y=audio, sr=sample_rate, n_mels=self.n_mels
        )
        log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
        # Add a leading channel axis and pin the dtype expected by nn layers.
        log_mel_spec = torch.as_tensor(log_mel_spec, dtype=torch.float32).unsqueeze(0)
        label = self.labels[idx]
        return log_mel_spec, label

class TransformerAudioClassifier(nn.Module):
    """Transformer-encoder classifier over spectrogram frames.

    Args:
        input_size: Feature size of every frame (the encoder's ``d_model``).
            Must be divisible by ``nhead``.
        num_classes: Number of output classes.
        nhead: Number of attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        dim_feedforward: Hidden size of each layer's feed-forward sub-network.
    """

    def __init__(self, input_size, num_classes, nhead=8, num_layers=6, dim_feedforward=2048):
        super().__init__()
        # An encoder-only stack is used because classification has no target
        # sequence to decode. Arguments are passed by keyword: nn.Transformer's
        # fourth positional parameter is num_decoder_layers, so passing
        # dim_feedforward positionally would build thousands of decoder layers.
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=input_size,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(input_size, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        Args:
            x: Either ``(batch, 1, input_size, frames)`` (a spectrogram with a
                single channel axis) or ``(batch, frames, input_size)``.

        Raises:
            ValueError: If ``x`` has an unsupported rank or channel count.
        """
        if x.dim() == 4:
            if x.size(1) != 1:
                raise ValueError(
                    f"expected a single channel, got input of shape {tuple(x.shape)}"
                )
            # (batch, 1, features, frames) -> (batch, frames, features)
            x = x.squeeze(1).transpose(1, 2)
        elif x.dim() != 3:
            raise ValueError(
                f"expected a 3-D or 4-D input, got shape {tuple(x.shape)}"
            )
        x = self.transformer(x)
        # Average over the frame axis so each clip yields one logit vector.
        # NOTE(review): no positional encoding is applied, so frame order does
        # not influence the prediction; add one if temporal order matters.
        x = x.mean(dim=1)
        return self.fc(x)

def train(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Module to optimise; switched to training mode first.
        train_loader: Iterable of ``(inputs, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss.
        optimizer: Optimizer stepped once per batch.
        device: Device every batch is moved to before the forward pass.

    Returns:
        The sum of the per-batch losses divided by ``len(train_loader)``.

    Raises:
        ZeroDivisionError: If ``train_loader`` contains no batches.
    """
    model.train()
    batch_losses = []
    for features, targets in tqdm(train_loader):
        features = features.to(device)
        targets = targets.to(device)

        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()

        batch_loss = criterion(model(features), targets)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(train_loader)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import pickle

# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open("path.pkl", "rb") as file:
    train_file_paths = pickle.load(file)
with open("label.pkl", "rb") as file:
    train_labels = pickle.load(file)

n_mels = 128  # number of mel bands AudioDataset computes per frame
input_size = n_mels  # the model's feature size must equal the number of mel bands
num_classes = 209  # Set to the number of classes in your dataset
# Fall back to the CPU so the cell still runs on machines without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def pad_collate(batch):
    """Pad spectrograms along the frame axis and stack them into one batch.

    Clips of different durations produce different frame counts, which the
    default collate function cannot stack.
    """
    specs, labels = zip(*batch)
    longest = max(spec.shape[-1] for spec in specs)
    # -80.0 is assumed to be the quietest value power_to_db emits with
    # ref=np.max and its default dynamic range - TODO confirm against librosa.
    padded = [
        nn.functional.pad(spec, (0, longest - spec.shape[-1]), value=-80.0)
        for spec in specs
    ]
    return torch.stack(padded), torch.tensor(labels)


train_dataset = AudioDataset(train_file_paths, train_labels, n_mels=n_mels)
# num_workers=0: worker processes cannot import a Dataset class defined in a
# notebook cell on Windows, which leaves the progress bar stuck at 0%.
train_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=0,
    collate_fn=pad_collate,
)

model = TransformerAudioClassifier(input_size, num_classes).to(device)

criterion = nn.CrossEntropyLoss()
op = torch.optim.Adam(model.parameters(), lr=0.001)
loss = train(model, train_loader, criterion, op, device)

1


  0%|          | 0/7723 [00:00<?, ?it/s]

In [6]:

import os
import json
# Module-level accumulators shared by search() and search2():
#   tmp  - unique "areaUse/place/subCategory" strings; a string's position is its class index
#   tmp2 - paths of every .wav file found, with forward slashes
#   tmp3 - [class index, labelName] pairs, one per label file
tmp, tmp2, tmp3 = [], [], []
def search(dirname):
    """Recursively read ``.json`` label files under ``dirname`` and index their categories.

    For every label file the category string ``areaUse/place/subCategory`` is
    looked up in the module-level list ``tmp`` (and appended when new); the
    pair ``[category index, labelName]`` is then appended to ``tmp3``.

    Args:
        dirname: Directory to scan.

    Returns:
        The module-level list ``tmp3`` (the same object on every call).
    """
    # NOTE(review): indices follow the order os.listdir happens to return, so
    # they may differ between runs or between datasets - confirm this matches
    # how the training labels were indexed.
    try:
        for entry in os.listdir(dirname):
            entry_path = os.path.join(dirname, entry)
            if os.path.isdir(entry_path):
                search(entry_path)
                continue
            if os.path.splitext(entry_path)[-1] != '.json':
                continue
            with open(entry_path.replace("\\", "/"), "r", encoding='UTF8') as f:
                meta = json.load(f)
            category = (
                meta["environment"]["areaUse"]
                + "/"
                + meta["environment"]["place"]
                + "/"
                + meta["annotations"][0]["subCategory"]
            )
            # Register unseen categories; the list position is the class index.
            if category not in tmp:
                tmp.append(category)
            tmp3.append([tmp.index(category), meta["annotations"][0]["labelName"]])
    except PermissionError:
        # Best effort: stop scanning a directory that cannot be read.
        pass
    return tmp3


def _collect_wav_paths(dirname):
    """Recursively append every ``.wav`` path under ``dirname`` to ``tmp2``."""
    try:
        for filename in os.listdir(dirname):
            full_filename = os.path.join(dirname, filename)
            if os.path.isdir(full_filename):
                _collect_wav_paths(full_filename)
            elif os.path.splitext(full_filename)[-1] == '.wav':
                # Normalise to forward slashes so the file name can be split off later.
                tmp2.append(full_filename.replace("\\", "/"))
    except PermissionError:
        # Best effort: stop scanning a directory that cannot be read.
        pass


def search2(dirname2):
    """Collect ``.wav`` files under ``dirname2`` and pair them with class indices.

    Every ``.wav`` path found is appended to the module-level list ``tmp2``.
    All paths in ``tmp2`` are then matched by file name against the
    ``[class index, labelName]`` pairs in ``tmp3``; paths without a matching
    label are dropped from the result.

    Args:
        dirname2: Directory to scan recursively.

    Returns:
        A ``(filepath, filelabel)`` tuple of index-aligned lists.
    """
    # The directory walk lives in a helper so the matching below runs once,
    # not once per visited directory.
    _collect_wav_paths(dirname2)

    # When a labelName occurs more than once, the last pair in tmp3 wins.
    name_label_dict = {name: label for label, name in tmp3}

    filepath = []
    filelabel = []
    for path in tmp2:
        label = name_label_dict.get(path.rsplit("/", 1)[-1])
        if label is not None:
            filepath.append(path)
            filelabel.append(label)

    return filepath, filelabel


# Index the validation label files first, then pair every validation .wav
# file with its class index (search2 reads the pairs that search collected).
# NOTE(review): these are absolute, machine-specific paths.
label_root = "C:/Users/User/Desktop/130.도시 소리 데이터/01.데이터/2.Validation/라벨링데이터/"
audio_root = "C:/Users/User/Desktop/130.도시 소리 데이터/01.데이터/2.Validation/원천데이터/"
a = search(label_root)
b, c = search2(audio_root)

In [10]:
# Write the category strings to a text file, one per line; the zero-based
# line position equals the class index.
with open("asdf.txt", "w", encoding = "utf-8") as temp:
    for i in tmp:
        temp.write(i + "\n")

In [17]:
# Show every (class index, label file name) pair collected in tmp3.
for class_index, label_name in tmp3:
    print(class_index, label_name)

0 1.자동차_10_1.wav
1 1.자동차_10003_1.wav
1 1.자동차_10008_1.wav
2 1.자동차_10009_1.wav
1 1.자동차_10037_1.wav
1 1.자동차_10042_1.wav
1 1.자동차_10055_1.wav
1 1.자동차_10063_1.wav
1 1.자동차_10066_1.wav
1 1.자동차_10084_1.wav
1 1.자동차_10086_1.wav
1 1.자동차_10087_1.wav
2 1.자동차_10093_1.wav
3 1.자동차_101_1.wav
2 1.자동차_10115_1.wav
1 1.자동차_10124_1.wav
1 1.자동차_10126_1.wav
1 1.자동차_10164_1.wav
2 1.자동차_10170_1.wav
1 1.자동차_10173_1.wav
1 1.자동차_10181_1.wav
1 1.자동차_10187_1.wav
2 1.자동차_10195_1.wav
1 1.자동차_10205_1.wav
1 1.자동차_10224_1.wav
1 1.자동차_10240_1.wav
2 1.자동차_10249_1.wav
2 1.자동차_10260_1.wav
1 1.자동차_10273_1.wav
0 1.자동차_103_1.wav
1 1.자동차_10329_1.wav
1 1.자동차_10330_1.wav
1 1.자동차_10342_1.wav
1 1.자동차_10345_1.wav
1 1.자동차_10366_1.wav
1 1.자동차_10368_1.wav
2 1.자동차_10372_1.wav
2 1.자동차_10389_1.wav
1 1.자동차_10390_1.wav
1 1.자동차_10395_1.wav
1 1.자동차_10407_1.wav
2 1.자동차_10412_1.wav
2 1.자동차_10420_1.wav
1 1.자동차_10428_1.wav
1 1.자동차_10438_1.wav
1 1.자동차_10446_1.wav
2 1.자동차_10463_1.wav
2 1.자동차_10468_1.wav
1 1.자동차_10481_1.wav
1 1.자동차_10492_1.wav
4 1.자동차

In [11]:
import pickle

# Persist the matched validation paths (b) and their class indices (c).
for pickle_name, payload in (("val_path.pkl", b), ("val_label.pkl", c)):
    with open(pickle_name, "wb") as handle:
        pickle.dump(payload, handle)

In [15]:
import pickle

# Read the validation labels and paths back from disk.
with open("val_label.pkl", "rb") as label_handle:
    val_labels = pickle.load(label_handle)

with open("val_path.pkl", "rb") as path_handle:
    val_paths = pickle.load(path_handle)

In [16]:
# Print each validation audio path followed by its class index.
for wav_path, class_index in zip(val_paths, val_labels):
    print(wav_path, class_index)

C:/Users/User/Desktop/130.도시 소리 데이터/01.데이터/2.Validation/원천데이터/1.자동차/1.차량경적/1.자동차_10003_1.wav 1
C:/Users/User/Desktop/130.도시 소리 데이터/01.데이터/2.Validation/원천데이터/1.자동차/1.차량경적/1.자동차_10008_1.wav 1
C:/Users/User/Desktop/130.도시 소리 데이터/01.데이터/2.Validation/원천데이터/1.자동차/1.차량경적/1.자동차_10009_1.wav 2
C:/Users/User/Desktop/130.도시 소리 데이터/01.데이터/2.Validation/원천데이터/1.자동차/1.차량경적/1.자동차_10037_1.wav 1
C:/Users/User/Desktop/130.도시 소리 데이터/01.데이터/2.Validation/원천데이터/1.자동차/1.차량경적/1.자동차_10042_1.wav 1
C:/Users/User/Desktop/130.도시 소리 데이터/01.데이터/2.Validation/원천데이터/1.자동차/1.차량경적/1.자동차_10055_1.wav 1
C:/Users/User/Desktop/130.도시 소리 데이터/01.데이터/2.Validation/원천데이터/1.자동차/1.차량경적/1.자동차_10063_1.wav 1
C:/Users/User/Desktop/130.도시 소리 데이터/01.데이터/2.Validation/원천데이터/1.자동차/1.차량경적/1.자동차_10066_1.wav 1
C:/Users/User/Desktop/130.도시 소리 데이터/01.데이터/2.Validation/원천데이터/1.자동차/1.차량경적/1.자동차_10084_1.wav 1
C:/Users/User/Desktop/130.도시 소리 데이터/01.데이터/2.Validation/원천데이터/1.자동차/1.차량경적/1.자동차_10086_1.wav 1
C:/Users/User/Desktop/130.도시 소리 데이터/01.데이터/2.Valid