In [17]:
import librosa
import torch
import numpy as np
import os

# Definieren der Variablen
source_dirs = ["data/knocks", "data/knocks_with_noise", "data/noises"]

# Durchlaufen des Ordners und Vorverarbeiten der Audiodateien
data = []
labels = []
for source_dir in source_dirs:
    for filename in os.listdir(source_dir):
        filepath = os.path.join(source_dir, filename)
        
        # Ignorieren von Verzeichnissen
        if os.path.isdir(filepath):
            continue
        
        # Audio laden
        y, sr = librosa.load(filepath)

        # Berechnen von MFCCs
        mfcc = librosa.feature.mfcc(y=y, sr=sr)

        print(mfcc.shape, len(mfcc), len(mfcc[0]))

        # Hinzufügen einer Kanaldimension
        mfcc = np.expand_dims(mfcc, axis=0)
        print(mfcc.shape, len(mfcc), len(mfcc[0]))


        data.append(mfcc)
        labels.append(1 if filename.startswith("knock") else 0)


from torch.utils.data import Dataset

class AudioDataset(Dataset):
    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample = self.data[idx]
        if self.transform:
            sample = self.transform(sample)
        return sample, self.labels[idx]
    
from torchvision import transforms

# Definieren der Transformation
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
    
dataset = AudioDataset(data, labels, transform=transform)
# Speichern des Datasets
torch.save(dataset, 'dataset.pth')

(20, 87) 20 87
(1, 20, 87) 1 20


In [18]:
from torchvision import transforms

dataset = torch.load('dataset.pth')
print(type(dataset), len(dataset))
print(type(dataset.__getitem__(0)))
print(dataset.__getitem__(0))



<class '__main__.AudioDataset'>
<class 'tuple'>
(tensor([[[-1.2745e+03,  2.3464e+02, -2.7337e+01,  ...,  4.8698e+00,
          -1.0860e+00,  1.0392e+01]],

        [[-1.2037e+03,  2.3812e+02, -2.9034e+01,  ...,  1.4742e+00,
           2.0559e+00,  8.2397e+00]],

        [[-1.1983e+03,  2.4000e+02, -3.1538e+01,  ..., -5.7936e-01,
          -2.9374e+00,  6.7674e+00]],

        ...,

        [[-1.1934e+03,  2.5568e+02, -1.3737e+01,  ...,  6.0407e-01,
          -9.4601e+00,  9.7316e+00]],

        [[-1.2061e+03,  2.4852e+02, -7.8982e+00,  ...,  4.9105e+00,
          -9.9413e+00,  2.6640e+00]],

        [[-1.2610e+03,  2.4565e+02, -8.5355e+00,  ..., -9.6243e-01,
          -1.0896e+01,  8.8208e+00]]]), 1)
