In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import librosa
import numpy as np
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset

# make train-test-val folder

In [6]:
import os
import shutil
from glob import glob
import random

# Seed for reproducibility
random.seed(42)

# Source directories
source_dirs = ["data/knocks", "data/knocks_with_noise", "data/noises"]

# Destination directories
dest_dirs = {
    "train": "data/train",
    "test": "data/test",
    "validation": "data/validation"
}

# Collect all sound files
sound_files = []
for source_dir in source_dirs:
    sound_files.extend(glob(os.path.join(source_dir, "*.wav")))

# Shuffle the sound files
random.shuffle(sound_files)

# Split the data
num_test = 20
num_remaining = len(sound_files) - num_test
num_train = int(num_remaining * 0.8)
num_validation = num_remaining - num_train

# Ensure destination directories exist
for dir_path in dest_dirs.values():
    os.makedirs(dir_path, exist_ok=True)

# Function to copy files to destination
def copy_files(files, destination):
    for file in files:
        shutil.copy(file, destination)

# Copy files to their respective directories
copy_files(sound_files[:num_test], dest_dirs["test"])
copy_files(sound_files[num_test:num_test + num_train], dest_dirs["train"])
copy_files(sound_files[num_test + num_train:], dest_dirs["validation"])

print(f"Files distributed: {num_test} test, {num_train} train, {num_validation} validation.")

Files distributed: 20 test, 600 train, 151 validation.


In [9]:
class AudioMFCCDataset(Dataset):
    def __init__(self, audio_dir, n_mfcc=13, transform=None):
        """
        Args:
            audio_dir (string): Directory with all the audio files.
            n_mfcc (int): Number of MFCC features to extract.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.audio_dir = audio_dir
        self.audio_files = [f for f in os.listdir(audio_dir) if f.endswith('.wav')]
        self.n_mfcc = n_mfcc
        self.transform = transform

    def __len__(self):
        return len(self.audio_files)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        audio_path = os.path.join(self.audio_dir, self.audio_files[idx])
        audio, sr = librosa.load(audio_path, sr=None)
        sample = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=self.n_mfcc)

        # Extract label from filename
        filename = self.audio_files[idx]
        label = 1 if filename.split('_')[0].lower() == 'knock' else 0

        if self.transform:
            sample = self.transform(sample)

        return sample, label

In [10]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


train_dataset = AudioMFCCDataset(audio_dir='data/train', transform=transform)
test_dataset = AudioMFCCDataset(audio_dir='data/test', transform=transform)
validation_dataset = AudioMFCCDataset(audio_dir='data/validation', transform=transform)

# Example: Use DataLoader to iterate through the dataset
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=4, shuffle=True)

# Example of accessing a single batch
for i, batch in enumerate(train_dataloader):
    print(batch['mfcc'].shape, batch['sr'])
    if i == 0:  # Just print the first batch to check
        break

TypeError: Unexpected type <class 'numpy.ndarray'>

In [None]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder  # Assuming dataset is organized in a folder per class

# Step 2: Define transformations


# Step 3: Load a pretrained ResNet model
model = models.resnet18(pretrained=True)

# Step 4: Modify the model for your dataset
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, num_classes)  # Replace num_classes with your dataset's number of classes

# Define loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Step 6: Train or Fine-Tune the Model
# Example training loop (simplified)
model.train()
for epoch in range(num_epochs):  # num_epochs is the number of epochs you want to train for
    for inputs, labels in data_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")