In [1]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import os
import glob
import random
import shutil
import torch
import torchvision
import sklearn
from sklearn.model_selection import train_test_split
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torch.utils.data import Subset
from torchvision import models
import torch.nn as nn
import torch.optim as optim  
#import datasets
#from datasets import load_dataset
from torchvision import transforms
from sklearn.metrics import confusion_matrix
import seaborn as sns
from sklearn.model_selection import KFold

In [2]:
source_dir = 'dataset'  # Directory containing your .wav files
output_dir = 'pleasework'  # Directory to save spectrogram images
train_dir = os.path.join(output_dir, 'train')
valid_dir = os.path.join(output_dir, 'valid')
test_dir = os.path.join(output_dir, 'test')

# Create necessary directories
for folder in [train_dir, valid_dir, test_dir]:
    os.makedirs(folder, exist_ok=True)

def save_spectrogram(wav_path, img_path):
    # Load audio file
    y, sr = librosa.load(wav_path, sr=None)
    # Generate spectrogram
    D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
    # Plot spectrogram
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(D, x_axis='time', y_axis='log', sr=sr)
    plt.colorbar(format='%+2.0f dB')
    # Save spectrogram as an image
    plt.savefig(img_path, bbox_inches='tight', pad_inches=0)
    plt.close()

def process_files(source_dir):
    files_by_class = {'B': [], 'I': [], 'F': []}

    # Iterate through all .wav files in source directory
    for filename in os.listdir(source_dir):
        if filename.endswith('.wav'):
            label = filename[0]  # B, I, or F from the filename
            if label in files_by_class:
                files_by_class[label].append(os.path.join(source_dir, filename))
    
    return files_by_class

def split_and_save(files_by_class):
    for label, files in files_by_class.items():
        # Split the files into train, test, and valid sets (80%, 10%, 10%)
        train_files, temp_files = train_test_split(files, test_size=0.3, random_state=42)
        valid_files, test_files = train_test_split(temp_files, test_size=0.4, random_state=42)

        # Save files to respective directories
        for folder, file_list in zip([train_dir, valid_dir, test_dir], [train_files, valid_files, test_files]):
            for file in file_list:
                # Create spectrogram and save it
                img_filename = os.path.splitext(os.path.basename(file))[0] + '.png'
                img_path = os.path.join(folder, label, img_filename)
                os.makedirs(os.path.dirname(img_path), exist_ok=True)
                save_spectrogram(file, img_path)

# Process the files and split them
files_by_class = process_files(source_dir)
split_and_save(files_by_class)

print("Preprocessing complete. Spectrogram images are saved.")

FileNotFoundError: [Errno 2] No such file or directory: 'dataset'

In [3]:
train_dataset = ImageFolder(root="pleasework/train", transform=transform)
valid_dataset = ImageFolder(root="pleasework/valid", transform=transform)

# Merge datasets
combined_dataset = torch.utils.data.ConcatDataset([train_dataset, valid_dataset])


# Set parameters
data_dir = "pleasework/train"
k_folds = 4
batch_size = 32
epochs = 10
learning_rate = 0.001

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

# Initialize device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

fold_results = []
for fold, (train_idx, valid_idx) in enumerate(kf.split(combined_dataset)):
    print(f"\n--- Fold {fold+1}/{k_folds} ---")
    
    # Create data subsets
    train_subset = Subset(combined_dataset, train_idx)
    valid_subset = Subset(combined_dataset, valid_idx)
    
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_subset, batch_size=batch_size, shuffle=False)
    
    # Load MobileNetV2 model
    model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)
    model.classifier[1] = nn.Linear(model.classifier[1].in_features, 3)
    model = model.to(device)
    
    # Define loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    # Training loop
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}")
    
    # Validation
    model.eval()
    correct, total = 0, 0
    y_true, y_pred = [], []
    with torch.no_grad():
        for images, labels in valid_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(predicted.cpu().numpy())
    
    fold_accuracy = 100 * correct / total
    fold_results.append(fold_accuracy)
    print(f"Validation Accuracy for Fold {fold+1}: {fold_accuracy:.2f}%")

# Print average accuracy across folds
print(f"\nAverage K-Fold Accuracy: {np.mean(fold_results):.2f}%")

# Load test set
test_dataset = ImageFolder(root="pleasework/test", transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Evaluate on test set
print("\n--- Final Test Evaluation ---")
model.eval()
correct, total = 0, 0
y_true, y_pred = [], []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

test_accuracy = 100 * correct / total
print(f"Test Accuracy: {test_accuracy:.2f}%")

# Confusion Matrix
conf_matrix = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(6, 5))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=test_dataset.classes, yticklabels=test_dataset.classes)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix - Final Test Set')
plt.show()


NameError: name 'transform' is not defined