In [4]:
import os
import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torchaudio

class MusicDataset(Dataset):
    def __init__(self, data_dir, metadata_file, labels_dir, max_size_bytes=None):
        self.data_dir = data_dir
        self.metadata = pd.read_csv(metadata_file)
        self.labels_dir = labels_dir
        self.filenames = [filename for filename in os.listdir(data_dir) if filename.endswith('.wav')]
        if max_size_bytes is not None:
            total_size = 0
            new_filenames = []
            for filename in self.filenames:
                file_path = os.path.join(data_dir, filename)
                file_size = os.path.getsize(file_path)
                if total_size + file_size <= max_size_bytes:
                    new_filenames.append(filename)
                    total_size += file_size
                else:
                    break
            self.filenames = new_filenames

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        filename = self.filenames[idx]
        audio_file_path = os.path.join(self.data_dir, filename)
        waveform, sample_rate = torchaudio.load(audio_file_path)

        label_file_path = os.path.join(self.labels_dir, filename.replace('.wav', '.csv'))
        labels = pd.read_csv(label_file_path)

        return waveform, sample_rate, labels

# Load the data
data_dir = 'Data/musicnet/musicnet/train_data'
labels_dir = 'Data/musicnet/musicnet/train_labels'
dataset = MusicDataset(data_dir, labels_dir)

# Split the data into training and validation sets
train_data, val_data = train_test_split(dataset, test_size=0.2, random_state=42)

# Define your model
model = torch.nn.Linear(1, 1)  # Simple linear regression

# Define a loss function and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Train the model
for epoch in range(50):  # Number of epochs
    for waveform, label in train_data:
        # Convert the label to a tensor
        label = torch.tensor([float(label)])

        # Forward pass
        outputs = model(waveform)
        loss = criterion(outputs, label)

        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, 100, loss.item()))

# Evaluate the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for waveform, label in val_data:
        label = torch.tensor([float(label)])
        outputs = model(waveform)
        _, predicted = torch.max(outputs.data, 1)
        total += label.size(0)
        correct += (predicted == label).sum().item()

    print('Accuracy of the model on the test data: {} %'.format(100 * correct / total))

TypeError: MusicDataset.__init__() missing 1 required positional argument: 'labels_dir'