In [1]:
# Imports and GPU check
%matplotlib inline
import os
import random
import time

import librosa
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.init as init
import torchvision
import torchvision.transforms as transforms
from torch import nn, optim
from torch.utils.data import DataLoader, Subset
from torchvision import models
from tqdm import tqdm
from itertools import product

# Fix HTTPS certificate error
import ssl
ssl_create_default_https_context = ssl._create_unverified_context

# Check for GPU availability
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cpu


Setting seed as 42

In [2]:
def set_seed(seed=42):
    """Sets the seed for reproducibility."""
    # Python RNG
    random.seed(seed)
    
    # PyTorch RNGs
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    
    # Numpy RNG
    np.random.seed(seed)
    
    # OS RNG
    os.environ['PYTHONHASHSEED'] = str(seed)

def worker_init_fn(worker_id):    
    """Ensure that the data loading process is deterministic."""
    np.random.seed(np.random.get_state()[1][0] + worker_id)
    
set_seed(42)  

Hyperparameters

In [3]:
# Hyperparameters
batch_size = 128
dataset_size = 5000  # Reduced dataset size for quicker training

epochs = 50
learning_rate = 0.001

Loading the whole dataset (20gbs, takes 4 and a half minutes and freezes pc)

In [4]:
"""import os
import librosa

def load_data_chunked(data_dir, chunk_size=100):
    data = []
    labels = []
    filenames = [filename for filename in os.listdir(data_dir) if filename.endswith('.wav')]
    num_files = len(filenames)
    num_chunks = num_files // chunk_size + 1

    for i in range(num_chunks):
        start_index = i * chunk_size
        end_index = min((i + 1) * chunk_size, num_files)
        chunk_data = []
        chunk_labels = []

        for filename in filenames[start_index:end_index]:
            file_path = os.path.join(data_dir, filename)
            audio, sr = librosa.load(file_path, sr=None)
            chunk_data.append(audio)
            chunk_labels.append(filename)  # Assuming labels are the filenames in this case
        
        data.append(chunk_data)
        labels.append(chunk_labels)
    
    return data, labels

# Assuming your project directory structure is like this:
# /PyCharm_project_folder
#     /Data
#         /musicnet
#             /train_data
#             /test_data
#             /train_labels
#             /test_labels

project_dir = os.getcwd()
train_data_dir = os.path.join(project_dir, 'Data', 'musicnet','musicnet', 'train_data')
test_data_dir = os.path.join(project_dir, 'Data', 'musicnet','musicnet', 'test_data')
train_labels_dir = os.path.join(project_dir, 'Data', 'musicnet','musicnet', 'train_labels')
test_labels_dir = os.path.join(project_dir, 'Data', 'musicnet','musicnet', 'test_labels')

# Load data in chunks
chunk_size = 10  # Adjust the chunk size as needed
train_data_chunks, train_labels_chunks = load_data_chunked(train_data_dir, chunk_size=chunk_size)
test_data_chunks, test_labels_chunks = load_data_chunked(test_data_dir, chunk_size=chunk_size)

# Do something with the loaded data chunks
for i, (chunk_data, chunk_labels) in enumerate(zip(train_data_chunks, train_labels_chunks)):
    print(f"Train data chunk {i+1}: {len(chunk_data)} files")
    print(f"Train labels chunk {i+1}: {len(chunk_labels)} labels")

for i, (chunk_data, chunk_labels) in enumerate(zip(test_data_chunks, test_labels_chunks)):
    print(f"Test data chunk {i+1}: {len(chunk_data)} files")
    print(f"Test labels chunk {i+1}: {len(chunk_labels)} labels")"""





'import os\nimport librosa\n\ndef load_data_chunked(data_dir, chunk_size=100):\n    data = []\n    labels = []\n    filenames = [filename for filename in os.listdir(data_dir) if filename.endswith(\'.wav\')]\n    num_files = len(filenames)\n    num_chunks = num_files // chunk_size + 1\n\n    for i in range(num_chunks):\n        start_index = i * chunk_size\n        end_index = min((i + 1) * chunk_size, num_files)\n        chunk_data = []\n        chunk_labels = []\n\n        for filename in filenames[start_index:end_index]:\n            file_path = os.path.join(data_dir, filename)\n            audio, sr = librosa.load(file_path, sr=None)\n            chunk_data.append(audio)\n            chunk_labels.append(filename)  # Assuming labels are the filenames in this case\n        \n        data.append(chunk_data)\n        labels.append(chunk_labels)\n    \n    return data, labels\n\n# Assuming your project directory structure is like this:\n# /PyCharm_project_folder\n#     /Data\n#         /

Loading only 1gb of train_data 

In [5]:
import os
import librosa

def load_data(data_dir, max_size_bytes):
    data = []
    labels = []
    total_size = 0
    for filename in os.listdir(data_dir):
        if filename.endswith('.wav'):
            file_path = os.path.join(data_dir, filename)
            file_size = os.path.getsize(file_path)
            if total_size + file_size <= max_size_bytes:
                audio, sr = librosa.load(file_path, sr=None)
                data.append(audio)
                labels.append(filename)  # Assuming labels are the filenames in this case
                total_size += file_size
            else:
                break
    return data, labels

# Assuming your project directory structure is like this:
# /PyCharm_project_folder
#     /Data
#         /musicnet
#             /train_data
#             /test_data
#             /train_labels
#             /test_labels

project_dir = os.getcwd()
train_data_dir = os.path.join(project_dir, 'Data', 'musicnet', 'musicnet', 'train_data')
test_data_dir = os.path.join(project_dir, 'Data', 'musicnet', 'musicnet', 'test_data')
train_labels_dir = os.path.join(project_dir, 'Data', 'musicnet', 'musicnet', 'train_labels')
test_labels_dir = os.path.join(project_dir, 'Data', 'musicnet', 'musicnet', 'test_labels')

# Load approximately 1GB of training data
max_train_data_size = 1 * 1024 * 1024 * 1024  # 1GB in bytes
train_data, train_labels = load_data(train_data_dir, max_train_data_size)

# Do something with the loaded data
print("Train data:", train_data)
print("Train labels:", train_labels)


Train data: [array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([ 0.0000000e+00, -3.0517578e-05,  0.0000000e+00, ...,
        0.0000000e+00,  0.0000000e+00,  0.0000000e+00], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)] [44100, 44100, 44100, 44100, 44100, 44100, 44100, 44100, 44100, 44100, 44100]


Train data: [array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([ 0.0000000e+00, -3.0517578e-05,  0.0000000e+00, ...,
        0.0000000e+00,  0.0000000e+00,  0.0000000e+00], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32), array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)] [44100, 44100, 44100, 44100, 44100, 44100, 44100, 44100, 44100, 44100, 44100]


RuntimeError: stack expects each tensor to be equal size, but got [40771584] at entry 0 and [30710016] at entry 1