In [1]:
import torch
import random
import numpy as np

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False



In [2]:
import torch
import torch.nn as nn
import torchaudio
import torchvggish
import os
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import torchaudio.transforms as T

class ChordDataset(Dataset):
    
    def __init__(self, file_paths, labels, sample_rate=16000):
        self.file_paths = file_paths
        self.labels = labels
        self.sample_rate = sample_rate
        
        # VGGish expects 96 mel bands
        self.mel_transform = T.MelSpectrogram(
            sample_rate=sample_rate,
            n_fft=2048,
            win_length=400,
            hop_length=160,
            n_mels=96,
            f_min=125,
            f_max=7500
        )
        
        # Log mel spectrogram
        self.amplitude_to_db = T.AmplitudeToDB()
        
    def __len__(self):
        return len(self.file_paths)
    
    def __getitem__(self, idx):
        # Load audio
        waveform, sr = torchaudio.load(self.file_paths[idx])
        #print(f"Getting: {self.file_paths[idx]}")
        
        # Resample if necessary
        if sr != self.sample_rate:
            resampler = T.Resample(sr, self.sample_rate)
            waveform = resampler(waveform)
        
        # Convert to mono if stereo
        if waveform.size(0) > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)
        
        # Get mel spectrogram
        mel_spec = self.mel_transform(waveform)
        
        # Convert to dB scale
        mel_spec = self.amplitude_to_db(mel_spec)

        # Add normalization step for mel spectrograms
        mel_spec = (mel_spec - mel_spec.mean()) / (mel_spec.std() + 1e-8)
        #print(f"Getting mel_spec: {mel_spec}")
        
        # VGGish expects input size of (batch_size, 1, 96, 64)
        # So we need to ensure our time dimension is 64 frames
        target_length = 64
        current_length = mel_spec.size(2)
        
        if current_length < target_length:
            # Pad if too short
            padding = target_length - current_length
            mel_spec = torch.nn.functional.pad(mel_spec, (0, padding))
            
        elif current_length > target_length:
            # Take center portion if too long
            start = (current_length - target_length) // 2
            mel_spec = mel_spec[:, :, start:start + target_length]
        
        # Add channel dimension
       # mel_spec = mel_spec.unsqueeze(0)
            
        return mel_spec, torch.tensor(self.labels[idx], dtype=torch.float32)

class ChordClassifier(nn.Module):
    def __init__(self, num_classes=1):
        super().__init__()
        self.feature_extractor = torchvggish.vggish()

        # Freeze layers, but keep last layers unfrozen
        for name, param in self.feature_extractor.named_parameters():
            if 'embeddings.4' in name or 'embeddings.2' in name:# or 'embeddings.0' in name:  # Unfreeze only the last layer
                param.requires_grad = True
                print(f"Unfrozen parameter: {name}, requires_grad: {param.requires_grad}")
            else:
                param.requires_grad = False  # Keep all other layers frozen
                print(f"Frozen parameter: {name}, requires_grad: {param.requires_grad}")
        
            
        # Simple classifier on top of VGGish embeddings
        self.classifier = nn.Sequential(
            nn.Linear(128, 64),  # VGGish outputs 128-dimensional embeddings
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, num_classes),
            nn.Sigmoid()
        )

    def forward(self, x):
        # Remove torch.no_grad() here
        features = self.feature_extractor(x)  # Remove torch.no_grad() here
        return self.classifier(features)

class ChordTrainer:
    def __init__(self, model, criterion, optimizer, device, threshold=0.5):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.device = device
        self.threshold = threshold
        
    def train_epoch(self, train_dataloader, val_dataloader):
        self.model.train()
        running_loss = 0.0

        for i, (inputs, labels) in enumerate(train_dataloader):
            print("Processing batch", i + 1)
            
            inputs, labels = inputs.to(self.device), labels.to(self.device)
              
            outputs = self.model(inputs)
            loss = self.criterion(outputs.squeeze(), labels)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()           
            running_loss += loss.item()
            
            # Every 3 batches, compute average training loss and validation loss
            if (i + 1) % 3 == 0:
                avg_train_loss = running_loss / 3
                print(f'Batch {i + 1}, Training Loss: {avg_train_loss:.4f}')
                running_loss = 0.0

                # Compute validation loss
                val_loss = self.evaluate_loss(val_dataloader)
                print(f'Batch {i + 1}, Validation Loss: {val_loss:.4f}')

    def evaluate_loss(self, dataloader):
        self.model.eval()
        running_loss = 0.0
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = self.model(inputs)
                loss = self.criterion(outputs.squeeze(), labels)
                running_loss += loss.item()

        # Return average loss over the validation set
        return running_loss / len(dataloader)

    def evaluate(self, dataloader):
        self.model.eval()
        all_preds = []
        all_labels = []
        
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = self.model(inputs)
                predicted = (outputs.squeeze() > self.threshold).float()
                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        return classification_report(
            all_labels, 
            all_preds, 
            target_names=["Minor", "Major"], 
            zero_division=0
        )

    

def get_dataloader(file_paths, labels, batch_size=16, shuffle=True):
    def worker_init_fn(worker_id):
        np.random.seed(42 + worker_id)
    
    dataset = ChordDataset(file_paths, labels)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, worker_init_fn=worker_init_fn)



from sklearn.model_selection import train_test_split

def main():

    set_seed(42)
    
    # Setup
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    file_dir = r'C:\Users\rapha\repositories\guitar_hero\data\raw\kaggle_chords\Training'
    file_dir_test = r'C:\Users\rapha\repositories\guitar_hero\data\raw\other'
    
    # Function to gather all .wav file paths from subdirectories
    def gather_file_paths(dir_path):
        file_paths = []
        for root, dirs, files in os.walk(dir_path):
            wav_files = [os.path.join(root, f) for f in files if f.endswith('.wav')]
            file_paths.extend(wav_files)
        return file_paths

    # Gather all file paths from the directory
    file_paths = gather_file_paths(file_dir)
    file_paths_test = gather_file_paths(file_dir_test)
    
    # Create labels
    labels = [0 if 'Minor' in f else 1 for f in file_paths if f.endswith('.wav')]
    test_labels = [0 if 'Minor' in f else 1 for f in file_paths_test if f.endswith('.wav')]

    # Train-validation split (80% train, 20% validation)
    train_paths, val_paths, train_labels, val_labels = train_test_split(
        file_paths, labels, test_size=0.2, random_state=42, stratify=labels
    )

    batch_size = 16
    
    # DataLoader for training and validation sets
    train_dataloader = get_dataloader(train_paths, train_labels, batch_size=batch_size)
    val_dataloader = get_dataloader(val_paths, val_labels, batch_size=batch_size)
    test_dataloader = get_dataloader(file_paths_test, test_labels, batch_size=batch_size)
    
    # Initialize model
    model = ChordClassifier().to(device)
    
    # Check trainable parameters
    total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Total trainable parameters: {total_trainable_params}")
    
    # Training setup
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    trainer = ChordTrainer(model, criterion, optimizer, device, threshold=0.5)
    
    # Training loop
    num_epochs = 1
    for epoch in range(num_epochs):
        print(f'\nEpoch {epoch + 1}/{num_epochs}')
        trainer.train_epoch(train_dataloader, val_dataloader)  # Pass both train and val dataloaders

    # Evaluation on validation set
    report = trainer.evaluate(val_dataloader)
    print('\nValidation Results:')
    print(report)

     # Evaluation
    report = trainer.evaluate(test_dataloader)
    print('\nTest Results:')
    print(report)


if __name__ == '__main__':
    # Set the seed value
    main()

Frozen parameter: features.0.weight, requires_grad: False
Frozen parameter: features.0.bias, requires_grad: False
Frozen parameter: features.3.weight, requires_grad: False
Frozen parameter: features.3.bias, requires_grad: False
Frozen parameter: features.6.weight, requires_grad: False
Frozen parameter: features.6.bias, requires_grad: False
Frozen parameter: features.8.weight, requires_grad: False
Frozen parameter: features.8.bias, requires_grad: False
Frozen parameter: features.11.weight, requires_grad: False
Frozen parameter: features.11.bias, requires_grad: False
Frozen parameter: features.13.weight, requires_grad: False
Frozen parameter: features.13.bias, requires_grad: False
Frozen parameter: embeddings.0.weight, requires_grad: False
Frozen parameter: embeddings.0.bias, requires_grad: False
Unfrozen parameter: embeddings.2.weight, requires_grad: True
Unfrozen parameter: embeddings.2.bias, requires_grad: True
Unfrozen parameter: embeddings.4.weight, requires_grad: True
Unfrozen para

In [15]:
import os

def add_minor_to_filenames(directory_path):
    """
    This function adds '-Minor' to all filenames in the specified directory.
    
    :param directory_path: Path to the directory where files are located
    """
    try:
        # Loop through all files in the specified directory
        for filename in os.listdir(directory_path):
            # Construct full file path
            full_file_path = os.path.join(directory_path, filename)
            
            # Check if it is a file (not a directory)
            if os.path.isfile(full_file_path):
                # Add '-Minor' before the file extension
                name, ext = os.path.splitext(filename)
                new_name = f"{name}-Minor{ext}"
                
                # Construct full new file path
                new_file_path = os.path.join(directory_path, new_name)
                
                # Rename the file
                os.rename(full_file_path, new_file_path)
                
        print(f"All filenames in '{directory_path}' updated successfully.")
    
    except Exception as e:
        print(f"An error occurred: {e}")

# Example usage:
add_minor_to_filenames(r"C:\Users\rapha\repositories\guitar_hero\data\raw\kaggle_chords\Test\Em")  # Replace with your directory path


All filenames in 'C:\Users\rapha\repositories\guitar_hero\data\raw\kaggle_chords\Test\Em' updated successfully.


## Considerations:

In [None]:
Consider implementing gradient clipping

In [None]:
Consider implementing data augmentation techniques specific to audio

In [3]:
Consider: Adding type hints for better code maintenance
Consider: Adding docstrings for better documentation

SyntaxError: invalid syntax (272345262.py, line 1)

In [None]:
Missing: Learning rate scheduling
Missing: Early stopping

In [None]:
Consider: Adding batch normalization layers
Consider: Implementing dropout for better regularization