In [54]:
import torch
import torch.nn as nn
import torchaudio
import torchvggish
import os
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import torchaudio.transforms as T

class ChordDataset(Dataset):
    def __init__(self, file_paths, labels, sample_rate=16000):
        self.file_paths = file_paths
        self.labels = labels
        self.sample_rate = sample_rate
        
        # VGGish expects 96 mel bands
        self.mel_transform = T.MelSpectrogram(
            sample_rate=sample_rate,
            n_fft=2048,
            win_length=400,
            hop_length=160,
            n_mels=96,
            f_min=125,
            f_max=7500
        )
        
        # Log mel spectrogram
        self.amplitude_to_db = T.AmplitudeToDB()
        
    def __len__(self):
        return len(self.file_paths)
    
    def __getitem__(self, idx):
        # Load audio
        waveform, sr = torchaudio.load(self.file_paths[idx])
        #print(f"Getting: {self.file_paths[idx]}")
        
        # Resample if necessary
        if sr != self.sample_rate:
            resampler = T.Resample(sr, self.sample_rate)
            waveform = resampler(waveform)
        
        # Convert to mono if stereo
        if waveform.size(0) > 1:
            waveform = torch.mean(waveform, dim=0, keepdim=True)
        
        # Get mel spectrogram
        mel_spec = self.mel_transform(waveform)
        # Convert to dB scale
        mel_spec = self.amplitude_to_db(mel_spec)
        #print(f"Getting mel_spec: {mel_spec}")
        
        # VGGish expects input size of (batch_size, 1, 96, 64)
        # So we need to ensure our time dimension is 64 frames
        target_length = 64
        current_length = mel_spec.size(2)
        
        if current_length < target_length:
            # Pad if too short
            padding = target_length - current_length
            mel_spec = torch.nn.functional.pad(mel_spec, (0, padding))
            
        elif current_length > target_length:
            # Take center portion if too long
            start = (current_length - target_length) // 2
            mel_spec = mel_spec[:, :, start:start + target_length]
        
        # Add channel dimension
       # mel_spec = mel_spec.unsqueeze(0)
            
        return mel_spec, torch.tensor(self.labels[idx], dtype=torch.float32)

class ChordClassifier(nn.Module):
    def __init__(self, num_classes=1):
        super().__init__()
        self.feature_extractor = torchvggish.vggish()

        # Freeze VGGish parameters
        for name, param in self.feature_extractor.named_parameters():
            param.requires_grad = False
            print(f"Frozen parameter: {name}, requires_grad: {param.requires_grad}")
        
            
        # Simple classifier on top of VGGish embeddings
        self.classifier = nn.Sequential(
            nn.Linear(128, 64),  # VGGish outputs 128-dimensional embeddings
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, num_classes),
            nn.Sigmoid()
        )


    def forward(self, x):
        # Remove torch.no_grad() here
        features = self.feature_extractor(x)  # Remove torch.no_grad() here
        return self.classifier(features)


class ChordTrainer:
    def __init__(self, model, criterion, optimizer, device, threshold=0.5):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.device = device
        self.threshold = threshold
        
    def train_epoch(self, dataloader):

        self.model.train()
        running_loss = 0.0

            # Verify parameters are unfrozen
        for param in self.model.parameters():
            param.requires_grad = True
        
        for i, (inputs, labels) in enumerate(dataloader):
            print("Processing batch", i+1)
            
            inputs, labels = inputs.to(self.device), labels.to(self.device)
            
            self.optimizer.zero_grad()
            
            #for name, param in model.named_parameters():
             #   if param.requires_grad:
              #      print(f"Trainable parameter: {name}, shape: {param.shape}")
            
                        
            outputs = self.model(inputs)
            loss = self.criterion(outputs.squeeze(), labels)
            loss.backward()
            self.optimizer.step()
            
            running_loss += loss.item()
            
            if (i + 1) % 10 == 0:
                avg_loss = running_loss / 10
                print(f'Batch {i+1}, Loss: {avg_loss:.4f}')
                running_loss = 0.0

    
                
    def evaluate(self, dataloader):
        self.model.eval()
        all_preds = []
        all_labels = []
        
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = self.model(inputs)
                predicted = (outputs.squeeze() > self.threshold).float()
                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        return classification_report(
            all_labels, 
            all_preds, 
            target_names=["Minor", "Major"], 
            zero_division=0
        )

def get_dataloader(file_paths, labels, batch_size=16, shuffle=True):
    dataset = ChordDataset(file_paths, labels)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

def main():
    # Setup
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    file_dir = r'C:\Users\rapha\repositories\guitar_hero\data\raw'
    
    # Prepare data
    file_paths = [os.path.join(file_dir, f) for f in os.listdir(file_dir) if f.endswith('.wav')]
    labels = [0 if 'Minor' in f else 1 for f in os.listdir(file_dir) if f.endswith('.wav')]
    
    train_paths, test_paths, train_labels, test_labels = train_test_split(
        file_paths, labels, test_size=0.2, random_state=42, stratify=labels
    )

    #Changed for train set evaluation!
    train_dataloader = get_dataloader(file_paths, labels)
    test_dataloader = get_dataloader(test_paths, test_labels)
    
        # Initialize model
    model = ChordClassifier().to(device)
    
    # Check trainable parameters
    total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Total trainable parameters: {total_trainable_params}")
    
    # Training setup
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.2)
    trainer = ChordTrainer(model, criterion, optimizer, device, threshold=0.2)
    
    # Training loop
    num_epochs = 2
    for epoch in range(num_epochs):
        print(f'\nEpoch {epoch + 1}/{num_epochs}')
        trainer.train_epoch(train_dataloader)
        
    # Evaluation
    report = trainer.evaluate(test_dataloader)
    print('\nTest Results:')
    print(report)

if __name__ == '__main__':
    main()

Frozen parameter: features.0.weight, requires_grad: False
Frozen parameter: features.0.bias, requires_grad: False
Frozen parameter: features.3.weight, requires_grad: False
Frozen parameter: features.3.bias, requires_grad: False
Frozen parameter: features.6.weight, requires_grad: False
Frozen parameter: features.6.bias, requires_grad: False
Frozen parameter: features.8.weight, requires_grad: False
Frozen parameter: features.8.bias, requires_grad: False
Frozen parameter: features.11.weight, requires_grad: False
Frozen parameter: features.11.bias, requires_grad: False
Frozen parameter: features.13.weight, requires_grad: False
Frozen parameter: features.13.bias, requires_grad: False
Frozen parameter: embeddings.0.weight, requires_grad: False
Frozen parameter: embeddings.0.bias, requires_grad: False
Frozen parameter: embeddings.2.weight, requires_grad: False
Frozen parameter: embeddings.2.bias, requires_grad: False
Frozen parameter: embeddings.4.weight, requires_grad: False
Frozen parameter

In [11]:
from torch.utils.data import Dataset, DataLoader
import os
import torch
import torchaudio.transforms as T
from sklearn.model_selection import train_test_split

class ChordDataset(Dataset):
    def __init__(self, file_paths, labels, transform=None):
        self.file_paths = file_paths  # List of file paths to .wav files
        self.labels = labels  # List of labels (0 for minor, 1 for major)
        self.transform = transform

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        file_path = self.file_paths[idx]
        label = self.labels[idx]
        
        waveform, sample_rate = torchaudio.load(file_path)  # Load the .wav file
        
        # Convert waveform to Mel-spectrogram
        mel_spec_transform = T.MelSpectrogram(sample_rate=sample_rate, n_mels=64)
        mel_spectrogram = mel_spec_transform(waveform)
        
        # Add a channel dimension for the CNN input
        mel_spectrogram = mel_spectrogram
        
        if self.transform:
            mel_spectrogram = self.transform(mel_spectrogram)
        
        return mel_spectrogram, torch.tensor(label, dtype=torch.float32)

# Example usage for DataLoader
def get_dataloader(file_paths, labels, batch_size=16, shuffle=True):
    dataset = ChordDataset(file_paths, labels)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    return dataloader


In [13]:
def train_model(model, dataloader, num_epochs=10, learning_rate=0.2):
    criterion = nn.BCELoss()  # Binary Cross-Entropy Loss for binary classification
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(dataloader):
            # Move data to the appropriate device (GPU or CPU)
            inputs, labels = inputs.to(device), labels.to(device)
            
            # Zero the parameter gradients
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), labels)
            
            # Backward pass and optimization
            loss.backward()
            optimizer.step()
            
            # Print statistics
            running_loss += loss.item()
            if i % 10 == 9:  # Print every 10 mini-batches
                print(f'Epoch {epoch+1}, Batch {i+1}, Loss: {running_loss / 10:.4f}')
                running_loss = 0.0

    print('Finished Training')

# Example device setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Instantiate the VGGish model (pretrained)
torchvggish.download_vggish_weights()  # Download VGGish pretrained weights
pretrained_model = torchvggish.VGGish()  # Load pretrained VGGish model

# Instantiate your classifier with the pretrained model as an intermediate layer
model = ChordClassifier(pretrained_model).to(device)
# Continue with training, evaluation, etc., as per your previous implementation

# Continue with training, evaluation, etc., as per your previous implementation

# Define the path to the directory containing the .wav files
file_dir = r'C:\Users\rapha\repositories\guitar_hero\data\raw'

# Get a list of all .wav files in the directory
file_paths = [os.path.join(file_dir, f) for f in os.listdir(file_dir) if f.endswith('.wav')]

labels = [0 if 'Minor' in f else 1 for f in os.listdir(file_dir) if f.endswith('.wav')]  # 0 for Minor, 1 for Major

# Split data into train and test sets (80% train, 20% test)
train_paths, test_paths, train_labels, test_labels = train_test_split(file_paths, labels, test_size=0.2, random_state=42, stratify=labels)

# Get the DataLoader for training and testing sets
train_dataloader = get_dataloader(train_paths, train_labels)
test_dataloader = get_dataloader(test_paths, test_labels)

# Train the model on the training data
train_model(model, train_dataloader, num_epochs=2, learning_rate=0.2)

# Evaluate the model on the test data using the evaluate method from before
model.evaluate(test_dataloader, device)


AttributeError: module 'torchvggish' has no attribute 'download_vggish_weights'