In [1]:
import os
import glob
import librosa
import numpy as np
import pandas as pd
import torch
import torchvision



In [2]:
# Collect the MP3 paths for each class. glob returns matches in arbitrary,
# filesystem-dependent order, so the results are sorted to keep the dataset
# order (and everything derived from it) reproducible between runs.
audio_files_commercial = sorted(glob.glob('audio_directory/commercial/*.mp3'))
audio_files_non_commercial = sorted(glob.glob('audio_directory/non_commercial/*.mp3'))

In [9]:
import torch
import torch.nn as nn
import librosa
import numpy as np
import pandas as pd

def extract_features(file_name, n_mfcc=40):
    """Summarise an audio file as a single fixed-length MFCC vector.

    The file is decoded with ``librosa.load`` (using the 'kaiser_fast'
    resampler), its MFCCs are computed, and every coefficient is averaged
    over the whole clip, so clips of any duration map to a vector of the
    same length.

    Args:
        file_name: Path of the audio file to load.
        n_mfcc: Number of MFCC coefficients to compute. Defaults to 40.

    Returns:
        A 1-D numpy array holding the mean of each MFCC coefficient.
    """
    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)

    # Transpose so coefficients are the trailing axis, then average over the
    # leading (frame) axis to collapse the clip into one value per coefficient.
    mfccs_processed = np.mean(mfccs.T, axis=0)

    return mfccs_processed

# Build the labelled feature table and persist it to CSV.
#
# No length padding is applied: extract_features() averages the MFCCs over
# time, so every clip already yields a vector of the same length regardless
# of its duration.
label_dict = {'commercial': 0, 'non_commercial': 1}

# Labels come from the list each path was globbed into. A substring test on
# the path cannot tell the classes apart, because 'non_commercial' itself
# contains 'commercial'.
files_by_class = {
    'commercial': audio_files_commercial,
    'non_commercial': audio_files_non_commercial,
}

# Fail early with a readable message instead of producing an unusable dataset.
empty_classes = [name for name, paths in files_by_class.items() if not paths]
if empty_classes:
    raise ValueError(f'No audio files found for class(es): {empty_classes}')

features = []

# Iterate through each sound file and extract the features
for class_label, class_files in files_by_class.items():
    for file_name in class_files:
        try:
            # extract_features() loads the file itself, so it is decoded only once.
            data = extract_features(file_name)
        except Exception as e:
            # Best effort: report the unreadable file and continue with the rest.
            print(f'Error: {e} occurred for file {file_name}')
            continue
        features.append([data, label_dict[class_label]])

# Convert into a Pandas DataFrame once, after every file has been processed,
# rather than rebuilding and rewriting the CSV on each iteration.
# NOTE: each 'feature' cell is written to the CSV as the text form of the
# array, so readers must parse it back into numbers.
features_df = pd.DataFrame(features, columns=['feature', 'class_label'])
features_df.to_csv('audio_features11.csv', index=False)



KeyboardInterrupt: 

In [None]:
import torch
import torch.nn as nn

class CRNN(nn.Module):
    """Convolutional-recurrent classifier for per-clip audio features.

    Two Conv2d/ReLU/MaxPool2d stages run over the (feature bin, frame) plane,
    a single-layer GRU reads the resulting frame sequence, and a linear layer
    maps the GRU's final output to class scores.

    Args:
        input_size: Number of feature bins per frame (e.g. MFCC coefficients).
        hidden_size: Size of the GRU hidden state.
        num_classes: Number of output classes.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        if input_size < 1:
            raise ValueError(f'input_size must be positive, got {input_size}')
        self.input_size = input_size

        self.conv1 = nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.relu1 = nn.ReLU()
        # ceil_mode keeps an axis of length 1 (e.g. a single frame) alive
        # instead of pooling it down to zero elements.
        self.pool1 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), ceil_mode=True)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), ceil_mode=True)

        # Each ceil-mode pool halves the bin axis rounding up; the GRU consumes
        # all 64 channels of every remaining bin at each time step.
        pooled_bins = (input_size + 1) // 2
        pooled_bins = (pooled_bins + 1) // 2
        self.rnn = nn.GRU(input_size=64 * pooled_bins, hidden_size=hidden_size,
                          num_layers=1, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Compute class scores for a batch of feature maps.

        Args:
            x: Tensor of shape (batch, input_size, frames), or
                (batch, input_size) for a single summary vector per clip,
                which is treated as a one-frame sequence.

        Returns:
            Tensor of shape (batch, num_classes) with unnormalised scores.

        Raises:
            ValueError: If x is not 2-D/3-D or its bin axis differs from
                input_size.
        """
        if x.dim() == 2:
            x = x.unsqueeze(-1)  # (batch, bins) -> (batch, bins, 1 frame)
        elif x.dim() != 3:
            raise ValueError(
                f'expected input of shape (batch, bins) or (batch, bins, frames), '
                f'got {tuple(x.shape)}'
            )
        if x.size(1) != self.input_size:
            raise ValueError(
                f'expected {self.input_size} feature bins, got {x.size(1)}'
            )

        x = x.unsqueeze(1)  # Add a channel dimension for Conv2d
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))

        # (batch, channels, bins, frames) -> (batch, frames, channels * bins)
        # so the GRU (batch_first=True) iterates over the frame axis.
        x = x.permute(0, 3, 1, 2).flatten(2)

        x, _ = self.rnn(x)
        x = self.fc(x[:, -1, :])  # Use the last output of the RNN
        return x

# Define the hyperparameters
input_size = 40  # Number of MFCC coefficients
hidden_size = 128  # Size of the RNN hidden state
num_classes = 2  # Number of classes (commercial and non-commercial)

# Seed PyTorch so weight initialisation and batch shuffling are repeatable.
torch.manual_seed(42)

# Create an instance of the CRNN model
model = CRNN(input_size, hidden_size, num_classes)


def _parse_feature_cell(cell):
    """Turn one 'feature' CSV cell into a 1-D float32 numpy array.

    After a CSV round trip the cell is text such as '[1.0 2.0 ...]' or
    '[1.0, 2.0, ...]' (possibly spanning several lines), so brackets and
    commas are stripped before the numbers are parsed.
    """
    if isinstance(cell, str):
        cleaned = cell.replace('[', ' ').replace(']', ' ').replace(',', ' ')
        return np.array([float(token) for token in cleaned.split()], dtype=np.float32)
    return np.asarray(cell, dtype=np.float32)


# Load the audio features from the generated CSV file
features_df = pd.read_csv('audio_features11.csv')
if features_df.empty:
    raise ValueError('audio_features11.csv contains no samples to train on')

features = np.vstack([_parse_feature_cell(cell) for cell in features_df['feature']])
if features.shape[1] != input_size:
    raise ValueError(
        f'expected {input_size} features per sample, found {features.shape[1]}'
    )
labels = features_df['class_label'].values

# Convert the features and labels to PyTorch tensors
features = torch.tensor(features, dtype=torch.float32)
labels = torch.tensor(labels, dtype=torch.long)

# Create a DataLoader to handle batching and shuffling of the data
dataset = torch.utils.data.TensorDataset(features, labels)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train the CRNN model
num_epochs = 10
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    for batch_features, batch_labels in dataloader:
        # Forward pass
        outputs = model(batch_features)

        # Compute the loss
        loss = criterion(outputs, batch_labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the mean.
        running_loss += loss.item() * batch_features.size(0)

    # Print the mean per-sample loss for every epoch (not just the last batch)
    epoch_loss = running_loss / len(dataset)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')
