# In [1]:
import os
import numpy as np
import librosa
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Fix the random seeds of NumPy and PyTorch so repeated runs behave the same
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

# Directories that hold the audio clips of each class
commercial_path = 'audio_directory/commercial'
non_commercial_path = 'audio_directory/non_commercial'

# Accumulators filled while the audio files are read
fingerprints, labels = [], []

def extract_fingerprints(audio_path):
    """Compute a stacked spectral fingerprint for one audio file.

    Four librosa feature matrices are extracted: MFCC, chroma, spectral
    contrast and tonnetz. They do not have the same number of rows, so each
    matrix is zero-padded along the feature axis to the tallest one and
    trimmed to the shortest frame count before stacking.

    Args:
        audio_path: Path of the audio file, passed to ``librosa.load``.

    Returns:
        A numpy array of shape ``(4, n_rows, n_frames)`` holding one channel
        per feature type, in the order MFCC, chroma, spectral contrast,
        tonnetz.
    """
    audio, sr = librosa.load(audio_path)  # Load the audio file

    # The signal is passed as ``y=`` because current librosa releases only
    # accept it by keyword.
    features = (
        librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13),   # MFCC features
        librosa.feature.chroma_stft(y=audio, sr=sr),       # Chroma features
        librosa.feature.spectral_contrast(y=audio, sr=sr), # Spectral Contrast features
        librosa.feature.tonnetz(y=audio, sr=sr),           # Tonnetz features
    )

    n_rows = max(feature.shape[0] for feature in features)
    n_frames = min(feature.shape[1] for feature in features)

    # np.stack requires identically shaped inputs: pad the missing rows with
    # zeros and drop any surplus trailing frames so every channel matches.
    padded = [
        np.pad(feature[:, :n_frames], ((0, n_rows - feature.shape[0]), (0, 0)))
        for feature in features
    ]

    # Stack all features along the channel dimension
    fingerprint = np.stack(padded, axis=0)
    return fingerprint

# Read and extract fingerprints for commercial and non-commercial audio files.
# os.listdir returns names in arbitrary order, so the listings are sorted to
# keep the dataset order (and with it the seeded train/test split) reproducible.
commercial_files = sorted(os.listdir(commercial_path))
non_commercial_files = sorted(os.listdir(non_commercial_path))

# 1 represents commercial audio, 0 represents non-commercial audio
for class_dir, class_files, class_label in (
    (commercial_path, commercial_files, 1),
    (non_commercial_path, non_commercial_files, 0),
):
    for file in class_files:
        if file.endswith('.mp3'):
            audio_path = os.path.join(class_dir, file)
            fingerprint = extract_fingerprints(audio_path)
            fingerprints.append(fingerprint)
            labels.append(class_label)

if not fingerprints:
    raise ValueError(
        'No .mp3 files found in {!r} or {!r}'.format(commercial_path, non_commercial_path)
    )

# Clips of different durations give fingerprints with different frame counts,
# which cannot be packed into one rectangular array. Crop every fingerprint to
# the shortest one along the last axis (a no-op when all lengths already agree).
# NOTE(review): assumes the last axis of a fingerprint is the frame axis.
min_frames = min(fp.shape[-1] for fp in fingerprints)

# Convert the lists to numpy arrays
fingerprints = np.array([fp[..., :min_frames] for fp in fingerprints])
labels = np.array(labels)

# Preprocessing data
n_samples = fingerprints.shape[0]
flat_fingerprints = fingerprints.reshape(n_samples, -1)  # Flatten the fingerprints

# Split data into training and testing sets BEFORE scaling: fitting the scaler
# on the full dataset would leak test-set statistics into training.
X_train, X_test, y_train, y_test = train_test_split(
    flat_fingerprints, labels, test_size=0.2, random_state=seed, shuffle=True
)

# Fit the scaler on the training samples only, then apply it to both subsets
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape the flattened fingerprints back into 4 channels
X_train = X_train.reshape(X_train.shape[0], 4, -1)
X_test = X_test.reshape(X_test.shape[0], 4, -1)

# Keep `fingerprints` in its scaled, 4-channel form as before, now using the
# training-set statistics.
fingerprints = scaler.transform(flat_fingerprints).reshape(n_samples, 4, -1)

# Convert data to PyTorch tensors (float32 for both features and BCE targets)
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Create a DataLoader for training
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

# Define the model architecture
# Convolutional feature extractor for 4-channel fingerprints
feature_layers = [
    nn.Conv1d(4, 32, kernel_size=3),
    nn.ReLU(),
    nn.MaxPool1d(kernel_size=2),
    nn.Conv1d(32, 64, kernel_size=3),
    nn.ReLU(),
    nn.MaxPool1d(kernel_size=2),
    nn.Flatten(),
]

# The flattened feature count depends on the fingerprint length, so measure it
# with a dry run on a dummy sample instead of hard-coding it. The dummy pass
# uses zeros under no_grad and therefore does not consume random numbers.
with torch.no_grad():
    dummy_input = torch.zeros(1, 4, X_train.shape[2])
    flat_features = nn.Sequential(*feature_layers)(dummy_input).shape[1]

# Layer order (and therefore the state_dict keys) is unchanged
model = nn.Sequential(
    *feature_layers,
    nn.Linear(flat_features, 128),
    nn.ReLU(),
    nn.Linear(128, 1),
    nn.Sigmoid()  # Use sigmoid activation for binary classification
)

# Define the loss function and optimizer
loss_function = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop: one pass over train_loader per epoch, printing the mean batch loss
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, target in train_loader:
        optimizer.zero_grad()

        outputs = model(inputs)
        # Remove only the trailing unit dimension. A bare squeeze() would also
        # collapse the batch dimension when the final batch holds a single
        # sample, leaving a 0-d tensor that no longer matches `target`.
        loss = loss_function(outputs.squeeze(1), target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, running_loss/len(train_loader)))

# Evaluation on the test set
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    # The model emits one column per sample while y_test is one-dimensional.
    # Drop the trailing dimension before comparing; otherwise the comparison
    # broadcasts to an N x N matrix and the count of correct predictions is wrong.
    test_predicted = (test_outputs.squeeze(1) > 0.5).float()
    test_correct = (test_predicted == y_test).sum().item()
    test_total = y_test.size(0)
    test_accuracy = test_correct / test_total
    print('Test Accuracy: {:.2%}'.format(test_accuracy))

# Save the trained model
torch.save(model.state_dict(), 'model.pth')


# Cell output: KeyboardInterrupt