In [16]:
# Block 1: Importing Libraries
import random
import tarfile
import resampy
import pandas as pd
import librosa
import numpy as np
import os
import shutil
import concurrent.futures
import pickle
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from concurrent.futures import ThreadPoolExecutor
from torch.optim.lr_scheduler import CosineAnnealingLR
import albumentations as A
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Report up front which device PyTorch can see, so a CPU-only run is not a surprise later.
if not torch.cuda.is_available():
    print("No GPU available. Training will run on CPU.")
else:
    print(f"GPU: {torch.cuda.get_device_name(0)} is available.")

GPU: NVIDIA GeForce RTX 4080 Laptop GPU is available.


In [17]:
# Block 2: Extracting Data
def extract_tar(tar_file, target_dir):
    """Extract ``tar_file`` into ``target_dir`` and delete "._" residue files.

    If ``target_dir`` already exists the user is prompted on stdin: answering
    "y" (any case) skips the extraction, any other answer deletes the existing
    directory and extracts again.

    After extraction, files whose name starts with "._" are removed from the
    folder ``<target_dir>/<archive name without extension>`` and everything
    below it.

    Args:
        tar_file: Path of the tar archive to extract.
        target_dir: Directory to extract into.
    """
    if os.path.exists(target_dir):
        answer = input(f"The directory '{target_dir}' already exists. Do you want to skip extraction? (y/n): ")
        if answer.lower() == 'y':
            print(f"Skipping extraction of {tar_file}.")
            return
        print(f"Overwriting the existing directory '{target_dir}'.")
        shutil.rmtree(target_dir)

    # NOTE(review): extractall() is called without an extraction filter, so
    # only use this on archives you trust.
    with tarfile.open(tar_file, 'r') as archive:
        archive.extractall(target_dir)

    # Remove residue "._" hidden files from the inner folder
    archive_stem = os.path.splitext(os.path.basename(tar_file))[0]
    inner_folder = os.path.join(target_dir, archive_stem)
    for current_dir, _subdirs, filenames in os.walk(inner_folder):
        residue_names = [name for name in filenames if name.startswith("._")]
        for name in residue_names:
            os.remove(os.path.join(current_dir, name))

# Unpack both archives (each into a folder named after the archive), then read
# the integer training labels from the text file.
for archive_name in ('train_mp3s', 'test_mp3s'):
    extract_tar(f'{archive_name}.tar', archive_name)
train_labels = np.loadtxt('train_label.txt', dtype=int)


Skipping extraction of train_mp3s.tar.
Skipping extraction of test_mp3s.tar.


In [18]:
# Block 3: Preprocessing Functions
def save_preprocessed_data(train_features, train_labels, test_features, folder_path):
    """Pickle the three preprocessed objects into ``folder_path``.

    The folder is created when it does not exist yet. The objects are written
    to ``train_features.pkl``, ``train_labels.pkl`` and ``test_features.pkl``,
    overwriting any existing files with those names.

    Args:
        train_features: Object stored in ``train_features.pkl``.
        train_labels: Object stored in ``train_labels.pkl``.
        test_features: Object stored in ``test_features.pkl``.
        folder_path: Destination directory.
    """
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)

    payloads = (
        ('train_features.pkl', train_features),
        ('train_labels.pkl', train_labels),
        ('test_features.pkl', test_features),
    )
    for filename, payload in payloads:
        with open(os.path.join(folder_path, filename), 'wb') as handle:
            pickle.dump(payload, handle)

def load_preprocessed_data(folder_path):
    """Read back the three pickles written by ``save_preprocessed_data``.

    Args:
        folder_path: Directory containing ``train_features.pkl``,
            ``train_labels.pkl`` and ``test_features.pkl``.

    Returns:
        Tuple ``(train_features, train_labels, test_features)``.

    Raises:
        FileNotFoundError: If any of the three files is missing (raised by
            ``open``).
    """
    # NOTE(review): pickle.load can execute arbitrary code; only point this at
    # cache folders you created yourself.
    def _read_pickle(filename):
        with open(os.path.join(folder_path, filename), 'rb') as handle:
            return pickle.load(handle)

    train_features = _read_pickle('train_features.pkl')
    train_labels = _read_pickle('train_labels.pkl')
    test_features = _read_pickle('test_features.pkl')
    return train_features, train_labels, test_features

def extract_mfcc(audio, sample_rate):
    """Return ``librosa.feature.mfcc`` of ``audio`` with 40 coefficients requested."""
    mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    return mfcc

def extract_mel_spec(audio, sample_rate):
    """Return ``librosa.feature.melspectrogram`` of ``audio`` with 128 mel bands requested."""
    return librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels=128)

def extract_tonnetz(audio, sample_rate):
    """Return ``librosa.feature.tonnetz`` computed from ``audio`` at ``sample_rate``."""
    tonnetz = librosa.feature.tonnetz(y=audio, sr=sample_rate)
    return tonnetz

def extract_chroma_stft(audio, sample_rate):
    """Return ``librosa.feature.chroma_stft`` computed from ``audio`` at ``sample_rate``."""
    chroma = librosa.feature.chroma_stft(y=audio, sr=sample_rate)
    return chroma

def extract_chroma_cqt(audio, sample_rate):
    """Return ``librosa.feature.chroma_cqt`` computed from ``audio`` at ``sample_rate``."""
    chroma = librosa.feature.chroma_cqt(y=audio, sr=sample_rate)
    return chroma

def extract_chroma_cens(audio, sample_rate):
    """Return ``librosa.feature.chroma_cens`` computed from ``audio`` at ``sample_rate``."""
    chroma = librosa.feature.chroma_cens(y=audio, sr=sample_rate)
    return chroma

def _fit_spec_to_frames(spec, n_bins, target_length):
    """Shape one stored training sample so it can be added to an (n_bins, target_length) spectrogram."""
    spec = np.asarray(spec)
    if spec.ndim == 1:
        if spec.shape[0] == n_bins:
            # A 1-D row with one value per frequency bin (e.g. a time-pooled
            # spectrogram). Keep it as a column so it broadcasts across every
            # frame. Laying it along the time axis instead would give every
            # frequency row of the mix the same values.
            return spec.reshape(-1, 1)
        # Any other 1-D input keeps the previous treatment: a single row
        # along the time axis.
        spec = spec.reshape(1, -1)

    current_length = spec.shape[1]
    if current_length > target_length:
        # Truncate the spectrogram if it is longer than the target length
        return spec[:, :target_length]
    if current_length < target_length:
        # Pad the spectrogram with zeros if it is shorter than the target length
        return np.pad(spec, ((0, 0), (0, target_length - current_length)), mode='constant')
    return spec


def apply_specmix(mel_spec, label, train_features, train_labels, num_mixes=2, alpha=0.2):
    """Build an augmented spectrogram as a random convex mix of same-label training samples.

    Up to ``num_mixes`` rows of ``train_features`` whose entry in
    ``train_labels`` equals ``label`` are drawn without replacement, fitted to
    the shape of ``mel_spec`` and combined with Beta(alpha, alpha) weights
    normalised to sum to 1.

    NOTE(review): the content of ``mel_spec`` itself is not part of the mix;
    only its shape and dtype are used (plus the fallback below). Confirm
    whether the input sample was meant to be blended in as well.

    Args:
        mel_spec: 2-D array; the result has the same shape and dtype.
        label: Label value used to pick mixing partners.
        train_features: Indexable collection of training samples aligned with
            ``train_labels``. Each sample may be a 2-D spectrogram (truncated
            or zero-padded along axis 1 to ``mel_spec.shape[1]``) or a 1-D
            vector with ``mel_spec.shape[0]`` entries (broadcast over all
            frames).
        train_labels: Array-like of labels, one per row of ``train_features``.
        num_mixes: Maximum number of samples to mix.
        alpha: Parameter of the symmetric Beta distribution for the weights.

    Returns:
        The mixed array, or a copy of ``mel_spec`` when there is nothing to
        mix with (previously this case silently produced an all-zero array).
    """
    # Get the indices of samples with the same label
    same_label_indices = np.where(np.asarray(train_labels) == label)[0]

    if len(same_label_indices) < num_mixes:
        # If there are not enough samples with the same label, use all available samples
        mix_indices = same_label_indices
    else:
        # Randomly select num_mixes samples with the same label
        mix_indices = np.random.choice(same_label_indices, size=num_mixes, replace=False)

    if len(mix_indices) == 0:
        # No partner available: return the input unchanged instead of zeros.
        return mel_spec.copy()

    # Generate mixing weights using the Beta distribution
    weights = np.random.beta(alpha, alpha, size=len(mix_indices))
    total_weight = np.sum(weights)
    if np.isfinite(total_weight) and total_weight > 0:
        weights_norm = weights / total_weight
    else:
        # Degenerate draw: fall back to equal weights rather than divide by zero.
        weights_norm = np.full(len(mix_indices), 1.0 / len(mix_indices))

    n_bins, target_length = mel_spec.shape[0], mel_spec.shape[1]

    # Mix the selected samples using the generated weights
    mixed_mel_spec = np.zeros_like(mel_spec)
    for weight, index in zip(weights_norm, mix_indices):
        partner = _fit_spec_to_frames(train_features[index], n_bins, target_length)
        mixed_mel_spec += weight * partner

    return mixed_mel_spec

def apply_audio_augmentation(mel_spec, label, train_features, train_labels):
    """Augment ``mel_spec`` by delegating to ``apply_specmix`` with its default settings."""
    return apply_specmix(mel_spec, label, train_features, train_labels)

def preprocess_audio(file_path, label, train_features, train_labels, augment=False):
    """Load one audio file and reduce it to a time-averaged mel-spectrogram vector.

    The file is loaded with librosa (``res_type='kaiser_fast'``), converted to
    a mel spectrogram with 128 bands requested, optionally augmented, and
    averaged over the spectrogram's second axis.

    Augmentation only happens when ``augment`` is truthy and ``label``,
    ``train_features`` and ``train_labels`` are all not ``None``.

    Args:
        file_path: Path of the audio file.
        label: Label of the file, or ``None``.
        train_features: Training samples handed to the augmentation, or ``None``.
        train_labels: Labels aligned with ``train_features``, or ``None``.
        augment: Whether to apply the augmentation.

    Returns:
        The averaged feature vector, or ``None`` if anything raised while
        processing (the error is printed, not re-raised).
    """
    try:
        audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
        print(f"Loaded audio file: {file_path}")

        spectrogram = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels=128)

        context_missing = label is None or train_features is None or train_labels is None
        if augment and not context_missing:
            spectrogram = apply_audio_augmentation(spectrogram, label, train_features, train_labels)

        # Average over the second axis: one value per row of the spectrogram.
        # Despite the name, no scaling is applied here.
        features_scaled = np.mean(spectrogram.T, axis=0)

        print(f"Extracted features: {features_scaled.shape}")
        return features_scaled
    except Exception as error:
        # Deliberate best-effort: report the failure and let the caller skip this file.
        print(f"Error processing file: {file_path}")
        print(f"Error message: {str(error)}")
        return None

def process_file(file_path, label, train_features, train_labels, augment=False):
    """Log the file being handled and forward it to ``preprocess_audio``.

    When ``label`` is ``None`` the training context is withheld:
    ``train_features`` and ``train_labels`` are passed on as ``None`` too.

    Returns:
        Whatever ``preprocess_audio`` returns for this file.
    """
    print(f"Processing file: {file_path}")
    if label is None:
        # If label is None, pass None for train_features and train_labels as well
        train_features = train_labels = None
    return preprocess_audio(file_path, label, train_features, train_labels, augment=augment)

def prepare_data(directory, train_features, train_labels, augment=False, max_workers=32):
    """Extract features for every ``<i>.mp3`` file in ``directory`` using a thread pool.

    Files are expected to be named ``0.mp3 .. N-1.mp3``, where N is the number
    of ``.mp3`` entries in ``directory`` (entries starting with "._" and
    non-mp3 files are ignored when counting, so stray files no longer produce
    bogus paths).

    Args:
        directory: Folder containing the numbered mp3 files.
        train_features: Training samples forwarded to ``process_file``.
        train_labels: Array-like with one label per file, or ``None`` for
            unlabeled data.
        augment: Forwarded to ``process_file``.
        max_workers: Size of the thread pool.

    Returns:
        ``np.ndarray`` with one row per successfully processed file, in file
        order. Files for which ``process_file`` returned ``None`` are dropped;
        a warning is printed when that happens because the rows then no longer
        line up one-to-one with the input files/labels.

    Raises:
        ValueError: If labels are given but their count differs from the
            number of audio files (previously the longer side was silently
            truncated), or if no file could be processed.
    """
    num_files = sum(
        1
        for entry in os.listdir(directory)
        if entry.endswith(".mp3") and not entry.startswith("._")
    )
    file_paths = [os.path.join(directory, f"{i}.mp3") for i in range(num_files)]

    if train_labels is None:
        labels = [None] * num_files
    else:
        labels = np.asarray(train_labels).tolist()
        if len(labels) != num_files:
            raise ValueError(
                f"Found {num_files} audio files in '{directory}' but {len(labels)} labels."
            )

    def _process(path, label):
        return process_file(path, label, train_features, train_labels, augment)

    # executor.map yields results in input order, so row i belongs to file i.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = list(executor.map(_process, file_paths, labels))

    features = [feat for feat in results if feat is not None]

    if not features:
        raise ValueError("No audio files were successfully processed.")

    num_failed = len(results) - len(features)
    if num_failed:
        print(
            f"Warning: {num_failed} of {len(results)} files could not be processed and were dropped; "
            "the returned rows no longer line up one-to-one with the input files/labels."
        )

    features = np.array(features)
    print(f"Processed {len(features)} audio files")
    return features



In [19]:
# Block 4: Preparing Data (modified)
folder_path = 'V6'

try:
    # Fast path: reuse the cached features if all three pickles are present.
    # NOTE(review): the cache is read with pickle; only load folders you created yourself.
    train_features, train_labels, test_features = load_preprocessed_data(folder_path)
    print("Loaded preprocessed data from the 'V6' folder.")
    # When the cache was written by the branch below, the stored labels are
    # already encoded (and doubled for the augmented copies), so this encoder
    # is fit on encoded values and cannot map back to the raw labels.
    label_encoder = LabelEncoder()
    train_labels_encoded = label_encoder.fit_transform(train_labels)

except FileNotFoundError:
    # Slow path: compute everything from the audio files and cache the result.
    label_encoder = LabelEncoder()
    train_labels_encoded = label_encoder.fit_transform(train_labels)

    train_features_original = prepare_data('train_mp3s/train_mp3s', np.empty((0, 128)), train_labels_encoded)
    print(f"Original train features shape: {train_features_original.shape}")

    train_features_augmented = prepare_data('train_mp3s/train_mp3s', train_features_original, train_labels_encoded, augment=True)
    print(f"Augmented train features shape: {train_features_augmented.shape}")

    # prepare_data drops files it cannot process. If that happened, rows and
    # labels are misaligned and the concatenation below would silently
    # mislabel samples, so stop here instead.
    for stage_name, stage_features in (("original", train_features_original), ("augmented", train_features_augmented)):
        if len(stage_features) != len(train_labels_encoded):
            raise ValueError(
                f"{stage_name} train features ({len(stage_features)}) do not match "
                f"the number of labels ({len(train_labels_encoded)})."
            )

    train_features = np.concatenate((train_features_original, train_features_augmented), axis=0)
    print(f"Combined train features shape: {train_features.shape}")

    test_features = prepare_data('test_mp3s/test_mp3s', np.empty((0, 128)), None)
    print(f"Test features shape: {test_features.shape}")

    # The augmented copies carry the same labels as their originals.
    train_labels_augmented = train_labels_encoded.copy()
    train_labels_encoded = np.concatenate((train_labels_encoded, train_labels_augmented), axis=0)
    print(f"Train labels shape: {train_labels_encoded.shape}")

    # Keep `train_labels` in step with the doubled feature matrix, exactly as
    # the cached path does. Without this it still holds the raw, half-length
    # labels and the length check in the training cell fails on a first run.
    train_labels = train_labels_encoded

    print(f"Number of training features: {len(train_features)}")
    print(f"Number of training labels: {len(train_labels_encoded)}")
    print(f"Number of test features: {len(test_features)}")

    save_preprocessed_data(train_features, train_labels_encoded, test_features, folder_path)
    print(f"Saved preprocessed data to the {folder_path} folder.")

if len(train_features) == 0:
    print("No training features available. Please check the data.")

# Reshape the features to have a channel dimension of 1
train_features = train_features.reshape(-1, 1, train_features.shape[1])
test_features = test_features.reshape(-1, 1, test_features.shape[1])
print(test_features.shape)
print(train_features.shape)

Loaded preprocessed data from the 'V6' folder.
(2447, 1, 128)
(23772, 1, 128)


In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

torch.cuda.empty_cache()

# Block 5: Model Training and Prediction
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Seed torch so weight init, dropout and batch shuffling are repeatable.
SEED = 42
torch.manual_seed(SEED)

if len(train_features) > 0:
    # Use `train_labels_encoded`: the data-preparation cell keeps it aligned
    # with `train_features` on both its cached and its recompute path, and this
    # cell never overwrites it, so the cell can be re-run safely.
    if len(train_features) != len(train_labels_encoded):
        raise ValueError("Number of train features and labels do not match.")

    n_features = train_features.shape[-1]
    n_classes = len(np.unique(train_labels_encoded))  # LabelEncoder output is 0..K-1

    # Normalize the input data
    # NOTE(review): the scaler is fit before the train/validation split and the
    # augmented rows are mixes of other training rows, so validation scores are
    # optimistic. Consider splitting first.
    scaler = StandardScaler()
    train_features = scaler.fit_transform(train_features.reshape(-1, n_features)).reshape(-1, 1, n_features)
    test_features = scaler.transform(test_features.reshape(-1, n_features)).reshape(-1, 1, n_features)

    train_data, val_data, train_labels, val_labels = train_test_split(
        train_features, train_labels_encoded, test_size=0.3, random_state=SEED
    )
    # Drop the singleton channel dimension once, instead of on every batch and epoch.
    train_data = torch.tensor(train_data, dtype=torch.float32, device=device).squeeze(1)
    val_data = torch.tensor(val_data, dtype=torch.float32, device=device).squeeze(1)
    train_labels = torch.tensor(train_labels, dtype=torch.long, device=device)
    val_labels = torch.tensor(val_labels, dtype=torch.long, device=device)

    model = nn.Sequential(
        nn.Linear(n_features, 2048),
        nn.BatchNorm1d(2048),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(2048, 2048),
        nn.BatchNorm1d(2048),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(2048, 1024),
        nn.BatchNorm1d(1024),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(1024, 512),
        nn.BatchNorm1d(512),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(512, 256),
        nn.BatchNorm1d(256),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Linear(256, 128),
        nn.BatchNorm1d(128),
        nn.ReLU(),
        nn.Dropout(0.1),
        nn.Linear(128, n_classes)
    ).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    num_epochs = 1000
    batch_size = 1024

    train_dataset = torch.utils.data.TensorDataset(train_data, train_labels)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    for epoch in range(num_epochs):
        model.train()
        for batch_data, batch_labels in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_data)
            loss = criterion(outputs, batch_labels)
            loss.backward()
            optimizer.step()

        # Full-set evaluation after every epoch (dropout off, BatchNorm in eval mode).
        model.eval()
        with torch.no_grad():
            train_outputs = model(train_data)
            train_loss = criterion(train_outputs, train_labels)
            _, train_predicted = torch.max(train_outputs, 1)
            train_accuracy = (train_predicted == train_labels).float().mean()

            val_outputs = model(val_data)
            val_loss = criterion(val_outputs, val_labels)
            _, val_predicted = torch.max(val_outputs, 1)
            val_accuracy = (val_predicted == val_labels).float().mean()

            print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss.item():.4f}, Train Accuracy: {train_accuracy.item():.4f}, Val Loss: {val_loss.item():.4f}, Val Accuracy: {val_accuracy.item():.4f}")

    # Keep `test_features` as the scaled NumPy array and put the device copy
    # under its own name; rebinding it to a (CUDA) tensor made the scaler call
    # above fail when this cell was run a second time.
    test_tensor = torch.tensor(test_features, dtype=torch.float32, device=device).squeeze(1)
    model.eval()
    with torch.no_grad():
        outputs = model(test_tensor)
        _, predicted_labels = torch.max(outputs, 1)
        predicted_labels = predicted_labels.cpu().tolist()

    # NOTE(review): `category` holds encoded class indices. If the raw labels
    # are not already 0..K-1 they need to be mapped back before submitting.
    submission = pd.DataFrame({'id': range(len(predicted_labels)), 'category': predicted_labels})
    submission.to_csv('submission.csv', index=False)

    # Only touch the model when it exists; this used to run in the no-data
    # branch as well and raised NameError there.
    model.cpu()  # Move the model to CPU
else:
    print("No training features available. Please check the data.")
torch.cuda.empty_cache()  # Clear GPU memory cache

Using device: cuda


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\mojii/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth
100.0%


Epoch [1/50], Train Loss: 1.1605, Train Accuracy: 0.4701, Val Loss: 1.1980, Val Accuracy: 0.4759
Epoch [2/50], Train Loss: 1.1686, Train Accuracy: 0.4554, Val Loss: 1.1736, Val Accuracy: 0.4624
Epoch [3/50], Train Loss: 1.3202, Train Accuracy: 0.4409, Val Loss: 1.3314, Val Accuracy: 0.4435
Epoch [4/50], Train Loss: 1.0326, Train Accuracy: 0.5201, Val Loss: 1.0740, Val Accuracy: 0.5035
Epoch [5/50], Train Loss: 1.0268, Train Accuracy: 0.5207, Val Loss: 1.0777, Val Accuracy: 0.5034
Epoch [6/50], Train Loss: 1.0106, Train Accuracy: 0.5262, Val Loss: 1.0856, Val Accuracy: 0.4826
Epoch [7/50], Train Loss: 0.8872, Train Accuracy: 0.5885, Val Loss: 0.9803, Val Accuracy: 0.5610
Epoch [8/50], Train Loss: 1.0216, Train Accuracy: 0.5130, Val Loss: 1.1374, Val Accuracy: 0.4802
Epoch [9/50], Train Loss: 0.9053, Train Accuracy: 0.6035, Val Loss: 1.0493, Val Accuracy: 0.5552
Epoch [10/50], Train Loss: 0.8475, Train Accuracy: 0.6072, Val Loss: 1.0216, Val Accuracy: 0.5499
Epoch [11/50], Train Loss: 0.

  return F.conv2d(input, weight, bias, self.stride,
