In [21]:
# Block 1: Importing Libraries
import tarfile
import resampy
import pandas as pd
import librosa
import numpy as np
import os
import shutil
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from concurrent.futures import ThreadPoolExecutor

# Report the compute device up front so a slow CPU-only run is not a surprise.
print(
    f"GPU: {torch.cuda.get_device_name(0)} is available."
    if torch.cuda.is_available()
    else "No GPU available. Training will run on CPU."
)


GPU: NVIDIA GeForce RTX 4080 Laptop GPU is available.


In [22]:
# Block 2: Extracting Data
def extract_tar(tar_file, target_dir, skip_existing=None):
    """Extract ``tar_file`` into ``target_dir`` and delete "._"-prefixed residue files.

    Parameters
    ----------
    tar_file : str
        Path of the tar archive to extract.
    target_dir : str
        Directory the archive is extracted into.
    skip_existing : bool or None, optional
        What to do when ``target_dir`` already exists: ``True`` keeps it and
        skips the extraction, ``False`` deletes it and extracts again, and
        ``None`` (the default) asks interactively via ``input()``.

    Raises
    ------
    FileNotFoundError
        If an extraction is required but ``tar_file`` does not exist. When the
        target directory is about to be overwritten this is raised *before*
        the directory is deleted.
    """
    if os.path.exists(target_dir):
        if skip_existing is None:
            user_input = input(f"The directory '{target_dir}' already exists. Do you want to skip extraction? (y/n): ")
            skip_existing = user_input.strip().lower() == 'y'
        if skip_existing:
            print(f"Skipping extraction of {tar_file}.")
            return
        # Verify the archive first: deleting the directory and then failing to
        # open the archive would leave the caller with no data at all.
        if not os.path.isfile(tar_file):
            raise FileNotFoundError(f"Cannot re-extract: archive '{tar_file}' does not exist.")
        print(f"Overwriting the existing directory '{target_dir}'.")
        shutil.rmtree(target_dir)

    with tarfile.open(tar_file, 'r') as tar:
        if hasattr(tarfile, "data_filter"):
            # Extraction filters are available: refuse members that would
            # escape target_dir (absolute paths, "..", unsafe links).
            tar.extractall(target_dir, filter="data")
        else:
            tar.extractall(target_dir)

    # Remove residue "._" hidden files anywhere under the extraction directory,
    # not only in a sub-folder guessed from the archive file name.
    for root, _dirs, files in os.walk(target_dir):
        for file in files:
            if file.startswith("._"):
                os.remove(os.path.join(root, file))

# Unpack both archives; extract_tar returns early when an existing directory is kept.
extract_tar('train_mp3s.tar', 'train_mp3s')
extract_tar('test_mp3s.tar', 'test_mp3s')

# Integer class labels. dtype=int already yields an integer array, so no
# per-element re-conversion is needed; ndmin=1 keeps the result 1-D even when
# the file holds a single value.
train_labels = np.loadtxt('train_label.txt', dtype=int, ndmin=1)

Skipping extraction of train_mp3s.tar.
Skipping extraction of test_mp3s.tar.


In [23]:
# Block 3: Preprocessing Functions
def preprocess_audio(file_path, n_mels=128):
    """Load one audio file and summarise it as an averaged mel spectrogram.

    Parameters
    ----------
    file_path : str
        Path handed to ``librosa.load`` (resampled with ``res_type='kaiser_fast'``).
    n_mels : int, optional
        Number of mel bands requested from ``librosa.feature.melspectrogram``.
        Defaults to 128, the value that used to be hard-coded here.

    Returns
    -------
    numpy.ndarray or None
        The mel spectrogram averaged over axis 1, or ``None`` when loading or
        feature extraction raised. The error is printed, not propagated, so
        callers must handle ``None``.
    """
    try:
        audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
        print(f"Loaded audio file: {file_path}")
        # These are mel-spectrogram energies, not MFCCs; names and the log
        # message below say so to match what is actually computed.
        mel = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels=n_mels)
        mel_mean = np.mean(mel, axis=1)
        print(f"Extracted mel features: {mel_mean.shape}")
        return mel_mean
    except Exception as e:
        # Best-effort by design: one unreadable file must not abort a whole batch.
        print(f"Error processing file: {file_path}")
        print(f"Error message: {str(e)}")
        return None

def process_file(file_path):
    """Announce the file being handled and return ``preprocess_audio``'s result for it."""
    print(f"Processing file: {file_path}")
    return preprocess_audio(file_path)

def prepare_data(directory):
    """Extract features for every numerically named ``.mp3`` file in ``directory``.

    Files are processed in ascending numeric order of their stem (``0.mp3``,
    ``1.mp3``, ``2.mp3`` ...). The list comes from the files that actually
    exist, so stray entries in the directory create no phantom paths and gaps
    in the numbering skip no real files.

    Parameters
    ----------
    directory : str
        Folder containing the ``<number>.mp3`` files.

    Returns
    -------
    numpy.ndarray
        One row per successfully processed file, in numeric file order. Files
        for which ``process_file`` returned ``None`` are left out; a warning
        is printed because the rows then no longer line up one-to-one with
        the file numbers.
    """
    numbered = []
    for name in os.listdir(directory):
        stem, ext = os.path.splitext(name)
        if ext.lower() == ".mp3" and stem.isdecimal():
            numbered.append((int(stem), name))
    numbered.sort()
    file_paths = [os.path.join(directory, name) for _, name in numbered]

    # executor.map yields results in input order, so ordering is preserved
    # even though files are processed concurrently.
    with ThreadPoolExecutor() as executor:
        results = list(executor.map(process_file, file_paths))

    failed = [path for path, feats in zip(file_paths, results) if feats is None]
    features = [feats for feats in results if feats is not None]
    print(f"Processed {len(features)} audio files")
    if failed:
        print(
            f"WARNING: {len(failed)} file(s) could not be processed and were dropped; "
            f"feature rows no longer align with file numbers. Dropped: {failed}"
        )
    return np.array(features)

In [24]:
# Block 4: Preparing Data
train_features_file = 'train_features_mel.pkl'
test_features_file = 'test_features_mel.pkl'


def _load_or_prepare_features(cache_file, directory, split_name):
    """Return the features for one split, reusing the pickle cache when present.

    Parameters
    ----------
    cache_file : str
        Pickle file the features are read from, or written to after computing.
    directory : str
        Folder passed to ``prepare_data`` when the cache file does not exist.
    split_name : str
        Label used in the progress messages (e.g. ``'train'``).

    Returns
    -------
    The unpickled object, or the array returned by ``prepare_data``.
    """
    try:
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # only load cache files that this notebook wrote itself.
        with open(cache_file, 'rb') as f:
            features = pickle.load(f)
        print(f"Loaded {split_name} features from file.")
    except FileNotFoundError:
        # No cache yet: run the expensive extraction once and store the result.
        features = prepare_data(directory)
        with open(cache_file, 'wb') as f:
            pickle.dump(features, f)
        print(f"Saved {split_name} features to file.")
    return features


train_features = _load_or_prepare_features(train_features_file, 'train_mp3s/train_mp3s', 'train')
test_features = _load_or_prepare_features(test_features_file, 'test_mp3s/test_mp3s', 'test')

print(f"Train features shape: {train_features.shape}")
print(f"Test features shape: {test_features.shape}")

# Normalise the labels to a plain integer NumPy array before reporting their shape.
train_labels = np.array([int(label) for label in train_labels])
print(f"Train labels shape: {train_labels.shape}")
print(f"Number of training features: {len(train_features)}")
print(f"Number of training labels: {len(train_labels)}")
print(f"Number of test features: {len(test_features)}")

if len(train_features) == 0:
    print("No training features available. Please check the data.")

Loaded train features from file.
Loaded test features from file.
Train features shape: (11886, 128)
Test features shape: (2447, 128)
Train labels shape: (11886,)
Number of training features: 11886
Number of training labels: 11886
Number of test features: 2447


In [25]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import pandas as pd

torch.cuda.empty_cache()

# Block 5: Model Training and Prediction
# One seed for the data split and for PyTorch's RNG (weight init, dropout,
# batch shuffling) so a re-run repeats as closely as the backend allows.
SEED = 42
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

if len(train_features) > 0:
    if len(train_features) != len(train_labels):
        raise ValueError("Number of train features and labels do not match.")

    # 70/30 train/validation split. The label halves get their own names so
    # `train_labels` stays bound to the full label array; rebinding it would
    # make the length check above fail the next time this cell is run.
    train_split, val_split, train_label_split, val_label_split = train_test_split(
        train_features, train_labels, test_size=0.3, random_state=SEED
    )
    train_data = torch.tensor(train_split, dtype=torch.float32, device=device)
    val_data = torch.tensor(val_split, dtype=torch.float32, device=device)
    train_targets = torch.tensor(train_label_split, dtype=torch.long, device=device)
    val_labels = torch.tensor(val_label_split, dtype=torch.long, device=device)

    # Fully connected classifier: 128 input features -> 4 output logits, with
    # batch norm, ReLU and dropout after every hidden layer.
    model = nn.Sequential(
        nn.Linear(128, 2048),
        nn.BatchNorm1d(2048),
        nn.ReLU(),
        nn.Dropout(0.8),
        nn.Linear(2048, 1024),
        nn.BatchNorm1d(1024),
        nn.ReLU(),
        nn.Dropout(0.6),
        nn.Linear(1024, 512),
        nn.BatchNorm1d(512),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(512, 256),
        nn.BatchNorm1d(256),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(256, 128),
        nn.BatchNorm1d(128),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(128, 64),
        nn.BatchNorm1d(64),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(64, 4)
    ).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
    # `verbose=True` is deprecated in current PyTorch; learning-rate drops are
    # logged explicitly in the epoch loop instead.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)

    num_epochs = 500
    batch_size = 1024

    train_dataset = torch.utils.data.TensorDataset(train_data, train_targets)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    for epoch in range(num_epochs):
        model.train()
        for batch_data, batch_labels in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_data)
            loss = criterion(outputs, batch_labels)
            loss.backward()
            # Cap the gradient norm at 1.0 before the optimizer step.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

        # Evaluate on the whole validation set in one forward pass.
        model.eval()
        with torch.no_grad():
            outputs = model(val_data)
            val_loss = criterion(outputs, val_labels)
            _, predicted = torch.max(outputs, 1)
            accuracy = (predicted == val_labels).float().mean()
            print(f"Epoch [{epoch+1}/{num_epochs}], Validation Loss: {val_loss.item():.4f}, Validation Accuracy: {accuracy.item():.4f}")

            lr_before = optimizer.param_groups[0]['lr']
            scheduler.step(val_loss)
            lr_after = optimizer.param_groups[0]['lr']
            if lr_after != lr_before:
                print(f"Epoch [{epoch+1}/{num_epochs}], reducing learning rate to {lr_after:.4e}.")

    torch.save(model.state_dict(), 'trained_model.pth')

    # Use a separate tensor so `test_features` keeps the cached NumPy array.
    test_inputs = torch.tensor(test_features, dtype=torch.float32, device=device)
    model.eval()
    with torch.no_grad():
        outputs = model(test_inputs)
        _, predicted_labels = torch.max(outputs, 1)
        predicted_labels = predicted_labels.cpu().tolist()

    # Row i of the test features is written out with id i.
    submission = pd.DataFrame({'id': range(len(predicted_labels)), 'category': predicted_labels})
    submission.to_csv('submission.csv', index=False)

else:
    print("No training features available. Please check the data.")

torch.cuda.empty_cache()

Using device: cuda
Epoch [1/500], Validation Loss: 1.3615, Validation Accuracy: 0.3699
Epoch [2/500], Validation Loss: 1.3230, Validation Accuracy: 0.3898
Epoch [3/500], Validation Loss: 1.2616, Validation Accuracy: 0.4431
Epoch [4/500], Validation Loss: 1.1949, Validation Accuracy: 0.4790
Epoch [5/500], Validation Loss: 1.1590, Validation Accuracy: 0.4846
Epoch [6/500], Validation Loss: 1.1333, Validation Accuracy: 0.4986
Epoch [7/500], Validation Loss: 1.1169, Validation Accuracy: 0.5146
Epoch [8/500], Validation Loss: 1.1050, Validation Accuracy: 0.5174
Epoch [9/500], Validation Loss: 1.0841, Validation Accuracy: 0.5179
Epoch [10/500], Validation Loss: 1.0663, Validation Accuracy: 0.5185
Epoch [11/500], Validation Loss: 1.0486, Validation Accuracy: 0.5311
Epoch [12/500], Validation Loss: 1.0400, Validation Accuracy: 0.5398
Epoch [13/500], Validation Loss: 1.0216, Validation Accuracy: 0.5468
Epoch [14/500], Validation Loss: 1.0119, Validation Accuracy: 0.5508
Epoch [15/500], Validati

KeyboardInterrupt: 