In [56]:
# Block 1: Importing Libraries
import tarfile
import resampy
import pandas as pd
import librosa
import numpy as np
import os
import shutil
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from concurrent.futures import ThreadPoolExecutor

if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)} is available.")
else:
    print("No GPU available. Training will run on CPU.")


GPU: NVIDIA GeForce RTX 4080 Laptop GPU is available.


In [57]:
# Block 2: Extracting Data
def extract_tar(tar_file, target_dir):
    if os.path.exists(target_dir):
        user_input = input(f"The directory '{target_dir}' already exists. Do you want to skip extraction? (y/n): ")
        if user_input.lower() == 'y':
            print(f"Skipping extraction of {tar_file}.")
            return
        else:
            print(f"Overwriting the existing directory '{target_dir}'.")
            shutil.rmtree(target_dir)

    with tarfile.open(tar_file, 'r') as tar:
        tar.extractall(target_dir)

    # Remove residue "._" hidden files from the inner folder
    inner_folder = os.path.join(target_dir, os.path.splitext(os.path.basename(tar_file))[0])
    for root, dirs, files in os.walk(inner_folder):
        for file in files:
            if file.startswith("._"):
                os.remove(os.path.join(root, file))

extract_tar('train_mp3s.tar', 'train_mp3s')
extract_tar('test_mp3s.tar', 'test_mp3s')
train_labels = np.loadtxt('train_label.txt', dtype=int)
train_labels = np.array([int(label) for label in train_labels])

Skipping extraction of train_mp3s.tar.
Skipping extraction of test_mp3s.tar.


In [58]:
# Block 3: Preprocessing Functions
def preprocess_audio(file_path):
    try:
        audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
        print(f"Loaded audio file: {file_path}")
        mfccs = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels=128)
        mfccs_scaled = np.mean(mfccs.T, axis=0)
        print(f"Extracted MFCCs: {mfccs_scaled.shape}")
        return mfccs_scaled
    except Exception as e:
        print(f"Error processing file: {file_path}")
        print(f"Error message: {str(e)}")
        return None

def process_file(file_path):
    print(f"Processing file: {file_path}")
    mfccs = preprocess_audio(file_path)
    return mfccs

def prepare_data(directory):
    file_paths = [os.path.join(directory, f"{i}.mp3") for i in range(len(os.listdir(directory)))]
    with ThreadPoolExecutor() as executor:
        results = list(executor.map(process_file, file_paths))
    features = [mfccs for mfccs in results if mfccs is not None]
    print(f"Processed {len(features)} audio files")
    return np.array(features)

In [59]:
# Block 4: Preparing Data
train_features_file = 'train_features.pkl'
test_features_file = 'test_features.pkl'

try:
    with open(train_features_file, 'rb') as f:
        train_features = pickle.load(f)
    print("Loaded train features from file.")
except FileNotFoundError:
    train_features = prepare_data('train_mp3s/train_mp3s')
    with open(train_features_file, 'wb') as f:
        pickle.dump(train_features, f)
    print("Saved train features to file.")

try:
    with open(test_features_file, 'rb') as f:
        test_features = pickle.load(f)
    print("Loaded test features from file.")
except FileNotFoundError:
    test_features = prepare_data('test_mp3s/test_mp3s')
    with open(test_features_file, 'wb') as f:
        pickle.dump(test_features, f)
    print("Saved test features to file.")

print(f"Train features shape: {train_features.shape}")
print(f"Test features shape: {test_features.shape}")

train_labels = np.array([int(label) for label in train_labels])
print(f"Train labels shape: {train_labels.shape}")
print(f"Number of training features: {len(train_features)}")
print(f"Number of training labels: {len(train_labels)}")
print(f"Number of test features: {len(test_features)}")

if len(train_features) == 0:
    print("No training features available. Please check the data.")

Loaded train features from file.
Loaded test features from file.
Train features shape: (11886, 40)
Test features shape: (2447, 40)
Train labels shape: (11886,)
Number of training features: 11886
Number of training labels: 11886
Number of test features: 2447


In [60]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
torch.cuda.empty_cache()

# Block 5: Model Training and Prediction
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

if len(train_features) > 0:
    if len(train_features) != len(train_labels):
        raise ValueError("Number of train features and labels do not match.")

    # Reshape the data to match the input shape of the CNN
    train_data = torch.tensor(train_features, dtype=torch.float32, device=device).unsqueeze(1)
    train_labels = torch.tensor(train_labels, dtype=torch.long, device=device)

    class CNN(nn.Module):
        def __init__(self, input_size, num_classes):
            super(CNN, self).__init__()
            self.conv1 = nn.Conv1d(1, 256, kernel_size=7, padding=3)
            self.bn1 = nn.BatchNorm1d(256)  # Added batch normalization
            self.relu1 = nn.ReLU()
            self.pool1 = nn.MaxPool1d(kernel_size=2)
            self.conv2 = nn.Conv1d(256, 512, kernel_size=5, padding=2)
            self.bn2 = nn.BatchNorm1d(512)  # Added batch normalization
            self.relu2 = nn.ReLU()
            self.pool2 = nn.MaxPool1d(kernel_size=2)
            self.conv3 = nn.Conv1d(512, 1024, kernel_size=3, padding=1)
            self.bn3 = nn.BatchNorm1d(1024)  # Added batch normalization
            self.relu3 = nn.ReLU()
            self.pool3 = nn.AdaptiveAvgPool1d(1)
            self.flatten = nn.Flatten()
            self.fc1 = nn.Linear(1024, 512)
            self.relu4 = nn.ReLU()
            self.dropout1 = nn.Dropout(0.8)
            self.fc2 = nn.Linear(512, 256)
            self.relu5 = nn.ReLU()
            self.dropout2 = nn.Dropout(0.6)
            self.fc3 = nn.Linear(256, 128)
            self.relu6 = nn.ReLU()
            self.dropout3 = nn.Dropout(0.4)
            self.fc4 = nn.Linear(128, num_classes)
            self.dropout4 = nn.Dropout(0.3)

            # Add L2 regularization
            self.conv1.weight_decay = 1e-3
            self.conv2.weight_decay = 1e-3
            self.conv3.weight_decay = 1e-3
            self.fc1.weight_decay = 1e-3
            self.fc2.weight_decay = 1e-3

        def forward(self, x):
            x = self.conv1(x)
            x = self.bn1(x)  # Added batch normalization
            x = self.relu1(x)
            x = self.pool1(x)
            x = self.conv2(x)
            x = self.bn2(x)  # Added batch normalization
            x = self.relu2(x)
            x = self.pool2(x)
            x = self.conv3(x)
            x = self.bn3(x)  # Added batch normalization
            x = self.relu3(x)
            x = self.pool3(x)
            x = self.flatten(x)
            x = self.fc1(x)
            x = self.relu4(x)
            x = self.dropout1(x)
            x = self.fc2(x)
            x = self.relu5(x)
            x = self.dropout2(x)
            x = self.fc3(x)
            x = self.relu6(x)
            x = self.dropout3(x)
            x = self.fc4(x)
            x = self.dropout4(x)
            return x

    input_size = train_data.shape[2]
    num_classes = 4
    num_epochs = 500  # Increased the number of epochs
    batch_size = 512

    model = CNN(input_size, num_classes).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10, verbose=True)

    best_accuracy = 0.0
    early_stopping_counter = 0
    patience = 20  # Increased early stopping patience

    # Create data loader for the entire dataset
    train_dataset = torch.utils.data.TensorDataset(train_data, train_labels)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        train_accuracy = 0.0

        for batch_data, batch_labels in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_data)
            loss = criterion(outputs, batch_labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item() * batch_data.size(0)
            _, predicted = torch.max(outputs.data, 1)
            train_accuracy += (predicted == batch_labels).sum().item()

        train_loss = train_loss / len(train_loader.dataset)
        train_accuracy = train_accuracy / len(train_loader.dataset)

        print(f"Epoch [{epoch+1}/{num_epochs}] - Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")

        # Early stopping based on training accuracy
        if train_accuracy > best_accuracy:
            best_accuracy = train_accuracy
            early_stopping_counter = 0
            # Save the best model
            torch.save(model.state_dict(), 'best_model.pth')
        else:
            early_stopping_counter += 1

        if early_stopping_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

        scheduler.step(train_loss)

    # Load the best model
    model.load_state_dict(torch.load('best_model.pth'))

    # Generate submission.csv
    test_features = torch.tensor(test_features, dtype=torch.float32, device=device).unsqueeze(1)
    model.eval()
    with torch.no_grad():
        outputs = model(test_features)
        _, predicted_labels = torch.max(outputs, 1)
        predicted_labels = predicted_labels.cpu().tolist()

    submission = pd.DataFrame({'id': range(len(predicted_labels)), 'category': predicted_labels})
    submission.to_csv('submission.csv', index=False)

else:
    print("No training features available. Please check the data.")

torch.cuda.empty_cache()

Using device: cuda
Epoch [1/500] - Train Loss: 1.3022, Train Accuracy: 0.3377
Epoch [2/500] - Train Loss: 1.2135, Train Accuracy: 0.4001
Epoch [3/500] - Train Loss: 1.1308, Train Accuracy: 0.4521
Epoch [4/500] - Train Loss: 1.0130, Train Accuracy: 0.5547
Epoch [5/500] - Train Loss: 0.9565, Train Accuracy: 0.5862
Epoch [6/500] - Train Loss: 0.9254, Train Accuracy: 0.6007
Epoch [7/500] - Train Loss: 0.9151, Train Accuracy: 0.6104
Epoch [8/500] - Train Loss: 0.8990, Train Accuracy: 0.6103
Epoch [9/500] - Train Loss: 0.8677, Train Accuracy: 0.6257
Epoch [10/500] - Train Loss: 0.8511, Train Accuracy: 0.6279
Epoch [11/500] - Train Loss: 0.8478, Train Accuracy: 0.6259
Epoch [12/500] - Train Loss: 0.8375, Train Accuracy: 0.6438
Epoch [13/500] - Train Loss: 0.8220, Train Accuracy: 0.6394
Epoch [14/500] - Train Loss: 0.8186, Train Accuracy: 0.6437
Epoch [15/500] - Train Loss: 0.7919, Train Accuracy: 0.6566
Epoch [16/500] - Train Loss: 0.7801, Train Accuracy: 0.6619
Epoch [17/500] - Train Loss: 0