# [FMA: A Dataset For Music Analysis](https://github.com/mdeff/fma)

Michaël Defferrard, Kirell Benzi, Pierre Vandergheynst, Xavier Bresson, EPFL LTS2.

## Baselines

* This notebook evaluates standard classifiers from scikit-learn on the provided features.
* Moreover, it evaluates Deep Learning models on both audio and spectrograms.

In [1]:
!pip install python-dotenv
!pip install --upgrade librosa

import time
import os

import IPython.display as ipd
from tqdm import tqdm_notebook
import numpy as np
import pandas as pd
import keras
from keras.layers import Activation, Dense, Conv1D, Conv2D, MaxPooling1D, Flatten, Reshape

from sklearn.utils import shuffle
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder, LabelBinarizer, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
#from sklearn.gaussian_process import GaussianProcessClassifier
#from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.multiclass import OneVsRestClassifier


%load_ext autoreload
%autoreload 2

# Import the library to mount Google Drive



Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1


In [2]:
from google.colab import drive
drive.mount('/content/drive')
FOLDERNAME = 'DL project/fma'
import sys
sys.path.append('/content/drive/My Drive/{}'.format(FOLDERNAME))
%cd /content/drive/My\ Drive/$FOLDERNAME/

import utils

Mounted at /content/drive
/content/drive/.shortcut-targets-by-id/1Gzbs2MQ8ttHuTOpxDaZoGkbdHOw1ukQO/DL project/fma


In [3]:
# AUDIO_DIR = os.environ.get('AUDIO_DIR')
# AUDIO_DIR = os.environ.get('AUDIO_DIR')
SPLIT_SIZE = 'small'
NUM_CLASSES = 0
if SPLIT_SIZE == 'small':
    NUM_CLASSES = 8
else:
    NUM_CLASSES = 16

AUDIO_DIR = 'data/raw_' + SPLIT_SIZE

tracks = utils.load('data/fma_metadata/tracks.csv')
features = utils.load('data/fma_metadata/features.csv')
echonest = utils.load('data/fma_metadata/echonest.csv')

np.testing.assert_array_equal(features.index, tracks.index)
assert echonest.index.isin(tracks.index).all()

tracks.shape, features.shape, echonest.shape

((106574, 52), (106574, 518), (13129, 249))

## Subset

In [4]:
subset = tracks.index[tracks['set', 'subset'] <= 'small']

assert subset.isin(tracks.index).all()
assert subset.isin(features.index).all()

features_all = features.join(echonest, how='inner').sort_index(axis=1)
print('Not enough Echonest features: {}'.format(features_all.shape))

tracks = tracks.loc[subset]
features_all = features.loc[subset]

tracks.shape, features_all.shape

Not enough Echonest features: (13129, 767)


((8000, 52), (8000, 518))

In [5]:
train = tracks.index[tracks['set', 'split'] == 'training']
val = tracks.index[tracks['set', 'split'] == 'validation']
test = tracks.index[tracks['set', 'split'] == 'test']

print('{} training examples, {} validation examples, {} testing examples'.format(*map(len, [train, val, test])))

genres = list(LabelEncoder().fit(tracks['track', 'genre_top']).classes_)
#genres = list(tracks['track', 'genre_top'].unique())
print('Top genres ({}): {}'.format(len(genres), genres))
genres = list(MultiLabelBinarizer().fit(tracks['track', 'genres_all']).classes_)
print('All genres ({}): {}'.format(len(genres), genres))

6400 training examples, 800 validation examples, 800 testing examples
Top genres (8): ['Electronic', 'Experimental', 'Folk', 'Hip-Hop', 'Instrumental', 'International', 'Pop', 'Rock']
All genres (114): [1, 2, 6, 10, 12, 15, 16, 17, 18, 21, 22, 25, 26, 27, 30, 31, 32, 33, 36, 38, 41, 42, 45, 46, 47, 49, 53, 58, 64, 66, 70, 71, 76, 77, 79, 81, 83, 85, 86, 88, 89, 90, 92, 94, 98, 100, 101, 102, 103, 107, 109, 111, 113, 117, 118, 125, 130, 167, 171, 172, 174, 177, 180, 181, 182, 183, 184, 185, 186, 214, 224, 232, 236, 240, 247, 250, 267, 286, 296, 297, 314, 337, 359, 360, 361, 362, 400, 401, 404, 439, 440, 456, 468, 491, 495, 502, 504, 514, 524, 538, 539, 542, 580, 602, 619, 695, 741, 763, 808, 811, 1032, 1060, 1193, 1235]


In [6]:
tracks['track', 'genre_top'] = tracks['track', 'genre_top'].astype(str)
labels_onehot = LabelBinarizer().fit_transform(tracks['track', 'genre_top'])
labels_onehot = pd.DataFrame(labels_onehot, index=tracks.index)

## organized in genres


In [None]:
import os
import shutil
import pandas as pd
from sklearn.preprocessing import LabelBinarizer
import numpy as np

def create_organized_dataset(tracks_df, source_dir, target_dir):
    """
    Reorganize FMA dataset based on top 8 genres and train/val/test splits.

    Args:
        tracks_df: DataFrame containing track metadata
        source_dir: Path to original FMA dataset (fma_small)
        target_dir: Path where reorganized dataset will be stored
    """
    # Create main directories for splits
    splits = ['train', 'val', 'test']
    for split in splits:
        os.makedirs(os.path.join(target_dir, split), exist_ok=True)

    # Get indices for each split
    split_indices = {
        'train': tracks_df.index[tracks_df['set', 'split'] == 'training'],
        'val': tracks_df.index[tracks_df['set', 'split'] == 'validation'],
        'test': tracks_df.index[tracks_df['set', 'split'] == 'test']
    }
    print(split_indices['val'].shape)
    # Create genre folders within each split
    genres = tracks_df['track', 'genre_top'].unique()
    for split in splits:
        for genre in genres:
            os.makedirs(os.path.join(target_dir, split, genre), exist_ok=True)

    # Function to get source file path
    def get_source_path(track_id):
        """Convert track ID to source file path."""
        tid_str = '{:06d}'.format(track_id)
        return os.path.join(source_dir, tid_str[:3], tid_str + '.mp3')

    # Copy files to new structure
    for split_name, indices in split_indices.items():
        print(f"Processing {split_name} split...")
        for idx in indices:
            try:
                # Get genre and create source/target paths
                genre = tracks_df.loc[idx, ('track', 'genre_top')]
                source_path = get_source_path(idx)
                target_path = os.path.join(target_dir, split_name, genre, f"{idx:06d}.mp3")

                # Copy file if it exists
                # print(source_path)
                if os.path.exists(source_path):
                  shutil.copy2(source_path, target_path)
                # else:
                    # print(f"Warning: Source file not found for track {idx}")

            except Exception as e:
                print(f"Error processing track {idx}: {str(e)}")

    # Create metadata file with one-hot encoded labels
    label_binarizer = LabelBinarizer()
    labels_onehot = label_binarizer.fit_transform(tracks_df['track', 'genre_top'])

    # Create DataFrame with one-hot labels
    labels_df = pd.DataFrame(
        labels_onehot,
        columns=label_binarizer.classes_,
        index=tracks_df.index
    )

    # Add split information
    labels_df['split'] = tracks_df['set', 'split']

    # Save metadata
    labels_df.to_csv(os.path.join(target_dir, 'metadata.csv'))

    # Print summary
    print("\nDataset reorganization complete!")
    print("\nStructure:")
    for split in splits:
        print(f"\n{split}:")
        for genre in genres:
            genre_path = os.path.join(target_dir, split, genre)
            n_files = len(os.listdir(genre_path))
            print(f"  {genre}: {n_files} files")

# Usage example:
import os
print("Current working directory:", os.getcwd())
source_directory = "data/fma_small"
target_directory = "data/organized_small"
create_organized_dataset(tracks, source_directory, target_directory)

Current working directory: /content/drive/.shortcut-targets-by-id/1Gzbs2MQ8ttHuTOpxDaZoGkbdHOw1ukQO/DL project/fma
(800,)
Processing train split...


In [None]:
splits = ['train', 'val', 'test']
target_dir = "data/organized_small"
for split in splits:
        print(f"\n{split}:")
        for genre in os.listdir(os.path.join(target_dir, split)):
          print(genre)
          genre_path = os.path.join(target_dir, split, genre)
          n_files = len(os.listdir(genre_path))
          print(f"  {genre}: {n_files} files")

## 3 Deep learning on extracted audio features

Look at:
* Pre-processing in Keras: https://github.com/keunwoochoi/kapre
* Convolutional Recurrent Neural Networks for Music Classification: https://github.com/keunwoochoi/icassp_2017
* Music Auto-Tagger: https://github.com/keunwoochoi/music-auto_tagging-keras
* Pre-processor: https://github.com/bmcfee/pumpp

### 3.1 ConvNet on MFCC

* Architecture: [Automatic Musical Pattern Feature Extraction Using Convolutional Neural Network](http://www.iaeng.org/publication/IMECS2010/IMECS2010_pp546-550.pdf), Tom LH. Li, Antoni B. Chan and Andy HW. Chun
* Missing: track segmentation and majority voting.
* Best seen: 17.6%

## storing mfcc in npy

In [9]:
import os
import numpy as np
import pandas as pd
import librosa
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler

In [None]:
import os
import librosa
import numpy as np

def preprocess_and_save_npy(input_root, output_root, metadata_path):
    shape = (13, 2582)
    metadata = pd.read_csv(metadata_path)
    for split in ['train', 'val', 'test']:
        input_dir = os.path.join(input_root, split)
        output_dir = os.path.join(output_root, split)

        for genre_folder in os.listdir(input_dir):
            genre_input_dir = os.path.join(input_dir, genre_folder)
            genre_output_dir = os.path.join(output_dir, genre_folder)

            if not os.path.isdir(genre_input_dir):
                continue

            os.makedirs(genre_output_dir, exist_ok=True)
            for file in os.listdir(genre_input_dir):
                if not file.endswith('.mp3'):
                    continue
                input_file = os.path.join(genre_input_dir, file)
                output_file = os.path.join(genre_output_dir, f"{os.path.splitext(file)[0]}.npy")
                if not os.path.exists(output_file):  # Skip if already processed
                  try:
                    y, sr = librosa.load(input_file, sr=22050)
                    mfcc = librosa.feature.mfcc(y=y, sr=22050, n_mfcc=13, n_fft=512, hop_length=256)
                    padded_mfcc = np.zeros(shape, dtype=np.float32)
                    padded_mfcc[:, :min(mfcc.shape[1], shape[1])] = mfcc[:, :shape[1]]
                    padded_mfcc = padded_mfcc.T
                    np.save(output_file, padded_mfcc)
                    # print(f"Saved: {output_file}")
                  except Exception as e:
                    print(f"Error processing {input_file}: {e}")
            print(f"saved {len(genre_output_dir)}")

# Example usage
input_dir = "data/organized_small"
output_dir = "data/mfcc_small"
metadata_path = "data/organized_small/metadata.csv"
preprocess_and_save_npy(input_dir, output_dir, metadata_path)


  y, sr = librosa.load(input_file, sr=22050)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error processing data/organized_small/train/Hip-Hop/068592.mp3: 
saved 29 files
saved 25 files
saved 26 files
saved 34 files
saved 26 files
saved 35 files
saved 32 files
saved 34 files
saved 27 files
saved 23 files
saved 24 files
saved 32 files
saved 24 files
saved 33 files
saved 30 files
saved 32 files
saved 28 files
saved 24 files
saved 25 files
saved 33 files
saved 25 files
saved 34 files
saved 31 files
saved 33 files


In [None]:
splits = ['train', 'val', 'test']
target_dir = "data/mfcc_small"
for split in splits:
    print(f"\n{split}:")
    for genre in os.listdir(os.path.join(target_dir, split)):
      # print(genre)
      genre_path = os.path.join(target_dir, split, genre)
      n_files = len(os.listdir(genre_path))
      print(f"  {genre}: {n_files} files")

## preprocessing

In [None]:
# all_mfccs = []
# for file in "data/mfcc_small":
#     mfcc = np.load(file).astype(np.float32)
#     all_mfccs.append(mfcc)
# all_mfccs = np.array(all_mfccs)
# mean = np.mean(all_mfccs, axis=(0, 1))  # Mean along time and feature axis
# std = np.std(all_mfccs, axis=(0, 1))

In [7]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

class AudioDataset(Dataset):
    def __init__(self, root_dir, split):
        """
        Args:
            root_dir (str): Root directory for preprocessed data.
            split (str): One of 'train', 'val', or 'test'.
        """
        self.root_dir = os.path.join(root_dir, split)
        self.file_paths = []
        self.labels = []

        # Collect all file paths and their corresponding labels
        for label_idx, genre_folder in enumerate(sorted(os.listdir(self.root_dir))):
            # print(genre_folder)
            genre_dir = os.path.join(self.root_dir, genre_folder)
            if not os.path.isdir(genre_dir):
                continue
            files = [os.path.join(genre_dir, f) for f in os.listdir(genre_dir) if f.endswith('.npy')]
            self.file_paths.extend(files)
            self.labels.extend([label_idx] * len(files))


        # self.mean = torch.tensor([-280.04968, 114.869, -2.329334, 26.485447, 2.461681,
        #                           7.954758, -1.3554215, 3.4597769, -2.9572344, 2.5802717,
        #                           -2.7177243, 1.520156, -2.8376024]
        #                         )
        # self.std = torch.tensor([[117.78101, 49.52332, 36.08587, 23.085941, 19.979362, 17.462248,
        #                           15.67521, 14.535121, 13.653436, 13.152688, 12.6906, 12.248392,
        #                           11.782216]
        #                         ])
        # all_mfccs = []
        # for file in self.file_paths:
        #     mfcc = np.load(file).astype(np.float32)
        #     all_mfccs.append(mfcc)
        # all_mfccs = np.array(all_mfccs)
        # self.mean = np.mean(all_mfccs, axis=(0, 1))  # Mean along time and feature axis
        # self.std = np.std(all_mfccs, axis=(0, 1))
        # print("aoiejfaofjeoiwj", self.mean, self.std)

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        file_path = self.file_paths[idx]
        label = self.labels[idx]

        # Load the .npy file
        mfcc = np.load(file_path).astype(np.float32)
        mfcc = (mfcc - np.mean(mfcc)) / np.std(mfcc)

        mfcc = torch.tensor(mfcc)

        # mfcc = (mfcc- self.mean) / self.std

        label = torch.tensor(label, dtype=torch.long)
        label = torch.nn.functional.one_hot(label, num_classes=8).float()

        return mfcc, label

In [None]:
# class AudioDataset(Dataset):
#     def __init__(self, data_dir, split, labels_onehot, transform=None):
#         """
#         Args:
#             data_dir: Root directory of organized dataset
#             split: 'train', 'val', or 'test'
#             metadata_path: Path to metadata.csv with labels
#             transform: Optional transform to be applied on features
#         """
#         if split == "training":
#           self.data_dir = os.path.join(data_dir, 'train')
#         elif split == "validation":
#           self.data_dir = os.path.join(data_dir, 'val')
#         elif split == 'test':
#           self.data_dir = os.path.join(data_dir, split)
#         self.transform = transform

#         # Load metadata
#         metadata = pd.read_csv(metadata_path)
#         # self.metadata = metadata[metadata['track_id'].isin(indices)]
#         self.metadata = metadata[metadata['split'] == split].copy()
#         # print(split)
#         print(self.metadata.shape)
#         self.metadata = self.metadata.drop('split', axis=1)

#         # Separate track IDs and genre labels
#         self.track_ids = self.metadata['track_id'].values
#         self.labels = self.metadata.drop('track_id', axis=1).values  # Columns 1-8 are genre one-hot encodings
#         # print(indices.shape)
#         print(self.track_ids.shape)
#         # Get file paths
#         self.file_paths = []
#         for track_id in self.track_ids:
#           row = self.metadata[self.metadata['track_id'] == track_id]
#           genre_columns = list(self.metadata.columns[1:])  # Get genre column names
#           genre = genre_columns[row.drop('track_id', axis=1).values[0].argmax()]

#           file_path = os.path.join(self.data_dir, genre, f"{track_id:06d}.mp3")
#           if os.path.exists(file_path):
#               self.file_paths.append(file_path)
#         print(len(self.file_paths))
#     def __len__(self):
#         return len(self.file_paths)

#     def __getitem__(self, idx):
#         file_path = self.file_paths[idx]
#         label = torch.FloatTensor(self.labels[idx])

#         # Load and process audio file
#         features = extract_mfcc_features(file_path)

#         if self.transform:
#             features = self.transform(features)

#         return features, label


In [None]:
# def extract_mfcc_features(file_path):
#   """Extract MFCC features from audio file."""
#   # Load audio file
#   shape = (13, 2582)
#   y, sr = librosa.load(file_path, sr=SAMPLE_RATE)

#   # Extract MFCC features
#   mfcc = librosa.feature.mfcc(y=y, sr=22050, n_mfcc=13, n_fft=512, hop_length=256)
#   padded_mfcc = np.zeros(shape, dtype=np.float32)
#   padded_mfcc[:, :min(mfcc.shape[1], shape[1])] = mfcc[:, :shape[1]]
#   # print(padded_mfcc.shape)
#   padded_mfcc = padded_mfcc.T

#   return padded_mfcc

## model + training

In [89]:
import torch.nn.init as init


class RNNClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout=0.5):
        super(RNNClassifier, self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0,
            bidirectional=True
        )

        self.fc = nn.Sequential(
            nn.Linear(hidden_size * 2, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, num_classes),
            # nn.Sigmoid()
        )
        self._initialize_weights()

    def _initialize_weights(self):
        # Initialize LSTM weights
        for name, param in self.lstm.named_parameters():
            if 'weight' in name:
                init.xavier_uniform_(param)
            elif 'bias' in name:
                init.zeros_(param)

        # Initialize FC weights
        for name, param in self.fc.named_parameters():
            if 'weight' in name:
                init.xavier_uniform_(param)
            elif 'bias' in name:
                init.zeros_(param)


    def forward(self, x):
        # x shape: (batch_size, sequence_length, input_size)
        lstm_out, _ = self.lstm(x)

        # Use last time step output
        lstm_out = lstm_out[:, -1, :]

        # Pass through fully connected layers
        output = self.fc(lstm_out)
        return output

In [72]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device):
    """Train the RNN model."""
    best_val_loss = float('inf')

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0
        for features, labels in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):
            features, labels = features.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(features)
            # print(outputs.dtype, labels.dtype)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        # Validation phase
        model.eval()
        val_loss = 0
        matched = [0]*8
        correct = 0
        total = 0
        with torch.no_grad():
            for features, labels in val_loader:
                features, labels = features.to(device), labels.to(device)
                outputs = model(features)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
                # print accuracy
                predicted = torch.argmax(outputs, dim=1)
                labels_one = torch.argmax(labels, dim=1)

                for i in range(len(predicted)):
                  if predicted[i] == labels_one[i]:
                    matched[predicted[i]] += 1
                correct += (predicted == labels_one).sum().item()
                total += labels_one.numel()


        print(correct/total, [c/100 for c in matched])

        # Print epoch statistics
        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'Training Loss: {train_loss:.4f}', f'Validation Loss: {val_loss:.4f}')
        # print()

        # Save best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), 'best_model.pth')

In [108]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class FocalLoss(nn.Module):
    def __init__(self, gamma=0, alpha=None, size_average=True):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        if isinstance(alpha,(float,int,int)): self.alpha = torch.Tensor([alpha,1-alpha])
        if isinstance(alpha,list): self.alpha = torch.Tensor(alpha)
        self.size_average = size_average

    def forward(self, input, target):
        if input.dim()>2:
            input = input.view(input.size(0),input.size(1),-1)  # N,C,H,W => N,C,H*W
            input = input.transpose(1,2)    # N,C,H*W => N,H*W,C
            input = input.contiguous().view(-1,input.size(2))   # N,H*W,C => N*H*W,C
        target = target.long()
        target = target.argmax(dim=1)



        logpt = F.log_softmax(input, dim=-1)
        logpt = logpt.gather(1,target.unsqueeze(1))
        logpt = logpt.view(-1)
        pt = Variable(logpt.data.exp())

        if self.alpha is not None:
            if self.alpha.type()!=input.data.type():
                self.alpha = self.alpha.type_as(input.data)
            at = self.alpha.gather(0,target.data.view(-1))
            logpt = logpt * Variable(at)

        loss = -1 * (1-pt)**self.gamma * logpt
        if self.size_average: return loss.mean()
        else: return loss.sum()

In [109]:
# Constants
SAMPLE_RATE = 2000
N_MFCC = 13
HOP_LENGTH = 512
N_FFT = 2048
DURATION = 30  # seconds
# N_SEGMENTS = 10  # number of segments per song

data_dir = "data/organized_small"
metadata_path = os.path.join(data_dir, "metadata.csv")
batch_size = 32
hidden_size = 16
num_layers = 3
num_classes = 8  # number of genres
learning_rate = 0.0001
num_epochs = 50

# Create datasets and dataloaders
npy_root = "data/mfcc_small"
train_dataset = AudioDataset(npy_root, "train")
val_dataset = AudioDataset(npy_root, "val")
test_dataset = AudioDataset(npy_root, "test")
print(len(train_dataset))

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# for batch in train_loader:
#     print(batch[0].shape)
#     break

# Initialize model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
input_size = N_MFCC  # MFCC + delta + delta-delta
model = RNNClassifier(input_size, hidden_size, num_layers, num_classes).to(device)

# Set up training
# criterion = nn.CrossEntropyLoss()
criterion = FocalLoss(gamma=2, alpha=0.25, size_average=True)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
print(device)
# Train model
# train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device)


2338
cuda


In [110]:
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device)

Epoch 1/50:   0%|          | 0/74 [00:00<?, ?it/s]


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# Load the best model
model.load_state_dict(torch.load('best_model.pth'))
model.to(device)
model.eval()

def compute_accuracy(loader, model):
    correct = 0
    total = 0
    matched = [0]*8
    with torch.no_grad():
        for features, labels in loader:
            features, labels = features.to(device), labels.to(device)
            outputs = model(features)
            # print(labels)
            # predicted = (outputs > 0.5).float()  # Adjust for your task
            predicted = torch.argmax(outputs, dim=1)
            labels_one = torch.argmax(labels, dim=1)

            for i in range(len(predicted)):
              if predicted[i] == labels_one[i]:
                matched[predicted[i]] += 1
            correct += (predicted == labels_one).sum().item()
            total += labels_one.numel()
    print("correct classified: ", correct)
    print("total :", total)

    print([c/100 for c in matched])
    return correct / total

  model.load_state_dict(torch.load('best_model.pth'))


In [None]:
train_accuracy = compute_accuracy(train_loader, model)
val_accuracy = compute_accuracy(val_loader, model)

print(f"Final Training Accuracy: {train_accuracy:.4f}")
print(f"Final Validation Accuracy: {val_accuracy:.4f}")

KeyboardInterrupt: 

In [None]:
test_accuracy = compute_accuracy(test_loader, model)
print(f"test accuracy; {test_accuracy:.4f}")

correct classified:  195
total : 800
[0.03, 0.02, 0.36, 0.5, 0.0, 0.57, 0.0, 0.47]
test accuracy; 0.2437


In [None]:
for raw_pred in raw_preds:
    pred_labels = np.argmax(raw_pred, axis=1)
    true_labels = np.argmax(test_labels, axis=1)

    print(np.sum(pred_labels == true_labels)/800)
    correct = [0]*8
    for i in range(800):
        if pred_labels[i] == true_labels[i]:
            correct[true_labels[i]] +=1


    print([c/100 for c in correct])