<a href="https://colab.research.google.com/github/AndreisMarco/02456_G128_bird_classification/blob/main/scripts/06_Audio_classification_with_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Set up environment

In [1]:
!pip install datasets evaluate --quiet

## 1.1 Import libraries

In [2]:
# setting up Drive and path for data loading and saving
import os
from google.colab import drive

# for data processing
import numpy as np
import re
from datasets import Dataset, concatenate_datasets
from transformers import AutoFeatureExtractor

# for model training and evaluation
import torch
from torch import nn
import evaluate
from torch.utils.data import DataLoader
from datetime import datetime

# for visualisation
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [4]:
# enforcing reproducibility - from NLP - lab 2 notebook
import random
import numpy as np

def enforce_reproducibility(seed=42):
    # Sets seed manually for both CPU and CUDA
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # For atomic operations there is currently
    # no simple way to enforce determinism, as
    # the order of parallel operations is not known.
    # CUDNN
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # System based
    random.seed(seed)
    np.random.seed(seed)

In [5]:
enforce_reproducibility()

## 1.2 Prepare data

In [6]:
# mount Drive and set path
drive.mount('/content/drive')
path = '/content/drive/MyDrive/Deep Learning - DTU 2024/project'
os.chdir(path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
# load (one batch of) preprocessed data
# batch_path = 'batch_1'
# dataset = Dataset.load_from_disk(batch_path).remove_columns('__index_level_0__')

In [8]:
# from Marco's code

def sort_numerically(batch_paths):
    '''
    Necessary for standardizing the batches import order
    '''
    def extract_number(batch_dir):
        match = re.search(r'(\d+)', batch_dir)
        return int(match.group(1)) if match else 0
    sorted_paths = sorted(batch_paths, key=extract_number)
    return sorted_paths

def load_and_merge_batches(batch_folder):
    '''
    Loads all .arrow files and merges them in a single dataset
    '''
    print(f"Loading and merging batches from folder: {batch_folder}")
    batch_paths = [f for f in os.listdir(batch_folder) if os.path.isdir(os.path.join(batch_folder, f))]
    batch_paths = sort_numerically(batch_paths)
    datasets_list = []

    for batch_dir in batch_paths:
        batch_path = os.path.join(batch_folder, batch_dir)
        dataset = Dataset.load_from_disk(batch_path)
        datasets_list.append(dataset)

    merged_dataset = concatenate_datasets(datasets_list)
    print(f"Merged {len(datasets_list)} batches into a single dataset.")
    return merged_dataset

In [9]:
dataset = load_and_merge_batches("processed_data")

Loading and merging batches from folder: processed_data
Merged 15 batches into a single dataset.


In [10]:
# inspect structure
dataset = dataset.remove_columns('__index_level_0__')
dataset

Dataset({
    features: ['audio', 'label'],
    num_rows: 24496
})

In [11]:
dataset[0].keys()

dict_keys(['audio', 'label'])

In [12]:
print(set(dataset['label']))
num_classes = len(set(dataset['label']))
print(f"Number of classes in the dataset: {num_classes}")

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49}
Number of classes in the dataset: 50


In [13]:
# re-assign classes - takes 3 min
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
label_encoder.fit(dataset["label"])  # Fit on all labels in training set

def remap_labels(batch, label_column='label'):
    batch[label_column] = label_encoder.transform(batch[label_column])
    return batch

dataset = dataset.map(remap_labels, batched=True)
all_classes = np.unique(dataset['label'])
print(f"Renamed classes: {all_classes}")

Renamed classes: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49]


### 1.2.1 Feature Extraction

In [14]:
model_dir = 'facebook/wav2vec2-base-960h'
feature_extractor = AutoFeatureExtractor.from_pretrained(model_dir)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [15]:
# extract features - from Marco's code - takes 15 min
# def preprocess_function(example):
#     inputs = feature_extractor(example['audio'], sampling_rate=16000, padding=True)
#     return inputs
# dataset = dataset.map(preprocess_function, remove_columns="audio", batched=True, batch_size=32)
# print("Preprocessed dataset with feature extractor.")

In [16]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Compute class weights
all_classes = np.unique(dataset['label'])  # Assuming 'label' is your target column

### 1.2.2 Split Dataset

In [17]:
# split dataset into train, val and test - from Marco's code
dataset = dataset.train_test_split(test_size=0.1, shuffle=True, stratify_by_column="label", seed=42)
print("Split dataset into training and testing.")

Split dataset into training and testing.


### 1.2.3 Compute class weights

In [18]:
class_weights = compute_class_weight('balanced', classes=all_classes, y=dataset['train']['label'])

# Handle missing classes
all_class_weights = {}
for class_id in all_classes:
  if class_id in class_weights:
      all_class_weights[class_id] = class_weights[class_id]
  else:
      all_class_weights[class_id] = 1.0  # or any default weight you prefer
# Convert to Tensor
class_weight_tensor = torch.tensor(list(all_class_weights.values()), dtype=torch.float32).to(device)

In [19]:
all_classes

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])

In [20]:
print(f"Computed class weights: {class_weights}")
print(f"Class weights tensor moved to device: {device}")

Computed class weights: [1.48959459 2.0412963  2.84464516 1.3122619  0.52995192 1.29302053
 1.60919708 2.19363184 0.71461912 0.41130597 2.18277228 0.36289712
 0.62014065 1.33208459 0.65418398 1.32011976 1.02539535 1.3608642
 1.48457912 1.91704348 0.7511414  0.85284333 0.86454902 0.89074747
 1.59753623 1.8144856  0.44313568 2.19363184 0.60152797 1.67015152
 1.26701149 0.41400939 1.76368    1.09955112 1.43155844 1.4177492
 1.79967347 1.74968254 0.85284333 1.55253521 1.38219436 1.71564202
 0.85119691 0.54977556 0.58245707 2.51954286 0.83984762 2.15082927
 3.47181102 0.93613588]
Class weights tensor moved to device: cuda


In [21]:
# from sklearn.utils.class_weight import compute_class_weight

# # Compute class weights and store in a dict
# class_weights = compute_class_weight('balanced', classes=np.unique(dataset['train']['label']), y=dataset['train']['label'])
# class_weights = {class_id: weight for class_id, weight in zip(np.unique(dataset['train']['label']), class_weights)}
# print(f"Computed class weights: {class_weights}")
# # Convert weights to Tensor
# class_weight_tensor = torch.tensor(list(class_weights.values()), dtype=torch.float32).to(device)
# print(f"Class weights tensor moved to device: {device}")

### 1.2.4 Dataset to Loader

In [22]:
# from torch.nn.utils.rnn import pad_sequence

MAX_LENGTH = 160000  # Use the longest sequence length in your dataset

def collate_fn(batch):
    inputs = []
    for item in batch:
        input_values = item['audio'].clone().detach()

        # running the line below didn't work properly, I had to manually pad
        # padded_inputs = pad_sequence(inputs, batch_first=True).unsqueeze(1)

        if len(input_values) < MAX_LENGTH: # pad
            padded = torch.cat((input_values, torch.zeros(MAX_LENGTH - len(input_values))))
        else: # truncate
            padded = input_values[:MAX_LENGTH]
        inputs.append(padded)
    labels = torch.tensor([item['label'] for item in batch])
    return torch.stack(inputs).unsqueeze(1), labels

In [23]:
# convert dataset col to tensors
dataset['train'].set_format(type='torch', columns=['audio', 'label'])
dataset['test'].set_format(type='torch', columns=['audio', 'label'])

# 2. Model Training

## 2.1 Initiate CNN

In [24]:
class AudioCNN(nn.Module):
    def __init__(self, num_classes):
        super(AudioCNN, self).__init__()
        self.conv1 = nn.Conv1d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool1d(2, 2)
        self.fc1 = nn.Linear(64 * 40000, 256)  # input size should be 160000, which is confirmed wile the model was trained
        self.bn_fc1 = nn.BatchNorm1d(256) # batchnorm didn't really work
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(256, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        # debug step:
        # print("Shape after conv1:", x.shape)

        x = self.pool(self.relu(self.conv2(x)))
        # print("Shape after conv2:", x.shape)

        x = x.view(x.size(0), -1)  # Flatten the tensor
        #print("Shape after flattening:", x.shape)

        # x = self.dropout(self.relu(self.fc1(x)))
        x = self.dropout(self.relu(self.bn_fc1(self.fc1(x))))
        # print("Shape after fc1:", x.shape)

        x = self.fc2(x)  # Second fully connected layer (output)
        # print("Shape after fc2:", x.shape)
        return x

In [25]:
cnn = AudioCNN(num_classes).to(device)
print(cnn)

AudioCNN(
  (conv1): Conv1d(1, 32, kernel_size=(3,), stride=(1,), padding=(1,))
  (conv2): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=2560000, out_features=256, bias=True)
  (bn_fc1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=256, out_features=50, bias=True)
  (relu): ReLU()
)


## 2.2 Load Loss, Optimizer and Performance Metric

In [26]:
loss_fct = nn.CrossEntropyLoss(weight=class_weight_tensor)
optimizer = torch.optim.Adam(cnn.parameters(), lr=1e-5, weight_decay=1e-2)

In [27]:
# load performace metric: accuracy
accuracy = evaluate.load("accuracy")

## 2.3 Model Training

In [28]:
# directory for model saving
current_time = datetime.now().strftime("%m-%d_%H-%M-%S")
save_dir = f"checkpoints_{current_time}"
os.makedirs(save_dir, exist_ok=True)

In [None]:
num_epochs = 5
batch_size = 16

# load data
train_loader = torch.utils.data.DataLoader(dataset['train'], batch_size=batch_size, collate_fn=collate_fn)
test_loader = torch.utils.data.DataLoader(dataset['test'], batch_size=batch_size, collate_fn=collate_fn)

# init metric containers
train_iter, train_losses, train_accs = [], [], []
test_iter, test_losses, test_accs = [], [], []

scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# train and evaluate
for epoch in range(num_epochs):
  cnn.train()
  step = 0
  for inputs, labels in train_loader:
    step += 1

    inputs, labels = inputs.to(device), labels.to(device)
    # print(f"Shape of inputs before passing to model: {inputs.shape}")
    # print(f"Unique values in labels: {torch.unique(labels)}")

    optimizer.zero_grad()
    outputs = cnn(inputs)

    loss = loss_fct(outputs, labels)
    loss.backward()
    optimizer.step()

    if step % 689 == 0:
      train_iter.append(step + epoch*len(train_loader))
      train_losses.append(loss.item())
      train_acc = accuracy.compute(predictions=outputs.argmax(axis=1), references=labels)['accuracy']
      train_accs.append(train_acc)
      print(f"Epoch [{epoch+1}/{num_epochs}], Step [{step}/{len(train_loader)}], Train loss: {loss.item():.4f}, Train accuracy: {train_acc:.4f}")

      # calculate, append and display evaluation reports
      cnn.eval()
      test_loss, test_acc = 0, 0
      with torch.no_grad():
        for inputs, labels in test_loader:
          inputs, labels = inputs.to(device), labels.to(device)
          labels = torch.clamp(labels, 0, num_classes - 1).long()
          outputs = cnn(inputs)
          test_loss += loss_fct(outputs, labels).item()
          test_acc += accuracy.compute(predictions=outputs.argmax(axis=1), references=labels)['accuracy']

        # append to reports
        test_iter.append(step + epoch*len(train_loader))
        test_losses.append(test_loss / len(test_loader))
        test_accs.append(test_acc / len(test_loader))

        # display reports
        print(f"Test Loss: {test_loss / len(test_loader):.4f}, Test Accuracy: {test_acc / len(test_loader):.4f}")

  # schedule lr
  scheduler.step()

  # Save the model checkpoint at the end of each epoch
  checkpoint_path = os.path.join(save_dir, f"model_epoch_{epoch+1}.pth")
  torch.save({
    'epoch': epoch + 1,
    'model_state_dict': cnn.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss.item(),
  }, checkpoint_path)
  print(f"Model saved to {checkpoint_path}")

print('')
print(f'Final training loss: {str(train_losses[-1])} accuracy: {str(train_accs[-1])}')
print(f'Final validation loss: {str(test_losses[-1])} accuracy: {str(test_accs[-1])}')

# Save the final model - with datetime_id


final_model_path =  os.path.join(save_dir, f"/final_model.pth")
torch.save(cnn.state_dict(), final_model_path)
print(f"Final model saved to {final_model_path}")

Epoch [1/5], Step [689/1378], Train loss: 3.2199, Train accuracy: 0.2500
Test Loss: 7.7746, Test Accuracy: 0.0195
Epoch [1/5], Step [1378/1378], Train loss: 3.9224, Train accuracy: 0.0714
Test Loss: 3.9106, Test Accuracy: 0.0179
Model saved to checkpoints_11-26_15-49-06/model_epoch_1.pth
Epoch [2/5], Step [689/1378], Train loss: 3.0229, Train accuracy: 0.4375
Test Loss: 4.0102, Test Accuracy: 0.1059
Epoch [2/5], Step [1378/1378], Train loss: 3.9216, Train accuracy: 0.0714
Test Loss: 3.9090, Test Accuracy: 0.0179
Model saved to checkpoints_11-26_15-49-06/model_epoch_2.pth
Epoch [3/5], Step [689/1378], Train loss: 2.8098, Train accuracy: 0.4375
Test Loss: 3.9594, Test Accuracy: 0.0812
Epoch [3/5], Step [1378/1378], Train loss: 3.9208, Train accuracy: 0.0000
Test Loss: 3.9074, Test Accuracy: 0.0511
Model saved to checkpoints_11-26_15-49-06/model_epoch_3.pth
Epoch [4/5], Step [689/1378], Train loss: 2.6070, Train accuracy: 0.4375
Test Loss: 4.7324, Test Accuracy: 0.0674


In [None]:
# code for resuming
# checkpoint = torch.load("checkpoints/model_epoch_X.pth")  # Replace X with the desired epoch
# cnn.load_state_dict(checkpoint['model_state_dict'])
# optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# start_epoch = checkpoint['epoch']
# loss = checkpoint['loss']

## 2.4 Plot Performance

In [None]:
train_iter

In [None]:
# plots of final loss and accuracy of training and validation data
plt.figure(figsize=(12, 6))

plt.subplot(1, 2, 1)
plt.plot(train_iter, train_losses, label='Train Loss')
plt.plot(test_iter, test_losses, label='Validation Loss')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(train_iter, train_accs, label='Train Accuracy')
plt.plot(test_iter, test_accs, label='Validation Accuracy')
plt.xlabel('Iteration')
plt.ylabel('Accuracy')
plt.legend()

plt.suptitle('Training and Validation Loss and Accuracy')

plt.show()

**Notes:**

_with_ feature extractor:
- **lr=3e-5, weight_decay=0.01:** Final training loss: 3.722187042236328 accuracy: 0.125
Final validation loss: 3.581272809536426 accuracy: 0.13271103896103897

_without_ feature extractor:
- **lr=3e-5, weight_decay=0.01:** Final training loss: 3.9091145992279053 accuracy: 0.0625
Final validation loss: 3.9065329734381145 accuracy: 0.05113636363636364
- **lr=3e-4, weight_decay=1e-4:** Final training loss: 3.8578035831451416 accuracy: 0.0625
Final validation loss: 3.8629037931367947 accuracy: 0.05113636363636364
- **10 epochs:** Epoch [1/10], Step [1000/1378], Train loss: 3.8586, Train accuracy: 0.0000
Test Loss: 3.8736, Test Accuracy: 0.0511
Model saved to checkpoints_11-25_14-39-54/model_epoch_1.pth
Epoch [2/10], Step [1000/1378], Train loss: 3.7815, Train accuracy: 0.0000
Test Loss: 3.8309, Test Accuracy: 0.0511
Model saved to checkpoints_11-25_14-39-54/model_epoch_2.pth
Epoch [3/10], Step [1000/1378], Train loss: 3.7240, Train accuracy: 0.0000
Test Loss: 3.8027, Test Accuracy: 0.0511
Model saved to checkpoints_11-25_14-39-54/model_epoch_3.pth
Epoch [4/10], Step [1000/1378], Train loss: 3.6817, Train accuracy: 0.0000
Test Loss: 3.7847, Test Accuracy: 0.0511
Model saved to checkpoints_11-25_14-39-54/model_epoch_4.pth
Epoch [5/10], Step [1000/1378], Train loss: 3.6327, Train accuracy: 0.0625
Test Loss: 3.7637, Test Accuracy: 0.0548
Model saved to checkpoints_11-25_14-39-54/model_epoch_5.pth
Epoch [6/10], Step [1000/1378], Train loss: 3.1374, Train accuracy: 0.1875
Test Loss: 3.4809, Test Accuracy: 0.1343
Model saved to checkpoints_11-25_14-39-54/model_epoch_6.pth
Epoch [7/10], Step [1000/1378], Train loss: 2.5359, Train accuracy: 0.3125
Test Loss: 3.1597, Test Accuracy: 0.1928
Model saved to checkpoints_11-25_14-39-54/model_epoch_7.pth
Epoch [8/10], Step [1000/1378], Train loss: 1.9593, Train accuracy: 0.3750
Test Loss: 2.8891, Test Accuracy: 0.2646
Model saved to checkpoints_11-25_14-39-54/model_epoch_8.pth
Epoch [9/10], Step [1000/1378], Train loss: 1.4299, Train accuracy: 0.5625
Test Loss: 2.5677, Test Accuracy: 0.3758
Model saved to checkpoints_11-25_14-39-54/model_epoch_9.pth
Epoch [10/10], Step [1000/1378], Train loss: 0.8334, Train accuracy: 0.8125
Test Loss: 2.3671, Test Accuracy: 0.4614
Model saved to checkpoints_11-25_14-39-54/model_epoch_10.pth

Final training loss: 0.8333953619003296 accuracy: 0.8125
Final validation loss: 2.3671497780007202 accuracy: 0.4614448051948052
Final model saved to /final_model.pth

- try batch normalisation: Final training loss: 1.1365776062011719 accuracy: 0.875
Final validation loss: 27.289619705893777 accuracy: 0.037337662337662336
- **added dropout:** Epoch [1/1], Step [500/1378], Train loss: 3.0752, Train accuracy: 0.3125
Test Loss: 12.0622, Test Accuracy: 0.0605
Epoch [1/1], Step [1000/1378], Train loss: 3.8823, Train accuracy: 0.1875
Test Loss: 3.8838, Test Accuracy: 0.0463
Model saved to checkpoints_11-26_07-26-57/model_epoch_1.pth

Final training loss: 3.8823254108428955 accuracy: 0.1875
Final validation loss: 3.883833321658048 accuracy: 0.04626623376623377
- **lr=3e-4, weight_decay=1e-2:** Epoch [1/1], Step [500/1378], Train loss: 2.9467, Train accuracy: 0.3750
Test Loss: 7.2108, Test Accuracy: 0.0743
Epoch [1/1], Step [1000/1378], Train loss: 3.6674, Train accuracy: 0.0625
Test Loss: 3.7200, Test Accuracy: 0.1002
Model saved to checkpoints_11-26_07-40-09/model_epoch_1.pth

Final training loss: 3.6674227714538574 accuracy: 0.0625
Final validation loss: 3.7200061129285142 accuracy: 0.1002435064935065
Final model saved to /final_model.pth
- **lr=3e-4, weight_decay=1e-1:** Epoch [1/1], Step [689/1378], Train loss: 3.2267, Train accuracy: 0.1250
Test Loss: 39.7332, Test Accuracy: 0.0373
Epoch [1/1], Step [1378/1378], Train loss: 3.8267, Train accuracy: 0.0714
Test Loss: 3.8422, Test Accuracy: 0.0548
Model saved to checkpoints_11-26_07-51-10/model_epoch_1.pth

Final training loss: 3.826742172241211 accuracy: 0.07142857142857142
Final validation loss: 3.84219745382086 accuracy: 0.05478896103896104
Final model saved to /final_model.pth
- let's remove batchnorm and keep dropout and readjust weight decay: Epoch [1/1], Step [689/1378], Train loss: 3.8826, Train accuracy: 0.0000
Test Loss: 3.8835, Test Accuracy: 0.0511
- batchnorm seems to change the outcomes a lot but results in overfitting
- **no batchnorm, dropout, lr=3e-4, weight_decay=1e-3:** Epoch [1/20], Step [689/1378], Train loss: 3.8826, Train accuracy: 0.0000
Test Loss: 3.8835, Test Accuracy: 0.0511
Epoch [1/20], Step [1378/1378], Train loss: 3.8967, Train accuracy: 0.0000
Test Loss: 3.8584, Test Accuracy: 0.0511
Model saved to checkpoints_11-26_09-39-21/model_epoch_1.pth
Epoch [2/20], Step [689/1378], Train loss: 3.8157, Train accuracy: 0.0000
Test Loss: 3.8381, Test Accuracy: 0.0511
Epoch [2/20], Step [1378/1378], Train loss: 3.8821, Train accuracy: 0.0000
Test Loss: 3.8210, Test Accuracy: 0.0511
Model saved to checkpoints_11-26_09-39-21/model_epoch_2.pth
Epoch [3/20], Step [689/1378], Train loss: 3.7665, Train accuracy: 0.0000
Test Loss: 3.8079, Test Accuracy: 0.0511
Epoch [3/20], Step [1378/1378], Train loss: 3.8764, Train accuracy: 0.0000
Test Loss: 3.7971, Test Accuracy: 0.0511
Model saved to checkpoints_11-26_09-39-21/model_epoch_3.pth
Epoch [4/20], Step [689/1378], Train loss: 3.7312, Train accuracy: 0.2500
Test Loss: 3.7888, Test Accuracy: 0.0548
Epoch [4/20], Step [1378/1378], Train loss: 3.8628, Train accuracy: 0.0714
Test Loss: 3.7621, Test Accuracy: 0.0548
Model saved to checkpoints_11-26_09-39-21/model_epoch_4.pth
Epoch [5/20], Step [689/1378], Train loss: 3.6156, Train accuracy: 0.2500
Test Loss: 3.7550, Test Accuracy: 0.0548
Epoch [5/20], Step [1378/1378], Train loss: 3.5863, Train accuracy: 0.0714
Test Loss: 3.6156, Test Accuracy: 0.0946
Model saved to checkpoints_11-26_09-39-21/model_epoch_5.pth
Epoch [6/20], Step [689/1378], Train loss: 3.4459, Train accuracy: 0.2500
Test Loss: 3.5688, Test Accuracy: 0.0974
Epoch [6/20], Step [1378/1378], Train loss: 3.5571, Train accuracy: 0.1429
Test Loss: 3.4508, Test Accuracy: 0.1197
Model saved to checkpoints_11-26_09-39-21/model_epoch_6.pth
Epoch [7/20], Step [689/1378], Train loss: 2.9947, Train accuracy: 0.2500
Test Loss: 3.4292, Test Accuracy: 0.1250
Epoch [7/20], Step [1378/1378], Train loss: 3.4386, Train accuracy: 0.0714
Test Loss: 3.2594, Test Accuracy: 0.1729
Model saved to checkpoints_11-26_09-39-21/model_epoch_7.pth
Epoch [8/20], Step [689/1378], Train loss: 3.0706, Train accuracy: 0.3125
Test Loss: 3.2220, Test Accuracy: 0.1907
Epoch [8/20], Step [1378/1378], Train loss: 3.3021, Train accuracy: 0.2143
Test Loss: 3.0698, Test Accuracy: 0.2009
Model saved to checkpoints_11-26_09-39-21/model_epoch_8.pth
Epoch [9/20], Step [689/1378], Train loss: 2.8142, Train accuracy: 0.3125
Test Loss: 3.1078, Test Accuracy: 0.1887
Epoch [9/20], Step [1378/1378], Train loss: 3.1340, Train accuracy: 0.2143
Test Loss: 2.8002, Test Accuracy: 0.2731
Model saved to checkpoints_11-26_09-39-21/model_epoch_9.pth
Epoch [10/20], Step [689/1378], Train loss: 3.1712, Train accuracy: 0.1250
Test Loss: 2.8940, Test Accuracy: 0.2419
Epoch [10/20], Step [1378/1378], Train loss: 2.6017, Train accuracy: 0.3571
Test Loss: 2.5704, Test Accuracy: 0.3588
- still training slowly
- ok but I would argue batchnorm makes sense, but the model is not learning properly
- Epoch [1/5], Step [689/1378], Train loss: 2.6843, Train accuracy: 0.3750
Test Loss: 6.7486, Test Accuracy: 0.0690
Epoch [1/5], Step [1378/1378], Train loss: 3.6963, Train accuracy: 0.0714
Test Loss: 3.4846, Test Accuracy: 0.1396
Model saved to checkpoints_11-26_15-12-49/model_epoch_1.pth
Epoch [2/5], Step [689/1378], Train loss: 2.6720, Train accuracy: 0.2500
Test Loss: 4.5244, Test Accuracy: 0.1230
Epoch [2/5], Step [1378/1378], Train loss: 3.9239, Train accuracy: 0.0714
Test Loss: 3.5141, Test Accuracy: 0.1242
Model saved to checkpoints_11-26_15-12-49/model_epoch_2.pth
Epoch [3/5], Step [689/1378], Train loss: 2.1852, Train accuracy: 0.5000
Test Loss: 3.4297, Test Accuracy: 0.2078
Epoch [3/5], Step [1378/1378], Train loss: 3.8191, Train accuracy: 0.0000
Test Loss: 3.6756, Test Accuracy: 0.0917
Model saved to checkpoints_11-26_15-12-49/model_epoch_3.pth
- increase learning rate
- add a layer
- more epochs
- ...