In [3]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torchaudio

from dataset import UrbanSoundDataset
from cnn_arch_2 import CNNNetwork       # Import model architecture

# --- Training hyper-parameters ---
# Batch size handed to create_data_loader() by the train script cell.
BATCH_SIZE = 128
# Number of times train() runs train_one_epoch() over the data loader.
EPOCHS = 10
# Learning rate given to the Adam optimizer in the train script cell.
LEARNING_RATE = 0.001

# --- Dataset location (relative paths, resolved against the working directory) ---
# Both values are passed positionally to UrbanSoundDataset.
ANNOTATIONS_FILE = "data/UrbanSound8K/metadata/UrbanSound8K.csv"
AUDIO_DIR = "data/UrbanSound8K/audio"
# Sample rate used for the MelSpectrogram transform and also passed to the dataset.
SAMPLE_RATE = 22050 
# Passed to UrbanSoundDataset; presumably the fixed clip length in samples
# (equal to SAMPLE_RATE, i.e. one second of audio) -- TODO confirm in dataset.py.
NUM_SAMPLES = 22050

def create_data_loader(train_data, batch_size, shuffle=True):
    """Wrap a training dataset in a ``DataLoader``.

    Args:
        train_data: Dataset to draw training samples from.
        batch_size: Number of samples per batch.
        shuffle: Reshuffle the samples at the start of every epoch. Defaults to
            ``True`` so batches are not tied to the dataset's storage order;
            pass ``False`` for deterministic, sequential batches. Ignored for
            iterable-style datasets, which define their own ordering.

    Returns:
        A ``DataLoader`` over ``train_data``.
    """
    from torch.utils.data import IterableDataset

    # DataLoader raises ValueError when shuffle is requested for an
    # IterableDataset, so fall back to the dataset's own order in that case.
    if isinstance(train_data, IterableDataset):
        shuffle = False

    return DataLoader(train_data, batch_size=batch_size, shuffle=shuffle)

def train_one_epoch(model, data_loader, loss_fn, optimizer, device):
    """Run one optimisation pass over ``data_loader`` and print the last batch loss.

    Args:
        model: Module being trained; it is switched to training mode and then
            called on every input batch.
        data_loader: Iterable yielding ``(inputs, targets)`` pairs.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar
            tensor that supports ``backward()``.
        optimizer: Optimizer whose ``zero_grad()``/``step()`` update the model.
        device: Device each batch is moved to before the forward pass.

    Returns:
        The loss of the final batch as a Python float, or ``None`` when
        ``data_loader`` yielded no batches.
    """
    # Ensure mode-dependent layers (dropout, batch-norm, ...) behave as in
    # training even if the caller evaluated the model between epochs.
    model.train()

    # Stays None when the loader is empty, so the report below cannot hit an
    # unbound variable.
    loss = None

    # iterate over every batch provided by the data loader
    for inputs, targets in data_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        # calculate loss
        predictions = model(inputs)
        loss = loss_fn(predictions, targets)

        # backpropagate loss and update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if loss is None:
        print("Loss: n/a (data loader yielded no batches)")
        return None

    # Convert once, after the loop, to avoid a device sync on every batch.
    last_loss = loss.item()
    print(f"Loss: {last_loss}")
    return last_loss
               
def train(model, data_loader, loss_fn, optimizer, device, epochs):
    """Train ``model`` for ``epochs`` passes over ``data_loader``.

    Each pass is delegated to ``train_one_epoch``; a 1-based epoch header is
    printed before it and a divider line after it. A final message is printed
    once all passes are done.

    Args:
        model: Model forwarded to ``train_one_epoch``.
        data_loader: Batch source forwarded to ``train_one_epoch``.
        loss_fn: Loss callable forwarded to ``train_one_epoch``.
        optimizer: Optimizer forwarded to ``train_one_epoch``.
        device: Device forwarded to ``train_one_epoch``.
        epochs: Number of passes to run.
    """
    divider = "---------------------------"

    for epoch_number in range(1, epochs + 1):
        print(f"Epoch {epoch_number}")
        train_one_epoch(model, data_loader, loss_fn, optimizer, device)
        print(divider)

    print("Finished training")

Using cuda device


In [4]:
# Train script sequence:

if __name__ == "__main__":

    # Prefer the GPU when PyTorch can see one; otherwise run on the CPU
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using {device} device")

    # Mel-spectrogram transform handed to the dataset below
    mel_spectrogram = torchaudio.transforms.MelSpectrogram(
        sample_rate=SAMPLE_RATE, n_fft=1024, hop_length=512, n_mels=64
    )

    # Build the UrbanSoundDataset instance from the configured paths,
    # transform, audio settings and target device
    usd = UrbanSoundDataset(
        ANNOTATIONS_FILE, AUDIO_DIR, mel_spectrogram, SAMPLE_RATE, NUM_SAMPLES, device
    )

    # Wrap the dataset in a data loader using the configured batch size
    train_dataloader = create_data_loader(usd, BATCH_SIZE)

    # Construct the model, move it to the selected device and show its layers
    cnn = CNNNetwork().to(device)
    print(cnn)

    # Instantiate optimizer and loss function
    # NOTE(review): the printed model ends in a Softmax layer while
    # nn.CrossEntropyLoss expects unnormalised logits -- confirm in
    # cnn_arch_2 whether forward() applies that softmax during training.
    optimizer = torch.optim.Adam(cnn.parameters(), lr=LEARNING_RATE)
    loss_fn = nn.CrossEntropyLoss()

    # Run the full training loop
    train(cnn, train_dataloader, loss_fn, optimizer, device, EPOCHS)

    # Persist the learned weights to "cnn.pth" in the working directory
    torch.save(cnn.state_dict(), "cnn.pth")
    print("Model trained and saved")

Using cuda device
CNNNetwork(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv4): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear): Linear(in_features=2560, out_features=10, bias=True)
  (softmax): Softmax(dim=1)
)
Epoch 1
Loss: 2.44245