In [None]:
!pip install -q datasets[audio]

In [2]:
from datasets import load_dataset
import torch
import torchaudio
import torch.nn as nn
from torchaudio.transforms import MelSpectrogram, AmplitudeToDB
from torchvision.transforms import Resize, Normalize
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models

In [None]:
# Load the GTZAN dataset ("marsyas/gtzan"), selecting its "all" configuration.
gtzan = load_dataset(path="marsyas/gtzan", name="all")

In [4]:
# Hold out 10% of the examples for evaluation (fixed seed for a repeatable split).
# The result is bound to its own name instead of overwriting `gtzan`, so re-running
# this cell always splits the full dataset rather than re-splitting an
# already-reduced training subset.
gtzan_splits = gtzan["train"].train_test_split(test_size=0.1, shuffle=True, seed=42)
train = gtzan_splits["train"]
test = gtzan_splits["test"]

In [5]:
# Mel-spectrogram front end: 128 mel bands computed from 2048-point FFT frames
# taken every 512 samples, configured for audio sampled at 22050 Hz.
mel_spectrogram = MelSpectrogram(
    sample_rate=22050,
    n_fft=2048,
    hop_length=512,
    n_mels=128,
)

# Converts spectrogram values to a decibel scale (library default settings).
to_db = AmplitudeToDB()

def audio_to_mel(audio):
    """Convert a waveform tensor into a decibel-scaled mel spectrogram.

    Args:
        audio: Waveform tensor, passed unchanged to the module-level
            ``mel_spectrogram`` transform.

    Returns:
        The result of applying ``to_db`` to the mel spectrogram of ``audio``.
    """
    return to_db(mel_spectrogram(audio))

In [6]:
# Rescales inputs to 224x224 before they are fed to the network.
resize = Resize(size=(224, 224))

def preprocess_spectrogram(spectrogram):
    """Turn a spectrogram into a 3-channel tensor resized by ``resize``.

    Args:
        spectrogram: Spectrogram tensor; expected to be 2-D (the code adds one
            leading axis and repeats it three times) -- TODO confirm with caller.

    Returns:
        The 3-channel tensor after the module-level ``resize`` transform.
    """
    # Add a leading channel axis and replicate it three times so the
    # single-channel spectrogram looks like a 3-channel image.
    three_channel = spectrogram.unsqueeze(0).repeat(3, 1, 1)
    return resize(three_channel)

In [7]:
class GTZANDataset(Dataset):
    """Map-style dataset that yields ``(spectrogram, label)`` pairs.

    Each item is built on the fly: the waveform is read from the wrapped
    dataset, converted with ``audio_to_mel`` and optionally post-processed by
    ``transform``.

    Args:
        dataset: Indexable collection whose rows expose
            ``row['audio']['array']`` (the waveform samples) and
            ``row['genre']`` (the label).
        transform: Optional callable applied to the spectrogram before it is
            returned.
    """

    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform

    def __getitem__(self, idx):
        """Return the ``(spectrogram, label)`` pair for row ``idx``."""
        # Fetch the row exactly once: each ``self.dataset[idx]`` access may
        # re-read (and re-decode) the audio, so indexing twice doubles the cost.
        row = self.dataset[idx]
        waveform = torch.tensor(row['audio']['array'], dtype=torch.float32)
        spectrogram = audio_to_mel(waveform)

        if self.transform is not None:
            spectrogram = self.transform(spectrogram)
        return spectrogram, row['genre']

    def __len__(self):
        """Return the number of rows in the wrapped dataset."""
        return len(self.dataset)

# Wrap both splits so every item is passed through `preprocess_spectrogram`.
train_dataset = GTZANDataset(train, transform=preprocess_spectrogram)
test_dataset = GTZANDataset(test, transform=preprocess_spectrogram)

# Shuffle only the training data. The evaluation loader keeps a fixed order so
# that batch composition (and therefore the per-batch-averaged test loss) is
# identical from one evaluation pass to the next.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
# Start from an ImageNet-pretrained ResNet-18. The `weights=` argument replaces
# the deprecated `pretrained=True` flag and selects the same IMAGENET1K_V1
# checkpoint.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Freeze every pretrained parameter so that, initially, only the new head learns.
for param in model.parameters():
    param.requires_grad = False

# Replace the classifier head with a fresh layer producing 10 outputs. It is
# created after the freeze above, so its parameters remain trainable.
model.fc = nn.Linear(model.fc.in_features, 10)

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

In [9]:
# Classification loss computed from the model outputs and integer labels.
criterion = nn.CrossEntropyLoss()

# Optimise only the parameters of the replacement `fc` head.
optimizer = torch.optim.Adam(params=model.fc.parameters(), lr=1e-3)

In [10]:
# Stage 1: run 10 epochs with the current `optimizer`, evaluating on the
# held-out split after every epoch.
for epoch in range(10):
    # ---- training pass ----
    # NOTE(review): model.train() also puts the frozen backbone's BatchNorm
    # layers in training mode, so their running statistics keep updating --
    # confirm this is intended.
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        # Move the batch to the same `device` the model lives on. A hard-coded
        # "cuda" here would crash on CPU-only runtimes even though `device`
        # already falls back to the CPU.
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        # The predicted class is the index of the largest output per sample.
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    # Loss is the mean of the per-batch losses; accuracy is a percentage.
    train_loss = running_loss / len(train_loader)
    train_accuracy = 100 * correct / total

    # ---- evaluation pass (no gradient tracking) ----
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    test_loss = running_loss / len(test_loader)
    test_accuracy = 100 * correct / total

    print(f"Epoch {epoch+1}, Train Loss: {train_loss:.3f}, Train Accuracy: {train_accuracy:.2f}%, "
          f"Test Loss: {test_loss:.3f}, Test Accuracy: {test_accuracy:.2f}%")

Epoch 1, Train Loss: 2.182, Train Accuracy: 21.02%, Test Loss: 1.946, Test Accuracy: 29.00%
Epoch 2, Train Loss: 1.777, Train Accuracy: 40.71%, Test Loss: 1.654, Test Accuracy: 51.00%
Epoch 3, Train Loss: 1.495, Train Accuracy: 55.62%, Test Loss: 1.486, Test Accuracy: 57.00%
Epoch 4, Train Loss: 1.341, Train Accuracy: 61.96%, Test Loss: 1.310, Test Accuracy: 61.00%
Epoch 5, Train Loss: 1.243, Train Accuracy: 63.52%, Test Loss: 1.221, Test Accuracy: 59.00%
Epoch 6, Train Loss: 1.146, Train Accuracy: 66.96%, Test Loss: 1.182, Test Accuracy: 56.00%
Epoch 7, Train Loss: 1.088, Train Accuracy: 70.75%, Test Loss: 1.204, Test Accuracy: 62.00%
Epoch 8, Train Loss: 1.034, Train Accuracy: 70.08%, Test Loss: 1.078, Test Accuracy: 61.00%
Epoch 9, Train Loss: 1.004, Train Accuracy: 70.63%, Test Loss: 1.073, Test Accuracy: 61.00%
Epoch 10, Train Loss: 1.012, Train Accuracy: 68.74%, Test Loss: 1.198, Test Accuracy: 61.00%


In [11]:
# Re-enable gradients for the last residual stage and the classifier head so
# both can be fine-tuned.
for trainable_block in (model.layer4, model.fc):
    for param in trainable_block.parameters():
        param.requires_grad = True

In [12]:
# Fresh optimizer with one parameter group per trainable part: `layer4` gets a
# learning rate ten times smaller than the `fc` head.
optimizer = torch.optim.Adam(
    [
        {"params": model.layer4.parameters(), "lr": 1e-4},
        {"params": model.fc.parameters(), "lr": 1e-3},
    ]
)

In [13]:
# Stage 2: run 10 more epochs with the current `optimizer`, evaluating on the
# held-out split after every epoch.
for epoch in range(10):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        # Move the batch to the same `device` the model lives on. A hard-coded
        # "cuda" here would crash on CPU-only runtimes even though `device`
        # already falls back to the CPU.
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        # The predicted class is the index of the largest output per sample.
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    # Loss is the mean of the per-batch losses; accuracy is a percentage.
    train_loss = running_loss / len(train_loader)
    train_accuracy = 100 * correct / total

    # ---- evaluation pass (no gradient tracking) ----
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    test_loss = running_loss / len(test_loader)
    test_accuracy = 100 * correct / total

    print(f"Epoch {epoch+1}, Train Loss: {train_loss:.3f}, Train Accuracy: {train_accuracy:.2f}%, "
          f"Test Loss: {test_loss:.3f}, Test Accuracy: {test_accuracy:.2f}%")

Epoch 1, Train Loss: 0.872, Train Accuracy: 73.64%, Test Loss: 0.722, Test Accuracy: 70.00%
Epoch 2, Train Loss: 0.254, Train Accuracy: 94.99%, Test Loss: 0.647, Test Accuracy: 75.00%
Epoch 3, Train Loss: 0.093, Train Accuracy: 99.33%, Test Loss: 0.630, Test Accuracy: 76.00%
Epoch 4, Train Loss: 0.059, Train Accuracy: 99.33%, Test Loss: 0.655, Test Accuracy: 75.00%
Epoch 5, Train Loss: 0.038, Train Accuracy: 99.89%, Test Loss: 0.764, Test Accuracy: 75.00%
Epoch 6, Train Loss: 0.054, Train Accuracy: 99.78%, Test Loss: 0.708, Test Accuracy: 77.00%
Epoch 7, Train Loss: 0.051, Train Accuracy: 99.56%, Test Loss: 0.772, Test Accuracy: 71.00%
Epoch 8, Train Loss: 0.042, Train Accuracy: 99.33%, Test Loss: 0.776, Test Accuracy: 75.00%
Epoch 9, Train Loss: 0.055, Train Accuracy: 99.67%, Test Loss: 0.749, Test Accuracy: 80.00%
Epoch 10, Train Loss: 0.036, Train Accuracy: 99.33%, Test Loss: 0.645, Test Accuracy: 77.00%


In [14]:
# Persist only the model's state dict (not the whole module object) to disk.
torch.save(obj=model.state_dict(), f='/content/model.pth')

In [15]:
# Colab-only step: hand the saved weights file to `files.download` so it can be
# retrieved from the Colab runtime. The path must match the one used when saving.
from google.colab import files
files.download('/content/model.pth')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>