In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
import matplotlib.pyplot as plt
import numpy as np


In [4]:
def generate_synthetic_wave(batch_size=8, length=16000, sample_rate=16000):
    t = torch.linspace(0, 1, steps=length)
    waves = []
    for _ in range(batch_size):
        freq = torch.randint(100, 1000, (1,)).item()
        wave = torch.sin(2 * np.pi * freq * t)
        noise = 0.05 * torch.randn_like(wave)
        waves.append((wave + noise).unsqueeze(0))  # shape: [1, length]
    return torch.stack(waves)  # [batch_size, 1, length]


In [6]:
class TinyAudioAutoencoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 16, 9, stride=2, padding=4),
            nn.ReLU(),
            nn.Conv1d(16, 32, 9, stride=2, padding=4),
            nn.ReLU(),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(32, 16, 9, stride=2, padding=4, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose1d(16, 1, 9, stride=2, padding=4, output_padding=1),
            nn.Tanh()
        )

    def forward(self, x):
        z = self.encoder(x)
        return self.decoder(z)


In [12]:
def train(model, loss_fn, epochs=10, device='cuda'):
    model.to(device)
    loss_fn.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    for epoch in range(epochs):
        model.train()
        x = generate_synthetic_wave(batch_size=16).to(device)
        y = model(x)
        loss = loss_fn(y, x)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch+1}: Loss = {loss.item():.4f}")


In [14]:
model = TinyAudioAutoencoder()
losses = {
    "Waveform L2": WaveformL2Loss(),
    "FFT Loss": FFTLoss(),
    "Mel L2": MelSpecL2Loss(),
    "Mel FID": MelFIDLoss(),
}

for name, loss_fn in losses.items():
    print(f"\nðŸ§ª Training with {name}")
    train(model, loss_fn, epochs=5)



ðŸ§ª Training with Waveform L2
Epoch 1: Loss = 0.5028
Epoch 2: Loss = 0.4754
Epoch 3: Loss = 0.4481
Epoch 4: Loss = 0.4181
Epoch 5: Loss = 0.3914

ðŸ§ª Training with FFT Loss
Epoch 1: Loss = 6.9610
Epoch 2: Loss = 6.8325
Epoch 3: Loss = 6.7758
Epoch 4: Loss = 6.6426
Epoch 5: Loss = 6.5873

ðŸ§ª Training with Mel L2
Epoch 1: Loss = 86000024.0000
Epoch 2: Loss = 79670016.0000
Epoch 3: Loss = 73875232.0000
Epoch 4: Loss = 66363668.0000
Epoch 5: Loss = 63252048.0000

ðŸ§ª Training with Mel FID
Epoch 1: Loss = 57867584.0000
Epoch 2: Loss = 50311440.0000
Epoch 3: Loss = 39476568.0000
Epoch 4: Loss = 30372700.0000
Epoch 5: Loss = 22131992.0000
