According to the solution to the Aufgabe 2_b_2, can we get the solution that, the third Autoencoder AE3 has larger Advantage compared with the other 2, becaused of more layers.

a) Add a validation process to your training function6. After each training epoch,
validate the model using the validation signals and print the validation loss
averaged over the epoch.

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchaudio
from torch.utils.data import Dataset, DataLoader
import os
# Autoencoder Models


class AE3(nn.Module):
    def __init__(self):
        super(AE3, self).__init__()
        # Encoder
        self.enc1 = nn.Linear(512, 384)
        self.enc2 = nn.Linear(384, 256)
        self.enc3 = nn.Linear(256, 128)
        
        # Decoder
        self.dec1 = nn.Linear(128, 256)
        self.dec2 = nn.Linear(256, 384)
        self.dec3 = nn.Linear(384, 512)

    def forward(self, x):
        x = F.tanh(self.enc1(x))
        x = F.tanh(self.enc2(x))
        x = F.tanh(self.enc3(x))
        x = F.tanh(self.dec1(x))
        x = F.tanh(self.dec2(x))
        x = F.tanh(self.dec3(x))
        return x

# Custom Audio Dataset
class AudioDataset(Dataset):
    def __init__(self, audio_files, signal_length, frame_length, overlap):
        """
        Args:
            audio_files (list): List of paths to audio files.
            signal_length (int): Desired length of the signal in samples (La).
            frame_length (int): Frame length in samples (LF).
            overlap (int): Overlap of frames in samples (O).
        """
        self.audio_files = audio_files
        self.signal_length = signal_length
        self.frame_length = frame_length
        self.overlap = overlap

    def __len__(self):
        return len(self.audio_files)
    
    def _adjust_frame_length_for_testing(self, frame):
        if frame.shape[1] < 512:
            padding = 512 - frame.shape[1]
            frame = F.pad(frame, (0, padding))
        elif frame.shape[1] > 512:
            frame = frame[:, :512]
        return frame

    def __getitem__(self, idx):
        audio_path = self.audio_files[idx]
        waveform, _ = torchaudio.load(audio_path)

        # Truncate or Zero-pad the signal
        waveform = self._adjust_length(waveform)

        # Normalize the signal
        waveform = self._normalize(waveform)

        # Segment into frames
        frames = self._segment_into_frames(waveform)

        # Adjust frame length for testing
        adjusted_frames = torch.zeros((frames.shape[0], 512))
        for i, frame in enumerate(frames):
            adjusted_frames[i] = self._adjust_frame_length_for_testing(frame.unsqueeze(0))

        return adjusted_frames
        

    def _adjust_length(self, waveform):
        if waveform.shape[1] > self.signal_length:
            return waveform[:, :self.signal_length]
        elif waveform.shape[1] < self.signal_length:
            padding = self.signal_length - waveform.shape[1]
            return F.pad(waveform, (0, padding))
        else:
            return waveform

    def _normalize(self, waveform):
        max_val = torch.max(torch.abs(waveform))
        if max_val > 0:
            return waveform / max_val
        return waveform

    def _segment_into_frames(self, waveform):
        step = self.frame_length - self.overlap
        num_frames = 1 + (waveform.shape[1] - self.frame_length) // step
        frames = torch.zeros((num_frames, self.frame_length))

        for i in range(num_frames):
            start = i * step
            end = start + self.frame_length
            frames[i] = waveform[0, start:end]

        return frames

# Training Function
def train(model, train_loader, validation_loader, epochs, device):
    model = model.to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters())

    for epoch in range(epochs):
        model.train()
        train_loss = 0

        for batch_idx, data in enumerate(train_loader):
            data = data.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, data)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

            
            print(f"Epoch {epoch}, Batch {batch_idx}, Loss: {loss.item()}")

        avg_loss_train = train_loss / len(train_loader)
        print(f"Epoch {epoch}, Average Train Loss: {avg_loss_train}")
        # Validation step
        model.eval()
        validation_loss = 0
        with torch.no_grad():
            for data in validation_loader:
                data = data.to(device)
                output = model(data)
                loss = criterion(output, data)
                validation_loss += loss.item()
        avg_validation_loss = validation_loss / len(validation_loader)
        print(f"Epoch {epoch}, Validation Loss: {avg_validation_loss}")








# Testing Function
def test(model, test_loader, device, overlap=0):
    model = model.to(device)
    model.eval()
    total_snr = 0
    count = 0

    with torch.no_grad():
        for data in test_loader:
            data = data.to(device)
            #print(data.shape)
            reconstructed = model(data)
            original_signal = overlap_add(data, overlap)
            reconstructed_signal = overlap_add(reconstructed, overlap)
            snr = calculate_snr(original_signal, reconstructed_signal)
            print(f"Signal {count}, SNR: {snr}")
            total_snr += snr
            count += 1

    avg_snr = total_snr / count
    print(f"Average SNR: {avg_snr}")

# Helper Functions
def calculate_snr(original, reconstructed):
    noise = original - reconstructed
    signal_power = torch.mean(original ** 2)
    noise_power = torch.mean(noise ** 2)
    snr = 10 * torch.log10(signal_power / noise_power)
    return snr.item()



def overlap_add(frames, overlap):
    frames = frames.squeeze()
    frame_length = frames.shape[1] #L
    frame_zahl= frames.shape[0] #N
    step = frame_length - overlap
    signal_length = step * (frames.shape[0] - 1) + frame_length
    signal = torch.zeros(signal_length)
    window = torch.hann_window(frame_length)  # Hann Rectangular window
    
    

    for i in range(frame_zahl):
        start = i *step
        end = start + frame_length
        #print(end-start, frames[i].size(), window.size())
        signal[start:end] += frames[i] * window

    return signal

In [5]:
# output and save one of the reconstructed test signal for each of the autoencoder
def test_and_save(model, test_loader, device, overlap, model_name, sampling_rate):
    model = model.to(device)
    model.eval()
    with torch.no_grad():
        # Process only the first batch from the test loader
        for data in test_loader:
            data = data.to(device)
            reconstructed = model(data)
            reconstructed_signal = overlap_add(reconstructed, overlap)
            
            # Save the reconstructed signal to a WAV file
            filename = f"{model_name}_reconstructed.wav"
            torchaudio.save(filename, reconstructed_signal.unsqueeze(0), sampling_rate)
            print(f"Reconstructed signal saved as {filename}")

            break  # Process only the first batch

In [6]:
# Assuming a sampling rate of 16 kHz
sampling_rate = 16000
signal_length_seconds = 6  # 6 seconds
signal_length_samples = sampling_rate * signal_length_seconds  # Convert to samples

# Function to get audio file paths
def get_audio_files(directory_path):
    return [os.path.join(directory_path, file) for file in os.listdir(directory_path) if file.endswith('.wav')]

# Directory path for training data
train_directory_path = r'C:\Kursmaterial\Dl der Sprachsignalverarbeitung\Computerübung 2\signals\Train_40'
train_audio_files = get_audio_files(train_directory_path)

# Directory path for validation data
validation_directory_path = r'C:\Kursmaterial\Dl der Sprachsignalverarbeitung\Computerübung 2\signals\Dev_16'
validation_audio_files = get_audio_files(train_directory_path)


# Directory path for test data
test_directory_path = r'C:\Kursmaterial\Dl der Sprachsignalverarbeitung\Computerübung 2\signals\Test_16'
test_audio_files = get_audio_files(test_directory_path)



# Training and testing data loaders
frame_length = 512
frame_length_test = 512

# Update the Overlap
overlap = int(0.5 * frame_length)


train_dataset = AudioDataset(train_audio_files, signal_length_samples, frame_length, overlap)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
validation_dataset = AudioDataset(validation_audio_files, signal_length_samples, frame_length, overlap)
validation_loader = DataLoader(validation_dataset, batch_size=32, shuffle=True)
test_dataset = AudioDataset(test_audio_files, signal_length_samples, frame_length_test, overlap)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)






# Function to train and test a model
def train_and_test_model(model_class, train_loader, validation_loader, test_loader, device, model_name):
    model = model_class().to(device)
    train(model, train_loader, validation_loader, 20, device)  
    test(model, test_loader, device)
    test_and_save(model, test_loader, device, 0, model_name,  sampling_rate)
device = torch.device("cpu")



# Train and test AE3
print("Training and Testing AE3")
train_and_test_model(AE3, train_loader, validation_loader, test_loader, device, "AE3")


Training and Testing AE3
Epoch 0, Batch 0, Loss: 0.013360762037336826
Epoch 0, Batch 1, Loss: 0.014740238897502422
Epoch 0, Average Train Loss: 0.014050500467419624
Epoch 0, Validation Loss: 0.012575080152601004
Epoch 1, Batch 0, Loss: 0.011483998037874699
Epoch 1, Batch 1, Loss: 0.015338124707341194
Epoch 1, Average Train Loss: 0.013411061372607946
Epoch 1, Validation Loss: 0.012347400188446045
Epoch 2, Batch 0, Loss: 0.01212095282971859
Epoch 2, Batch 1, Loss: 0.010705851018428802
Epoch 2, Average Train Loss: 0.011413401924073696
Epoch 2, Validation Loss: 0.011353570967912674
Epoch 3, Batch 0, Loss: 0.011435553431510925
Epoch 3, Batch 1, Loss: 0.010787340812385082
Epoch 3, Average Train Loss: 0.011111447121948004
Epoch 3, Validation Loss: 0.01024607103317976
Epoch 4, Batch 0, Loss: 0.01062150951474905
Epoch 4, Batch 1, Loss: 0.010837488807737827
Epoch 4, Average Train Loss: 0.010729499161243439
Epoch 4, Validation Loss: 0.009912596549838781
Epoch 5, Batch 0, Loss: 0.01023769471794366