In [None]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
import soundfile as sf
import librosa.display
import os
import tarfile

In [None]:

# Path to the dataset tar.gz file
dataset_path = '/content/dev-clean.tar.gz'
# Directories for the extracted audio and for the rendered spectrogram images
extracted_dir = '/content/extracted'
spectrogram_dir = '/content/mel_spectrograms'

# Create directories if they don't exist (each path is declared exactly once
# so a later edit cannot leave two diverging copies in the same cell)
os.makedirs(extracted_dir, exist_ok=True)
os.makedirs(spectrogram_dir, exist_ok=True)

# Extract the dataset
try:
    with tarfile.open(dataset_path, 'r:gz') as tar:
        if hasattr(tarfile, 'data_filter'):
            # Extraction filters are available: refuse members that would
            # write outside extracted_dir (absolute paths, '..', bad links).
            tar.extractall(path=extracted_dir, filter='data')
        else:
            # Older interpreter without extraction filters.
            tar.extractall(path=extracted_dir)
    # Reported only after the archive has been fully extracted and closed.
    print(f"Extraction completed successfully to {extracted_dir}")
except tarfile.ReadError:
    print("Error: Unable to read the tar file. It might be corrupted.")
except EOFError:
    print("Error: The file seems to be incomplete or corrupted.")





Extraction completed successfully to /content/extracted


In [None]:
# Function to generate and save Mel spectrogram
def save_mel_spectrogram(audio_path, save_path):
    """Render the Mel spectrogram of one audio file to an image on disk.

    The audio is loaded at 16 kHz, converted to a 128-band Mel power
    spectrogram (fmax=8000), expressed in dB relative to its own maximum and
    drawn with a colour bar.

    Args:
        audio_path: Path of the audio file to load.
        save_path: Destination path of the image written by ``savefig``.
    """
    # Load the audio file
    y, sr = librosa.load(audio_path, sr=16000)  # Using 16kHz sample rate
    # Generate Mel spectrogram
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    S_dB = librosa.power_to_db(S, ref=np.max)

    # Draw on an explicit figure/axes pair rather than pyplot's implicit
    # "current figure", and close it in ``finally`` so a failure while drawing
    # or saving cannot leak an open figure when this runs in a long loop.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        img = librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', ax=ax)
        fig.colorbar(img, ax=ax, format='%+2.0f dB')
        ax.set_title('Mel spectrogram')
        fig.tight_layout()
        fig.savefig(save_path)
    finally:
        plt.close(fig)
    print(f"Saved Mel spectrogram at {save_path}")


In [None]:
# Walk the extracted tree and render one PNG per .flac recording.
# Images are written flat into spectrogram_dir, named after the audio file.
for root, dirs, files in os.walk(extracted_dir):
    for file in files:
        # Anything that is not FLAC audio is ignored.
        if not file.endswith('.flac'):
            continue
        audio_path = os.path.join(root, file)
        save_path = os.path.join(
            spectrogram_dir,
            f"{os.path.splitext(file)[0]}.png",
        )
        save_mel_spectrogram(audio_path, save_path)


Saved Mel spectrogram at /content/mel_spectrograms/8297-275155-0032.png
Saved Mel spectrogram at /content/mel_spectrograms/8297-275155-0028.png
Saved Mel spectrogram at /content/mel_spectrograms/8297-275155-0014.png
Saved Mel spectrogram at /content/mel_spectrograms/8297-275155-0004.png
Saved Mel spectrogram at /content/mel_spectrograms/8297-275155-0009.png
Saved Mel spectrogram at /content/mel_spectrograms/8297-275155-0001.png
Saved Mel spectrogram at /content/mel_spectrograms/8297-275155-0030.png
Saved Mel spectrogram at /content/mel_spectrograms/8297-275155-0019.png
Saved Mel spectrogram at /content/mel_spectrograms/8297-275155-0020.png
Saved Mel spectrogram at /content/mel_spectrograms/8297-275155-0016.png
Saved Mel spectrogram at /content/mel_spectrograms/8297-275155-0018.png
Saved Mel spectrogram at /content/mel_spectrograms/8297-275155-0024.png
Saved Mel spectrogram at /content/mel_spectrograms/8297-275155-0027.png
Saved Mel spectrogram at /content/mel_spectrograms/8297-275155-0

In [None]:
# FINAL CODE
import os
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import librosa
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from librosa.feature import melspectrogram

# Paths and Parameters
# Root directory that is searched recursively for .flac audio files.
data_dir = '/content/extracted'  # Directory containing audio files
# NOTE(review): sample_rate is not passed to LibriSpeechDataset below, which
# falls back to its own default of 16000 - keep the two values in sync.
sample_rate = 16000
# (rows, columns) every spectrogram is zero-padded / cropped to.
target_shape = (128, 128)  # Target shape for Mel spectrograms
# Standard deviation of the Gaussian noise added to the raw waveform.
noise_factor = 0.1  # Noise level

# Helper Function: Resize Spectrograms
def resize_spectrogram(spectrogram, target_shape, pad_value=0.0):
    """Pad and/or crop an array so that it has exactly ``target_shape``.

    Every axis shorter than its target is padded at the end with
    ``pad_value``; every axis longer than its target is truncated, keeping
    the leading entries.

    Args:
        spectrogram: Array-like with the same number of axes as
            ``target_shape``.
        target_shape: Desired size of each axis.
        pad_value: Constant used for padding. The default of 0 keeps the
            previous behaviour; for dB-scaled input pass the floor value
            (for example -80.0) so the padding represents silence instead
            of the loudest possible bin.

    Returns:
        A new ``numpy.ndarray`` of shape ``target_shape``.

    Raises:
        ValueError: If the input rank differs from ``len(target_shape)``.
    """
    spectrogram = np.asarray(spectrogram)
    target_shape = tuple(target_shape)
    if spectrogram.ndim != len(target_shape):
        raise ValueError(
            f"spectrogram has {spectrogram.ndim} axes but target_shape has {len(target_shape)}"
        )

    # Only pad at the end of an axis, and only by the amount that is missing.
    pad_width = [
        (0, max(0, wanted - current))
        for wanted, current in zip(target_shape, spectrogram.shape)
    ]
    padded = np.pad(spectrogram, pad_width, mode='constant', constant_values=pad_value)

    # Crop every axis (not just the first two) down to the requested size.
    return padded[tuple(slice(0, wanted) for wanted in target_shape)]

# Dataset Class
class LibriSpeechDataset(Dataset):
    """Pairs of (noisy, clean) Mel-spectrogram tensors built from .flac files.

    Each item is produced on demand: the audio file is loaded, Gaussian noise
    is added to the waveform, and both versions are converted to 128-band Mel
    spectrograms in dB (relative to their own maximum), rescaled to [0, 1] and
    padded/cropped to ``target_shape``.

    The rescaling matters for two reasons visible in this file:
    * the raw dB values lie in [-TOP_DB, 0], which a network whose last layer
      is ``Tanh`` can never output, so the loss cannot decrease;
    * ``resize_spectrogram`` pads with zeros, and 0 dB is the *loudest* value
      on the raw scale, whereas 0 on the rescaled axis is the quietest.

    Args:
        data_dir: Directory searched recursively for ``.flac`` files.
        sample_rate: Sample rate the audio is loaded at.
        target_shape: Shape each spectrogram is padded/cropped to.
        noise_factor: Standard deviation of the additive Gaussian noise.
    """

    # Dynamic range (in dB) kept by power_to_db; with ref=np.max the output
    # therefore lies in [-TOP_DB, 0].
    TOP_DB = 80.0

    def __init__(self, data_dir, sample_rate=16000, target_shape=(128, 128), noise_factor=0.1):
        # os.walk yields entries in arbitrary order; sorting fixes the
        # index -> file mapping so that a seeded split is reproducible.
        self.files = sorted(
            os.path.join(root, file)
            for root, _, files in os.walk(data_dir)
            for file in files
            if file.endswith('.flac')
        )
        self.sample_rate = sample_rate
        self.target_shape = target_shape
        self.noise_factor = noise_factor

    def __len__(self):
        """Number of .flac files found under ``data_dir``."""
        return len(self.files)

    @staticmethod
    def _normalize_db(spec_db, top_db=80.0):
        """Map dB values in [-top_db, 0] linearly onto [0, 1] as float32."""
        scaled = (np.asarray(spec_db, dtype=np.float32) + top_db) / top_db
        # Clip guards against tiny excursions caused by float rounding.
        return np.clip(scaled, 0.0, 1.0)

    def _mel_features(self, signal, sr):
        """Waveform -> normalised Mel spectrogram of shape ``target_shape``."""
        mel_power = melspectrogram(y=signal, sr=sr, n_mels=128, fmax=8000)
        mel_db = librosa.power_to_db(mel_power, ref=np.max, top_db=self.TOP_DB)
        # Normalise first so that the zero padding added by the resize step
        # corresponds to the quietest level rather than the loudest.
        return resize_spectrogram(self._normalize_db(mel_db, self.TOP_DB), self.target_shape)

    def __getitem__(self, idx):
        """Return ``(noisy, clean)`` float32 tensors, each with a leading channel axis."""
        audio_path = self.files[idx]
        y, sr = librosa.load(audio_path, sr=self.sample_rate)

        # Add Gaussian noise (kept in the waveform's dtype)
        noise = np.random.normal(0, self.noise_factor, y.shape).astype(y.dtype)
        y_noisy = y + noise

        # Convert to Mel spectrograms and resize
        mel_clean = self._mel_features(y, sr)
        mel_noisy = self._mel_features(y_noisy, sr)

        return (
            torch.tensor(mel_noisy, dtype=torch.float32).unsqueeze(0),
            torch.tensor(mel_clean, dtype=torch.float32).unsqueeze(0),
        )

# Data Loaders
dataset = LibriSpeechDataset(data_dir, target_shape=target_shape, noise_factor=noise_factor)

# Split *indices*, not the dataset object. Handing the Dataset itself to
# train_test_split makes scikit-learn index it item by item, which decodes
# every audio file up front, keeps all spectrograms in memory and freezes the
# random noise to a single draw.
train_indices, val_indices = train_test_split(
    list(range(len(dataset))), test_size=0.2, random_state=42
)

# Subset views stay lazy: an item is only loaded when a batch asks for it, so
# training sees freshly sampled noise every epoch. Note that the validation
# noise is re-drawn per epoch too; increase DataLoader's num_workers if audio
# decoding becomes the bottleneck.
train_data = torch.utils.data.Subset(dataset, train_indices)
val_data = torch.utils.data.Subset(dataset, val_indices)

train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
val_loader = DataLoader(val_data, batch_size=16, shuffle=False)

# Denoising Autoencoder Model
class DenoisingAutoencoder(nn.Module):
    """Three-stage convolutional autoencoder for single-channel inputs.

    The encoder halves the spatial size three times with stride-2 3x3
    convolutions (1 -> 16 -> 32 -> 64 channels); the decoder mirrors it with
    stride-2 transposed convolutions back to one channel. The final
    activation is ``Tanh``, so outputs are bounded to [-1, 1].
    """

    def __init__(self):
        super().__init__()
        widths = (1, 16, 32, 64)

        # Encoder: Conv2d + in-place ReLU for each consecutive channel pair.
        down = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            down.append(nn.Conv2d(c_in, c_out, kernel_size=3, stride=2, padding=1))
            down.append(nn.ReLU(True))
        self.encoder = nn.Sequential(*down)

        # Decoder: the same channel schedule walked backwards; every stage
        # uses ReLU except the last one, which ends in Tanh.
        rev = widths[::-1]
        final_stage = len(rev) - 2
        up = []
        for stage, (c_in, c_out) in enumerate(zip(rev[:-1], rev[1:])):
            up.append(
                nn.ConvTranspose2d(
                    c_in, c_out, kernel_size=3, stride=2, padding=1, output_padding=1
                )
            )
            up.append(nn.Tanh() if stage == final_stage else nn.ReLU(True))
        self.decoder = nn.Sequential(*up)

    def forward(self, x):
        """Encode ``x`` and decode the result."""
        return self.decoder(self.encoder(x))

# Training Setup
# Seed torch (weight initialisation, DataLoader shuffling) and NumPy's global
# RNG so that repeated runs of this cell onwards give the same result.
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DenoisingAutoencoder().to(device)
# Mean squared error between the reconstruction and the clean spectrogram.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training and Validation Functions
def train(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and return the mean loss.

    Args:
        model: Module to optimise; switched to training mode.
        loader: Iterable of ``(noisy, clean)`` tensor batches.
        criterion: Loss called as ``criterion(prediction, clean)``; assumed to
            return a per-batch mean, which is re-weighted by batch size here.
        optimizer: Optimiser stepping ``model``'s parameters.
        device: Device the batches are moved to.

    Returns:
        Loss averaged over the samples that were actually processed.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.train()
    running_loss = 0.0
    samples_seen = 0
    for noisy, clean in loader:
        noisy, clean = noisy.to(device), clean.to(device)
        batch_size = noisy.size(0)
        optimizer.zero_grad()
        loss = criterion(model(noisy), clean)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * batch_size
        samples_seen += batch_size
    if samples_seen == 0:
        raise ValueError("loader yielded no samples; cannot compute an average loss")
    # Divide by what was seen, not len(loader.dataset): the two differ when
    # the loader drops the last batch or samples only part of the dataset.
    return running_loss / samples_seen

def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without gradients and return the mean loss.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        loader: Iterable of ``(noisy, clean)`` tensor batches.
        criterion: Loss called as ``criterion(prediction, clean)``; assumed to
            return a per-batch mean, which is re-weighted by batch size here.
        device: Device the batches are moved to.

    Returns:
        Loss averaged over the samples that were actually processed.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    running_loss = 0.0
    samples_seen = 0
    with torch.no_grad():
        for noisy, clean in loader:
            noisy, clean = noisy.to(device), clean.to(device)
            batch_size = noisy.size(0)
            running_loss += criterion(model(noisy), clean).item() * batch_size
            samples_seen += batch_size
    if samples_seen == 0:
        raise ValueError("loader yielded no samples; cannot compute an average loss")
    # Divide by what was seen, not len(loader.dataset): the two differ when
    # the loader drops the last batch or samples only part of the dataset.
    return running_loss / samples_seen

# Training Loop
num_epochs = 20
for epoch in range(num_epochs):
    # One optimisation pass, then a gradient-free pass over the held-out split.
    train_loss = train(model, train_loader, criterion, optimizer, device)
    val_loss = validate(model, val_loader, criterion, device)
    epoch_report = f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}"
    print(epoch_report)

# Save the Model
# Only the learned parameters are written, not the module object itself.
trained_weights = model.state_dict()
torch.save(trained_weights, 'denoising_autoencoder.pth')
print("Model saved successfully!")


Epoch [1/20], Train Loss: 2722.2853, Val Loss: 2719.3252
Epoch [2/20], Train Loss: 2717.5646, Val Loss: 2719.3167
Epoch [3/20], Train Loss: 2717.5587, Val Loss: 2719.3147
Epoch [4/20], Train Loss: 2717.5574, Val Loss: 2719.3138
Epoch [5/20], Train Loss: 2717.5570, Val Loss: 2719.3136
Epoch [6/20], Train Loss: 2717.5568, Val Loss: 2719.3135
Epoch [7/20], Train Loss: 2717.5566, Val Loss: 2719.3135
Epoch [8/20], Train Loss: 2717.5565, Val Loss: 2719.3133
Epoch [9/20], Train Loss: 2717.5565, Val Loss: 2719.3134
Epoch [10/20], Train Loss: 2717.5564, Val Loss: 2719.3132
Epoch [11/20], Train Loss: 2717.5563, Val Loss: 2719.3130
Epoch [12/20], Train Loss: 2717.5562, Val Loss: 2719.3130
Epoch [13/20], Train Loss: 2717.5562, Val Loss: 2719.3130
Epoch [14/20], Train Loss: 2717.5562, Val Loss: 2719.3130
Epoch [15/20], Train Loss: 2717.5561, Val Loss: 2719.3130
Epoch [16/20], Train Loss: 2717.5562, Val Loss: 2719.3129
Epoch [17/20], Train Loss: 2717.5561, Val Loss: 2719.3129
Epoch [18/20], Train Lo