Final Project  
Class: E-89 Deep Learning, Fall 2025  
Student: Doug Malfacini

Title: Sound Generation - Restoring Damaged Sound

In [None]:
from pathlib import Path

import tensorflow_datasets as tfds
from google.colab import drive

# Attach Google Drive to the Colab filesystem at /content/drive.
drive.mount("/content/drive")

# Folder on the mounted Drive that this notebook reports as its TFDS cache.
# It is created on first use; parents=True/exist_ok=True make re-running safe.
TFDS_DIR = Path("/content/drive/MyDrive/tfds_data")
TFDS_DIR.mkdir(parents=True, exist_ok=True)
print("TFDS cache:", TFDS_DIR)

In [1]:
!pip -q install tensorflow-datasets
!pip -q install apache-beam==2.56.0

import tensorflow_datasets as tfds
import tensorflow as tf

ds_train = tfds.load("nsynth", split="train", shuffle_files=True)
ds_val   = tfds.load("nsynth", split="valid", shuffle_files=True)

# Cap how much you use (keeps runtime/storage sane)
N_TRAIN = 20000
N_VAL   = 2000

ds_train = ds_train.take(N_TRAIN)
ds_val   = ds_val.take(N_VAL)

ex = next(iter(ds_train))
ex.keys(), ex["audio"].shape, ex["audio"].dtype



Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to /root/tensorflow_datasets/nsynth/full/2.3.3...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

ModuleNotFoundError: No module named 'apache_beam'

In [None]:
# NOTE(review): this cell repeats the Drive-mount / TFDS_DIR setup from the
# first cell of the notebook; one of the two copies can likely be removed.
from pathlib import Path

import tensorflow_datasets as tfds
from google.colab import drive

# Attach Google Drive to the Colab filesystem at /content/drive.
drive.mount("/content/drive")

# Folder on the mounted Drive reported below as the TFDS cache; created if
# missing, left alone if it already exists.
TFDS_DIR = Path("/content/drive/MyDrive/tfds_data")
TFDS_DIR.mkdir(parents=True, exist_ok=True)
print("TFDS cache:", TFDS_DIR)

In [None]:
# Shared constants for the preprocessing and model cells below.
AUTOTUNE = tf.data.AUTOTUNE  # lets tf.data choose parallelism / prefetch depth
SR = 16_000                  # sample rate in Hz
AUDIO_LEN = 64_000           # samples per clip: 4 seconds at 16 kHz

def corrupt_audio(x, noise_std=0.05, dropout_prob=0.15, min_gap=1000, max_gap=4000):
    """Return a degraded copy of a waveform: additive noise plus an optional silent gap.

    Gaussian noise is added to every sample. With probability ``dropout_prob``
    one contiguous run of samples is additionally set to zero.

    The gap mask is built by comparing sample indices against the gap bounds,
    so its length always follows the input (no dependence on a global clip
    length) and no dynamically sized tensors are concatenated. The function
    behaves the same in eager mode and inside ``tf.data`` / ``tf.function``.

    Args:
        x: 1-D tensor of audio samples (time on axis 0). Cast to float32.
        noise_std: Standard deviation of the zero-mean Gaussian noise.
        dropout_prob: Probability of zeroing one contiguous segment.
        min_gap: Smallest gap width in samples (inclusive, int).
        max_gap: Largest gap width in samples (exclusive, int).

    Returns:
        float32 tensor with the same shape as ``x``.
    """
    x = tf.cast(x, tf.float32)
    x_noisy = x + tf.random.normal(tf.shape(x), stddev=noise_std)

    n = tf.shape(x)[0]

    # Gap start is drawn from [0, n - max_gap) so the whole gap fits inside the
    # clip; the clamp keeps the sampling range non-empty for very short inputs
    # (any overhang is then clipped naturally by the index comparison below).
    max_start = tf.maximum(n - max_gap, 1)
    start = tf.random.uniform([], 0, max_start, dtype=tf.int32)
    width = tf.random.uniform([], min_gap, max_gap, dtype=tf.int32)

    # True for the samples that fall inside the candidate gap.
    t = tf.range(n)
    in_gap = tf.logical_and(t >= start, t < start + width)

    # Scalar coin flip deciding whether this example gets a gap at all.
    apply_dropout = tf.random.uniform([]) < dropout_prob
    zero_out = tf.logical_and(apply_dropout, in_gap)

    return tf.where(zero_out, tf.zeros_like(x_noisy), x_noisy)

def to_pair(ex):
    """Turn one dataset example into a (corrupted, clean) training pair.

    The waveform in ``ex["audio"]`` is converted to float32, cropped or
    zero-padded to exactly ``AUDIO_LEN`` samples, and passed through
    ``corrupt_audio`` to produce the model input.

    Scaling depends on the stored dtype: integer PCM is divided by the dtype's
    full-scale value (32768 for int16) to land in [-1, 1); floating-point audio
    is left as-is. Dividing float audio by 32768 would shrink the signal far
    below the noise level added by ``corrupt_audio``.
    NOTE(review): TFDS documents nsynth ``audio`` as float32 -- confirm with
    the dtype printed by the loading cell.

    Args:
        ex: Example dict with a 1-D ``"audio"`` tensor.

    Returns:
        Tuple ``(corrupt, clean)``, each float32 of shape ``[AUDIO_LEN, 1]``.
    """
    audio = ex["audio"]
    clean = tf.cast(audio, tf.float32)
    if audio.dtype.is_integer:
        # Full-scale magnitude of the integer type, e.g. 32767 + 1 for int16.
        clean = clean / (float(audio.dtype.max) + 1.0)

    # Crop long clips, then right-pad short ones with zeros.
    clean = clean[:AUDIO_LEN]
    clean = tf.pad(clean, [[0, AUDIO_LEN - tf.shape(clean)[0]]])
    # The pad amount is computed at run time; pin the length explicitly so
    # batching and the model see a fixed time dimension.
    clean = tf.ensure_shape(clean, [AUDIO_LEN])

    corrupt = corrupt_audio(clean)
    return corrupt[..., None], clean[..., None]  # [T,1]

BATCH = 32
# Number of raw examples buffered for example-level shuffling of the training
# set (larger mixes better but uses more memory).
SHUFFLE_BUFFER = 1024


def make_pairs_dataset(ds, training=False):
    """Build a batched (corrupted, clean) pipeline from a dataset of raw examples.

    Args:
        ds: ``tf.data.Dataset`` of example dicts accepted by ``to_pair``.
        training: When True, shuffle examples before mapping so each epoch
            sees them in a different order. Validation data is left in order.

    Returns:
        ``tf.data.Dataset`` yielding batches of ``(corrupt, clean)`` tensors.
    """
    if training:
        # Shuffle before the map so the buffer holds raw examples rather than
        # two float tensors per example.
        ds = ds.shuffle(SHUFFLE_BUFFER)
    return (ds
        .map(to_pair, num_parallel_calls=AUTOTUNE)
        .batch(BATCH)
        .prefetch(AUTOTUNE)
    )


train_ds = make_pairs_dataset(ds_train, training=True)
val_ds = make_pairs_dataset(ds_val)

In [None]:
from tensorflow.keras import layers as L

# 1-D convolutional encoder/decoder mapping a corrupted waveform of shape
# (AUDIO_LEN, 1) to a restored waveform of the same shape.
inp = L.Input(shape=(AUDIO_LEN, 1))

# Encoder: each stage convolves, then shrinks the time axis by 4x.
x = inp
for filters in (32, 64):
    x = L.Conv1D(filters=filters, kernel_size=9, padding="same", activation="relu")(x)
    x = L.MaxPool1D(pool_size=4)(x)

# Bottleneck at 1/16 of the input time resolution.
x = L.Conv1D(filters=128, kernel_size=9, padding="same", activation="relu")(x)

# Decoder: each stage stretches the time axis by 4x, then convolves.
for filters in (64, 32):
    x = L.UpSampling1D(size=4)(x)
    x = L.Conv1D(filters=filters, kernel_size=9, padding="same", activation="relu")(x)

# Pointwise convolution back to one channel; no activation on the output.
out = L.Conv1D(filters=1, kernel_size=1, padding="same")(x)

model = tf.keras.Model(inputs=inp, outputs=out)
model.compile(optimizer="adam", loss="mae")
model.summary()

In [None]:
# Fit for 10 epochs on train_ds, evaluating on val_ds after each epoch.
# The object returned by fit() is stored in `history`.
history = model.fit(x=train_ds, epochs=10, validation_data=val_ds)