## Imports

In [1]:
import numpy as np
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from tensorflow.keras import backend 
import tensorflow as tf
print(tf.__version__)

# U-Net network
def _conv_lrelu(tensor, filters, kernel_size=3, kernel_init='he_normal'):
    """Apply one linear, 'same'-padded Conv2D followed by a default LeakyReLU."""
    tensor = Conv2D(filters, kernel_size, activation=None, padding='same',
                    kernel_initializer=kernel_init)(tensor)
    return LeakyReLU()(tensor)


def _double_conv(tensor, filters):
    """Apply two consecutive 3x3 conv + LeakyReLU stages with `filters` channels."""
    return _conv_lrelu(_conv_lrelu(tensor, filters), filters)


def _up_merge(tensor, skip, filters):
    """Upsample x2, apply a 2x2 conv + LeakyReLU, then concatenate with `skip` on axis 3."""
    upsampled = UpSampling2D(size=(2, 2))(tensor)
    upsampled = _conv_lrelu(upsampled, filters, kernel_size=2)
    return concatenate([skip, upsampled], axis=3)


def unet(pretrained_weights=None, input_size=(128, 128, 1)):
    """Build and compile the U-Net model.

    The encoder runs four double-convolution stages (16, 32, 64, 128 filters),
    each followed by 2x2 max pooling; dropout (0.5) is applied after the fourth
    stage and after the 256-filter bottleneck. The decoder upsamples four times,
    concatenating the matching encoder output on axis 3 before each
    double-convolution stage. A 2-filter 3x3 convolution and a 1x1 convolution
    with tanh activation produce the single-channel output.

    The model is compiled with the 'adam' optimizer, Huber loss and the 'mae'
    metric.

    Args:
        pretrained_weights: Optional path to a weights file passed to
            `model.load_weights`. Skipped when falsy.
        input_size: Shape of one input sample, without the batch axis. The
            first two entries are treated as the pooled spatial dimensions
            (the skip connections are concatenated on axis 3, i.e. a
            channels-last layout is assumed).

    Returns:
        The compiled Keras `Model`.

    Raises:
        ValueError: If a known spatial dimension is not a multiple of 16. Four
            2x2 poolings followed by four x2 upsamplings only reproduce the
            encoder shapes for such sizes; otherwise the skip concatenation
            fails later with a much less readable shape error.
    """
    # Fail early and clearly; unknown (None) dimensions are left to Keras.
    for dim in tuple(input_size)[:2]:
        if dim is not None and dim % 16 != 0:
            raise ValueError(
                "unet: spatial dimensions of input_size must be multiples of 16, "
                "got {!r}".format(input_size)
            )

    base_filters = 16
    inputs = Input(input_size)

    # ---- encoder ----
    conv1 = _double_conv(inputs, base_filters)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = _double_conv(pool1, base_filters * 2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = _double_conv(pool2, base_filters * 4)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = _double_conv(pool3, base_filters * 8)
    drop4 = Dropout(0.5)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

    # ---- bottleneck ----
    conv5 = _double_conv(pool4, base_filters * 16)
    drop5 = Dropout(0.5)(conv5)

    # ---- decoder (skip connections come from drop4, conv3, conv2, conv1) ----
    conv6 = _double_conv(_up_merge(drop5, drop4, base_filters * 8), base_filters * 8)
    conv7 = _double_conv(_up_merge(conv6, conv3, base_filters * 4), base_filters * 4)
    conv8 = _double_conv(_up_merge(conv7, conv2, base_filters * 2), base_filters * 2)
    conv9 = _double_conv(_up_merge(conv8, conv1, base_filters), base_filters)

    # ---- output head ----
    conv9 = _conv_lrelu(conv9, 2)
    conv10 = Conv2D(1, 1, activation='tanh')(conv9)

    model = Model(inputs, conv10)
    model.compile(optimizer='adam', loss=tf.keras.losses.Huber(), metrics=['mae'])

    if pretrained_weights:
        model.load_weights(pretrained_weights)

    return model

1.15.2


In [7]:
import librosa
import os, inspect
import pathlib
# ==== paths ====
currentdir = os.getcwd()
# Assembled with os.path.join, the same way the audio file paths are built
# from datadir when the files are loaded.
datadir = os.path.join(currentdir, "data")
file_no_dolby = "ELO_1_raw.wav"
file_dolby    = "ELO_1_clean.wav"

# ==== audio params ====
sr = 44100                            # sample rate requested from librosa.load

frame_length = 32768                  # samples per frame, ~0.74 sec at 44.1 kHz
hop_length_frame = frame_length // 2  # 16384 samples -> 50% overlap

# ==== spectrogram params ====
# NOTE(review): 1 + n_fft // 2 == 128 == dim_square_spec; presumably these must
# stay in sync for the spectrogram helper -- confirm against data_tools.
dim_square_spec = 128
n_fft = 255
hop_length_fft = 256


In [8]:
# Load both recordings as mono signals at the configured sample rate.
path_no_dolby = os.path.join(datadir, file_no_dolby)
path_dolby = os.path.join(datadir, file_dolby)

y_no_dolby, sr1 = librosa.load(path_no_dolby, sr=sr, mono=True)
y_dolby, sr2 = librosa.load(path_dolby, sr=sr, mono=True)

# Print the sample counts of the two signals side by side.
print("No Dolby length:", y_no_dolby.shape[0])
print("Dolby length   :", y_dolby.shape[0])


No Dolby length: 13149729
Dolby length   : 13149729


In [9]:
from data_tools import audio_to_audio_frame_stack
# Cut both signals into frame stacks using the same frame length and hop.
frames_no, frames_do = (
    audio_to_audio_frame_stack(signal, frame_length, hop_length_frame)
    for signal in (y_no_dolby, y_dolby)
)

print(frames_no.shape, frames_do.shape)


(801, 32768) (801, 32768)


In [10]:
from data_tools import numpy_audio_to_matrix_spectrogram
# Both frame stacks go through the same spectrogram settings.
spec_args = (dim_square_spec, n_fft, hop_length_fft)

# Spectrograms for NO DOLBY (input)
X_mag_db, X_phase = numpy_audio_to_matrix_spectrogram(frames_no, *spec_args)

# Spectrograms for DOLBY (target)
Y_mag_db, Y_phase = numpy_audio_to_matrix_spectrogram(frames_do, *spec_args)

print(X_mag_db.shape, Y_mag_db.shape)


(801, 128, 128) (801, 128, 128)


In [11]:
# Network input: the spectrogram stack of the no-Dolby frames. This binds a
# second name to the same array, it does not copy it.
X_in = X_mag_db
# Network target: the input minus the Dolby spectrogram stack.
X_ou = X_mag_db - Y_mag_db


In [12]:
from data_tools import scaled_in, scaled_ou

# Rescale the input and the target with their dedicated helpers.
# NOTE(review): both names are rebound from their own previous value, so
# re-running this cell alone scales the already scaled arrays again.
X_in, X_ou = scaled_in(X_in), scaled_ou(X_ou)


In [13]:
from scipy import stats
# Summary statistics over all values of each array (flattened to one column),
# one result per line, followed by the array shapes.
print(
    *(stats.describe(arr.reshape(-1, 1)) for arr in (X_in, X_ou)),
    sep="\n"
)
print(X_in.shape, X_ou.shape)


DescribeResult(nobs=13123584, minmax=(array([-0.68]), array([0.92000004])), mean=array([-0.03963888]), variance=array([0.10911228]), skewness=array([0.15955904]), kurtosis=array([-0.47248801]))
DescribeResult(nobs=13123584, minmax=(array([-0.6496643]), array([0.4505511])), mean=array([-0.02981779]), variance=array([0.00171091]), skewness=array([0.6813041]), kurtosis=array([-0.36917981]))
(801, 128, 128) (801, 128, 128)


In [14]:
# Append a trailing axis of size 1 to both arrays, keeping the first three
# dimensions unchanged.
X_in, X_ou = (
    arr.reshape(arr.shape[0], arr.shape[1], arr.shape[2], 1)
    for arr in (X_in, X_ou)
)


In [15]:
from sklearn.model_selection import train_test_split
# Hold out 10% of the samples for testing, with a fixed seed.
# NOTE(review): the frame hop is half the frame length, so neighbouring frames
# presumably share audio; a shuffled split can then place overlapping frames in
# both sets -- confirm whether a contiguous hold-out is wanted instead.
X_train, X_test, y_train, y_test = train_test_split(
    X_in,
    X_ou,
    test_size=0.1,
    random_state=42,
)


In [18]:
import tensorflow as tf

# For TF 1.x: open a session that logs device placement and list every device
# it reports.
# NOTE(review): the session is not closed in the visible cells.
session_config = tf.ConfigProto(log_device_placement=True)
sess = tf.Session(config=session_config)

print("Devices visible to TensorFlow:")
for device in sess.list_devices():
    print(device)




Device mapping:
/job:localhost/replica:0/task:0/device:XLA_CPU:0 -> device: XLA_CPU device

Device mapping:
/job:localhost/replica:0/task:0/device:XLA_CPU:0 -> device: XLA_CPU device
Devices visible to TensorFlow:
_DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 268435456, 17662042371932787279)
_DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 2087586126093790765)


2026-01-26 19:19:09.453300: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 AVX512F FMA
2026-01-26 19:19:09.501410: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 4199895000 Hz
2026-01-26 19:19:09.503881: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x2f45e580 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2026-01-26 19:19:09.503911: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version


In [None]:
# from model_unet import unet
# from tensorflow.keras.callbacks import ModelCheckpoint

# generator_nn = unet()  # or with pretrained weights
# checkpoint = ModelCheckpoint('model_best.h5', monitor='val_loss', save_best_only=True)
# history = generator_nn.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=9, batch_size=80, callbacks=[checkpoint])
