In [2]:
import pandas as pd  # type: ignore
import torch  # type: ignore

from src.utils.unitary_linear_norm import unitary_norm

In [269]:
# Normalize the RAPs: start by loading the three synthetic-dataset manifests.
# The frames are read in the order train, test, val.
train_manifest, test_manifest, val_manifest = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./data/noiseReverbSpeech/train_manifest_alt.csv",
        "./data/noiseReverbSpeech/test_manifest_alt.csv",
        "./data/noiseReverbSpeech/val_manifest_alt.csv",
    )
)

# Stack the splits row-wise (train, then test, then val) so that statistics
# can be computed over the whole dataset at once.
manifest = pd.concat(objs=[train_manifest, test_manifest, val_manifest])

In [270]:
# Mark every row of the synthetic manifests with real_recording = 0.
# DataFrame.insert raises ValueError when the column already exists, which is
# the case when this cell is re-run or when the CSV files on disk were already
# updated by an earlier run, so the insert is skipped if the column is present.
for synthetic_manifest in (train_manifest, val_manifest, test_manifest):
    if "real_recording" not in synthetic_manifest.columns:
        synthetic_manifest.insert(33, "real_recording", 0)

In [272]:
# Write each split back to the CSV file it was loaded from (row index omitted).
for manifest_frame, manifest_csv in (
    (train_manifest, "./data/noiseReverbSpeech/train_manifest_alt.csv"),
    (test_manifest, "./data/noiseReverbSpeech/test_manifest_alt.csv"),
    (val_manifest, "./data/noiseReverbSpeech/val_manifest_alt.csv"),
):
    manifest_frame.to_csv(manifest_csv, index=False)

In [198]:
# Quick look at the STI column of the combined manifest as a NumPy array.
manifest.loc[:, "STI"].to_numpy()

array([0.6012, 0.5125, 0.874 , ..., 0.5334, 0.7469, 0.5954])

In [199]:
# --- Column extraction -------------------------------------------------------
# Room-acoustic parameters from the combined manifest, as NumPy arrays.
sti, alcons, t60, edt, c80, c50 = (
    manifest[column].to_numpy()
    for column in ("STI", "ALCONS", "T60", "EDT", "C80", "C50")
)
d50 = manifest["D50"].to_numpy().round(decimals=4)  # D50 is rounded on load
ts = manifest["TS"].to_numpy()

# Geometry-related columns of the same manifest.
volume, volume_log10, dist_src, Th, Tt = (
    manifest[column].to_numpy()
    for column in ("volume", "volume_log10", "distRcv", "Th", "Tt")
)


# --- Normalization -----------------------------------------------------------
def _unit_normalize(values):
    """Pass a NumPy array through ``unitary_norm`` and round the result to 4 decimals."""
    return unitary_norm(torch.from_numpy(values)).round(decimals=4).numpy()


sti_norm = _unit_normalize(sti)
alcons_norm = _unit_normalize(alcons)
t60_norm = _unit_normalize(t60)
edt_norm = _unit_normalize(edt)
c80_norm = _unit_normalize(c80)
c50_norm = _unit_normalize(c50)
d50_norm = _unit_normalize(d50)
ts_norm = _unit_normalize(ts)


# --- Standard deviations -----------------------------------------------------
def _rounded_std(values, decimals):
    """Return ``torch.std`` of a NumPy array, rounded to ``decimals`` places, as NumPy."""
    return torch.std(torch.from_numpy(values)).round(decimals=decimals).numpy()


# Note: t60_std is taken over the *normalized* T60 values, not the raw ones.
t60_std = _rounded_std(t60_norm, 4)
volume_std = _rounded_std(volume, 0)
dist_src_std = _rounded_std(dist_src, 5)
volume_log10_std = _rounded_std(volume_log10, 5)
Th_std = _rounded_std(Th, 5)
Tt_std = _rounded_std(Tt, 5)


# --- Report ------------------------------------------------------------------
# Max / min of every parameter. The "volume" line reports volume_log10.
for bound_label, bound_values in (
    ("sti", sti),
    ("alcons", alcons),
    ("t60", t60),
    ("edt", edt),
    ("c80", c80),
    ("c50", c50),
    ("d50", d50),
    ("ts", ts),
    ("volume", volume_log10),
    ("dist_src", dist_src),
):
    print(
        f"upper bound of {bound_label}: ",
        bound_values.max(),
        f"lower bound of {bound_label}: ",
        bound_values.min(),
    )

print(
    "standard deviation of t60: ",
    t60_std,
    "\n standard deviation of volume_log10: ",
    volume_log10_std,
    "\nstandard deviation of dist_src: ",
    dist_src_std,
)

upper bound of sti:  0.8843 lower bound of sti:  0.2729
upper bound of alcons:  38.8632 lower bound of alcons:  1.4143
upper bound of t60:  7.958 lower bound of t60:  0.188
upper bound of edt:  6.489 lower bound of edt:  0.016
upper bound of c80:  25.5543 lower bound of c80:  -5.9572
upper bound of c50:  21.8961 lower bound of c50:  -8.4706
upper bound of d50:  0.9936 lower bound of d50:  0.1245
upper bound of ts:  0.4452 lower bound of ts:  0.0034
upper bound of volume:  3.954 lower bound of volume:  1.505
upper bound of dist_src:  28.35 lower bound of dist_src:  0.191
standard deviation of t60:  0.2231 
 standard deviation of volume_log10:  0.67741 
standard deviation of dist_src:  6.29148


In [195]:
# Mean and standard deviation of the Tt column, both rounded to 5 decimals.
Tt_tensor = torch.from_numpy(Tt)
Tt_mu = Tt_tensor.mean().round(decimals=5).numpy()
Tt_std = Tt_tensor.std().round(decimals=5).numpy()
print("mean of Tt: ", Tt_mu, "\n standard deviation of Tt: ", Tt_std)

mean of Tt:  1.73133 
 standard deviation of Tt:  1.73343


In [301]:
# Load the train / val / test manifests of the real recordings.
train_real_manifest, val_real_manifest, test_real_manifest = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./data/BUT_real_recording_11160samples/real_audio.metadata/train_manifest.csv",
        "./data/BUT_real_recording_11160samples/real_audio.metadata/val_manifest.csv",
        "./data/BUT_real_recording_11160samples/real_audio.metadata/test_manifest.csv",
    )
)

# Stack the three real-recording splits (train, val, test) into one frame.
real_manifest = pd.concat(
    objs=[train_real_manifest, val_real_manifest, test_real_manifest]
)

# Pull out the columns whose value ranges are inspected below.
sti_real, alcons_real, t60_real, edt_real, c80_real, c50_real = (
    real_manifest[column].to_numpy()
    for column in ("STI", "ALCONS", "T60", "EDT", "C80", "C50")
)
d50_real = real_manifest["D50"].to_numpy().round(decimals=4)
ts_real, volume_real, dist_src_real, Th_real = (
    real_manifest[column].to_numpy()
    for column in ("TS", "volume_log10", "distRcv", "Th")
)

# Report max / min per parameter; "volume_real" holds the volume_log10 column.
for real_label, real_values in (
    ("sti_real", sti_real),
    ("alcons_real", alcons_real),
    ("t60_real", t60_real),
    ("edt_real", edt_real),
    ("c80_real", c80_real),
    ("c50_real", c50_real),
    ("d50_real", d50_real),
    ("ts_real", ts_real),
    ("volume_real", volume_real),
    ("dist_src_real", dist_src_real),
):
    print(
        f"upper bound of {real_label}: ",
        real_values.max(),
        f"lower bound of {real_label}: ",
        real_values.min(),
    )

upper bound of sti_real:  0.8849 lower bound of sti_real:  0.2997
upper bound of alcons_real:  33.6024 lower bound of alcons_real:  1.4101
upper bound of t60_real:  2.903 lower bound of t60_real:  0.185
upper bound of edt_real:  3.794 lower bound of edt_real:  0.016
upper bound of c80_real:  25.7361 lower bound of c80_real:  -4.7571
upper bound of c50_real:  22.0103 lower bound of c50_real:  -7.3502
upper bound of d50_real:  0.9937 lower bound of d50_real:  0.1555
upper bound of ts_real:  0.3507 lower bound of ts_real:  0.0034
upper bound of volume_real:  3.432 lower bound of volume_real:  1.993
upper bound of dist_src_real:  16.1 lower bound of dist_src_real:  0.604


In [302]:
import numpy as np


def _real_split_arrays(frame):
    """Return the STI, ALCONS, T60, EDT, C80, C50, D50, TS and distRcv columns of ``frame``.

    Each column is converted to a NumPy array; D50 is additionally rounded to
    4 decimals. The tuple order matches the order listed above.
    """
    return (
        frame["STI"].to_numpy(),
        frame["ALCONS"].to_numpy(),
        frame["T60"].to_numpy(),
        frame["EDT"].to_numpy(),
        frame["C80"].to_numpy(),
        frame["C50"].to_numpy(),
        frame["D50"].to_numpy().round(decimals=4),
        frame["TS"].to_numpy(),
        frame["distRcv"].to_numpy(),
    )


(
    STI_real_train,
    ALCONS_real_train,
    T60_real_train,
    EDT_real_train,
    C80_real_train,
    C50_real_train,
    D50_real_train,
    TS_real_train,
    dist_src_real_train,
) = _real_split_arrays(train_real_manifest)

(
    STI_real_val,
    ALCONS_real_val,
    T60_real_val,
    EDT_real_val,
    C80_real_val,
    C50_real_val,
    D50_real_val,
    TS_real_val,
    dist_src_real_val,
) = _real_split_arrays(val_real_manifest)

(
    STI_real_test,
    ALCONS_real_test,
    T60_real_test,
    EDT_real_test,
    C80_real_test,
    C50_real_test,
    D50_real_test,
    TS_real_test,
    dist_src_real_test,
) = _real_split_arrays(test_real_manifest)

# Fixed min / max bounds for the min-max scaling below. They equal the TS and
# dist_src bounds printed for the synthetic manifest earlier in this notebook.
TS_LOWER, TS_UPPER = 0.0034, 0.4452
DIST_SRC_LOWER, DIST_SRC_UPPER = 0.191, 28.35


def norm_ts(x):
    """Min-max scale TS values with the fixed bounds, rounded to 4 decimals."""
    return np.round((x - TS_LOWER) / (TS_UPPER - TS_LOWER), 4)


def norm_dist_src(x):
    """Min-max scale source distances with the fixed bounds, rounded to 4 decimals."""
    return np.round((x - DIST_SRC_LOWER) / (DIST_SRC_UPPER - DIST_SRC_LOWER), 4)


ts_norm_train, ts_norm_val, ts_norm_test = (
    norm_ts(ts_values) for ts_values in (TS_real_train, TS_real_val, TS_real_test)
)

dist_src_norm_train, dist_src_norm_val, dist_src_norm_test = (
    norm_dist_src(dist_values)
    for dist_values in (dist_src_real_train, dist_src_real_val, dist_src_real_test)
)

In [303]:
# Add the normalized TS and source-distance columns to each real-recording
# manifest (TS_norm at position 21, dist_src_norm at position 22).
# DataFrame.insert raises ValueError when the column already exists, which
# happens when this cell is re-run or when the CSV on disk already contains the
# columns from an earlier run. In that case the existing column is refreshed
# in place instead, so the cell is safe to execute repeatedly.
for real_frame, ts_values, dist_values in (
    (train_real_manifest, ts_norm_train, dist_src_norm_train),
    (val_real_manifest, ts_norm_val, dist_src_norm_val),
    (test_real_manifest, ts_norm_test, dist_src_norm_test),
):
    for position, (column, values) in enumerate(
        (("TS_norm", ts_values), ("dist_src_norm", dist_values)), start=21
    ):
        if column in real_frame.columns:
            real_frame[column] = values
        else:
            real_frame.insert(position, column, values)

In [307]:
# Mark every row of the real-recording manifests with real_recording = 1.
# The insert is skipped when the column is already present (cell re-run, or
# CSV files already updated), because DataFrame.insert would raise ValueError.
for real_frame in (train_real_manifest, val_real_manifest, test_real_manifest):
    if "real_recording" not in real_frame.columns:
        real_frame.insert(23, "real_recording", 1)

In [309]:
# Persist the updated real-recording manifests over their source CSV files
# (row index omitted).
for real_frame, real_csv in (
    (
        train_real_manifest,
        "./data/BUT_real_recording_11160samples/real_audio.metadata/train_manifest.csv",
    ),
    (
        val_real_manifest,
        "./data/BUT_real_recording_11160samples/real_audio.metadata/val_manifest.csv",
    ),
    (
        test_real_manifest,
        "./data/BUT_real_recording_11160samples/real_audio.metadata/test_manifest.csv",
    ),
):
    real_frame.to_csv(real_csv, index=False)

In [None]:
# Histogram of the STI values (50 bins).
import matplotlib.pyplot as plt
import seaborn as sns

# The font family must be set BEFORE the figure is created: the title and axis
# label text objects are built by plt.subplots() and keep the font family that
# was configured at that moment.
plt.rcParams["font.family"] = "serif"
fig, ax = plt.subplots()
sns.histplot(sti, bins=50, color="blue", ax=ax)
ax.set_title("STI distribution")
ax.set_xlabel("STI")
ax.set_ylabel("Number of samples")
plt.show()

In [None]:
# Histogram of the ALCONS values (50 bins).
# Set the font family before creating the figure so that the title and axis
# labels created by plt.subplots() pick it up even if this cell runs first.
plt.rcParams["font.family"] = "serif"
fig, ax = plt.subplots()
sns.histplot(alcons, bins=50, color="blue", ax=ax)
ax.set_title("ALCONS distribution")
ax.set_xlabel("ALCONS")
ax.set_ylabel("Number of samples")
plt.show()

In [None]:
# Histogram of the T60 values (50 bins).
# Set the font family before creating the figure so that the title and axis
# labels created by plt.subplots() pick it up even if this cell runs first.
plt.rcParams["font.family"] = "serif"
fig, ax = plt.subplots()
sns.histplot(t60, bins=50, color="blue", ax=ax)
ax.set_title("T60 distribution")
ax.set_xlabel("T60")
ax.set_ylabel("Number of samples")
plt.show()

In [None]:
# Histogram of the EDT values (50 bins).
# Set the font family before creating the figure so that the title and axis
# labels created by plt.subplots() pick it up even if this cell runs first.
plt.rcParams["font.family"] = "serif"
fig, ax = plt.subplots()
sns.histplot(edt, bins=50, color="blue", ax=ax)
ax.set_title("EDT distribution")
ax.set_xlabel("EDT")
ax.set_ylabel("Number of samples")
plt.show()

In [None]:
# Histogram of the C80 values (50 bins).
# Set the font family before creating the figure so that the title and axis
# labels created by plt.subplots() pick it up even if this cell runs first.
plt.rcParams["font.family"] = "serif"
fig, ax = plt.subplots()
sns.histplot(c80, bins=50, color="blue", ax=ax)
ax.set_title("C80 distribution")
ax.set_xlabel("C80")
ax.set_ylabel("Number of samples")
plt.show()

In [None]:
# Histogram of the C50 values (50 bins).
# Set the font family before creating the figure so that the title and axis
# labels created by plt.subplots() pick it up even if this cell runs first.
plt.rcParams["font.family"] = "serif"
fig, ax = plt.subplots()
sns.histplot(c50, bins=50, color="blue", ax=ax)
ax.set_title("C50 distribution")
ax.set_xlabel("C50")
ax.set_ylabel("Number of samples")
plt.show()

In [None]:
# Histogram of the D50 values (50 bins).
# Set the font family before creating the figure so that the title and axis
# labels created by plt.subplots() pick it up even if this cell runs first.
plt.rcParams["font.family"] = "serif"
fig, ax = plt.subplots()
sns.histplot(d50, bins=50, color="blue", ax=ax)
ax.set_title("D50 distribution")
ax.set_xlabel("D50")
ax.set_ylabel("Number of samples")
plt.show()

In [None]:
# Histogram of the Ts values (50 bins).
# Set the font family before creating the figure so that the title and axis
# labels created by plt.subplots() pick it up even if this cell runs first.
plt.rcParams["font.family"] = "serif"
fig, ax = plt.subplots()
sns.histplot(ts, bins=50, color="blue", ax=ax)
ax.set_title("Ts distribution")
ax.set_xlabel("Ts")
ax.set_ylabel("Number of samples")
plt.show()

In [206]:
# Add the normalized RAP columns to the combined manifest at positions 23..30.
# DataFrame.insert raises ValueError when a column already exists, which
# happens when this cell is re-run or when the CSV files that were loaded
# already carry the *_norm columns from an earlier run. Existing columns are
# therefore refreshed in place and only missing ones are inserted.
for position, (column, values) in enumerate(
    (
        ("STI_norm", sti_norm),
        ("ALCONS_norm", alcons_norm),
        ("T60_norm", t60_norm),
        ("EDT_norm", edt_norm),
        ("C80_norm", c80_norm),
        ("C50_norm", c50_norm),
        ("D50_norm", d50_norm),
        ("TS_norm", ts_norm),
    ),
    start=23,
):
    if column in manifest.columns:
        manifest[column] = values
    else:
        manifest.insert(position, column, values)

In [208]:
# Cut the combined manifest back into its splits by row position. The rows
# were stacked as train, test, val, so the boundaries follow that order.
n_train_rows = len(train_manifest)
n_test_rows = len(test_manifest)
test_end = n_train_rows + n_test_rows

train_manifest_ = manifest.iloc[:n_train_rows]
test_manifest_ = manifest.iloc[n_train_rows:test_end]
val_manifest_ = manifest.iloc[test_end:]

In [209]:
# Overwrite the train manifest CSV with the re-split frame (row index omitted).
train_manifest_.to_csv(
    path_or_buf="./data/noiseReverbSpeech/train_manifest_alt.csv",
    index=False,
)

In [210]:
# Overwrite the test manifest CSV with the re-split frame (row index omitted).
test_manifest_.to_csv(
    path_or_buf="./data/noiseReverbSpeech/test_manifest_alt.csv",
    index=False,
)

In [211]:
# Overwrite the val manifest CSV with the re-split frame (row index omitted).
val_manifest_.to_csv(
    path_or_buf="./data/noiseReverbSpeech/val_manifest_alt.csv",
    index=False,
)

In [188]:
# Build the sorted list of real-recorded audio files under one retransmission
# folder.
import os
from pathlib import Path

real_recording_path = "/home/lucianius/Data/Datasets/Librispeech_test_clean_retransmission/VUT_FIT_D105/MicID01/SpkID07_20170904_T/01/english/LibriSpeech/test-clean/"
extension = ".wav"
librispeech_folder = Path(real_recording_path)
# Recursively collect every file ending in `extension`, as strings, in
# ascending path order.
matching_files = sorted(
    str(wav_path) for wav_path in librispeech_folder.rglob(f"*{extension}")
)

In [190]:
import random

from src.preprocessing.RIRutils import checkfolder_BUT

In [None]:
# Walk the retransmitted LibriSpeech folders and pick `num_files` audio files
# for every (folder, speaker, position) combination, printing each selection.
# NOTE(review): both roots are absolute paths on one machine; consider moving
# them to a configurable data directory.
path_retrans = "/home/lucianius/Data/Datasets/Librispeech_test_clean_retransmission"
path_rir = "/home/lucianius/Data/Datasets/BUT_ReverbDB"
num_files = 10  # number of audio files selected per position folder
j = 0  # running batch counter; incremented once per position and never reset here
n = 0  # not used in the visible part of this cell
folderNos = [4, 6, 7, 8, 9]
ThTtDistRcvOriSrc_label = []  # not used in the visible part of this cell
random.seed(3407)  # fixed seed so the random fallback below is repeatable
for folderNo in folderNos:
    print("Processing folder " + str(folderNo) + "...")
    # checkfolder_BUT comes from src.preprocessing.RIRutils; presumably it
    # returns the sub-path of the given folder number - TODO confirm.
    path_ = path_retrans + checkfolder_BUT(folderNo)
    lst = os.listdir(path_)
    lst.sort()
    # Matching folder under the RIR dataset root (not used in the visible part).
    path = path_rir + checkfolder_BUT(folderNo)
    for foldername in lst:
        # Only directory entries whose name starts with "SpkID" are processed.
        if foldername.startswith("SpkID"):
            print("Processing speaker " + foldername + "...")
            # Sub-folders are addressed by zero-padded numbers "01" .. "31".
            for i in range(1, 32):
                retransed_path = os.path.join(
                    path_,
                    foldername,
                    str(i).zfill(2),
                    "english/LibriSpeech/test-clean/",
                )
                librispeech_folder = Path(retransed_path)
                extension = ".wav"
                # Recursively collect all .wav files below this position folder.
                matching_files = librispeech_folder.rglob(f"*{extension}")
                matching_files = [str(x) for x in matching_files]
                # sort the files
                matching_files.sort()
                # randomly choose some audio file
                # real_audio_paths = random.sample(matching_files, num_files)

                # Take the j-th consecutive window of `num_files` files, so
                # successive positions get different files.
                real_audio_paths = matching_files[j * num_files : (j + 1) * num_files]
                if len(real_audio_paths) < num_files:
                    # handle the last batch: the window ran past the end of the
                    # list, so fall back to a random selection.
                    # NOTE(review): random.sample raises ValueError when the
                    # folder holds fewer than `num_files` files - confirm this
                    # cannot happen.
                    real_audio_paths = random.sample(matching_files, num_files)
                print(real_audio_paths)
                j += 1