In [2]:
## Writing to s3
from io import BytesIO
import numpy as np
from urllib.parse import urlparse
import boto3
client = boto3.client("s3")

def to_s3_npy(data: np.array, s3_uri: str):
    # s3_uri looks like f"s3://{BUCKET_NAME}/{KEY}"
    bytes_ = BytesIO()
    np.save(bytes_, data, allow_pickle=True)
    bytes_.seek(0)
    parsed_s3 = urlparse(s3_uri)
    client.upload_fileobj(
        Fileobj=bytes_, Bucket=parsed_s3.netloc, Key=parsed_s3.path[1:]
    )
    return True

def from_s3_npy(s3_uri: str):
    bytes_ = BytesIO()
    parsed_s3 = urlparse(s3_uri)
    client.download_fileobj(
        Fileobj=bytes_, Bucket=parsed_s3.netloc, Key=parsed_s3.path[1:]
    )
    bytes_.seek(0)
    return np.load(bytes_, allow_pickle=True)



In [3]:
data = from_s3_npy("s3://rtvc-data/preprocessed/synthesizer_librispeech_valid.npy")
data.shape

(2333, 4)

In [1]:
import numpy as np
import pandas as pd

In [None]:

df["id"] = df.audio_path.apply(lambda x:"-".join(x.split(".")[0].split("-")[1:]))

In [12]:
from glob import glob

df = pd.DataFrame(glob("../data/LibriSpeech/dev-clean/*/*/*") + glob("../data/LibriSpeech/train-clean-100/*/*/*"))

df["split"] = df[0].apply(lambda x:x.split("/")[3])
df["id"] = df[0].apply(lambda x:x.split("/")[-1].split(".")[0])
df.drop(0, axis=1, inplace=True)

base_dir = "../data/SV2TTS/synthesizer"

data = []
with open(f"{base_dir}/train.txt", "r") as fh:
    for line in fh.readlines():
        line = line.split("|")
        line.insert(0, "-".join(line[0].split("-")[1:]).split(".")[0])
        data.append(line)     

df = df.merge(pd.DataFrame(data, columns=["id", "audio_path", "mels_path", "embeds_path", "wav_len", "mel_len", "text"]))

df.head()

Unnamed: 0,split,id,audio_path,mels_path,embeds_path,wav_len,mel_len,text
0,dev-clean,5536-43363-0010,audio-5536-43363-0010.npy,mel-5536-43363-0010.npy,embed-5536-43363-0010.npy,156480,783,NO DOUBT MANY PREDICTIONS HAVE BEEN COLORED TO...
1,dev-clean,5536-43363-0007,audio-5536-43363-0007.npy,mel-5536-43363-0007.npy,embed-5536-43363-0007.npy,148320,742,AT EVERY MEAL TIME A DISH OF FOOD WAS PLACED U...
2,dev-clean,5536-43363-0014,audio-5536-43363-0014.npy,mel-5536-43363-0014.npy,embed-5536-43363-0014.npy,171840,860,AT THE AGE OF ABOUT SEVENTY FIVE YEARS HE SAVE...
3,dev-clean,5536-43363-0001,audio-5536-43363-0001.npy,mel-5536-43363-0001.npy,embed-5536-43363-0001.npy,103680,519,THEREFORE HE COURTS DEATH IN BATTLE ON THE OTH...
4,dev-clean,5536-43363-0017,audio-5536-43363-0017.npy,mel-5536-43363-0017.npy,embed-5536-43363-0017.npy,134400,673,AT ANOTHER TIME WHEN I WAS FOURTEEN YEARS OLD ...


In [15]:
df.split.value_counts()

train-clean-100    10280
dev-clean           2333
Name: split, dtype: int64

In [18]:
tmp = df[df.split=="dev-clean"]

for name in ["mels", "embeds"]:
    tmp[name] = tmp[f"{name}_path"].apply(lambda x: np.load(f"{base_dir}/{name}/{x}"))
    
tmp = tmp[["text", "mels", "embeds", "mel_len"]]

tmp.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,text,mels,embeds,mel_len
0,NO DOUBT MANY PREDICTIONS HAVE BEEN COLORED TO...,"[[-3.113764, -3.4454412, -4.0, -3.9672627, -3....","[0.0, 0.011862858, 0.072159685, 0.0, 0.0046737...",783
1,AT EVERY MEAL TIME A DISH OF FOOD WAS PLACED U...,"[[-2.687537, -3.5373487, -4.0, -3.3967917, -3....","[0.0, 0.0, 0.11203939, 0.0, 0.0, 0.0, 0.075544...",742
2,AT THE AGE OF ABOUT SEVENTY FIVE YEARS HE SAVE...,"[[-2.7382286, -3.8348107, -4.0, -3.4696324, -2...","[0.0, 0.00030589534, 0.06469827, 0.0, 0.0, 0.0...",860
3,THEREFORE HE COURTS DEATH IN BATTLE ON THE OTH...,"[[-3.9332054, -4.0, -3.2723053, -3.0335386, -3...","[0.0, 0.0, 0.06820358, 0.0, 0.0, 0.0, 0.034092...",519
4,AT ANOTHER TIME WHEN I WAS FOURTEEN YEARS OLD ...,"[[-3.8848734, -3.7270896, -3.6729581, -3.31327...","[0.0, 0.011425146, 0.12177499, 0.0, 0.00867847...",673


In [20]:
to_s3_npy(tmp.to_numpy(), "s3://rtvc-data/synthesizer_librispeech_valid.npy")

True

In [21]:
tmp = df[df.split!="dev-clean"]

for name in ["mels", "embeds"]:
    tmp[name] = tmp[f"{name}_path"].apply(lambda x: np.load(f"{base_dir}/{name}/{x}"))
    
tmp = tmp[["text", "mels", "embeds", "mel_len"]]

tmp.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,text,mels,embeds,mel_len
2333,AT THE WATER WHICH LIES FAR OUT YONDER AND THE...,"[[-4.0, -4.0, -4.0, -4.0, -4.0, -4.0, -4.0, -4...","[0.0, 0.0, 0.0, 0.0, 0.005339365, 0.022676293,...",481
2334,BUT NO MAN ENTERS IT WITHOUT LEAVING HIS BODY ...,"[[-2.9456725, -3.2813048, -2.9541447, -2.76079...","[0.0, 0.0, 0.0, 0.0, 0.06057082, 0.045229267, ...",385
2335,FAR AWAY IN NORTH AMERICA WHERE THE RED INDIAN...,"[[-3.2663932, -3.8094645, -3.6030595, -3.34865...","[0.0, 0.039322924, 0.0, 0.0, 0.0, 0.0, 0.0, 0....",291
2336,AND ON THE TOP OF THESE HE FOUND A HUT OR WIGW...,"[[-3.5826182, -4.0, -4.0, -3.3893008, -3.97740...","[0.0, 0.006501352, 0.008210721, 0.0, 0.0052901...",826
2337,AT THE GATE MY MESSENGER AWAITS YOU AND YOU SH...,"[[-2.6499336, -2.6682713, -2.9672568, -3.26982...","[0.0, 0.027821857, 0.01570378, 0.0, 0.0, 0.031...",377


In [22]:
tmp.shape

(10280, 4)

In [23]:
to_s3_npy(tmp.to_numpy(), "s3://rtvc-data/synthesizer_librispeech_train.npy")

True

In [2]:
base_dir = "../data/SV2TTS/synthesizer"

with open(f"{base_dir}/train.txt", "r") as fh:
    data = [line.split("|") for line in fh.readlines()]

df = pd.DataFrame(data, columns=["audio_path", "mels_path", "embeds_path", "wav_len", "mel_len", "text"])

for name in ["mels", "embeds"]:
    df[name] = df[f"{name}_path"].apply(lambda x: np.load(f"{base_dir}/{name}/{x}"))

df["id"] = df.audio_path.apply(lambda x:"-".join(x.split(".")[0].split("-")[1:]))
    
df.head()

Unnamed: 0,audio_path,mels_path,embeds_path,wav_len,mel_len,text,mels,embeds,id
0,audio-5536-43363-0010.npy,mel-5536-43363-0010.npy,embed-5536-43363-0010.npy,156480,783,NO DOUBT MANY PREDICTIONS HAVE BEEN COLORED TO...,"[[-3.113764, -3.4454412, -4.0, -3.9672627, -3....","[0.0, 0.011862858, 0.072159685, 0.0, 0.0046737...",5536-43363-0010
1,audio-5536-43363-0007.npy,mel-5536-43363-0007.npy,embed-5536-43363-0007.npy,148320,742,AT EVERY MEAL TIME A DISH OF FOOD WAS PLACED U...,"[[-2.687537, -3.5373487, -4.0, -3.3967917, -3....","[0.0, 0.0, 0.11203939, 0.0, 0.0, 0.0, 0.075544...",5536-43363-0007
2,audio-5536-43363-0014.npy,mel-5536-43363-0014.npy,embed-5536-43363-0014.npy,171840,860,AT THE AGE OF ABOUT SEVENTY FIVE YEARS HE SAVE...,"[[-2.7382286, -3.8348107, -4.0, -3.4696324, -2...","[0.0, 0.00030589534, 0.06469827, 0.0, 0.0, 0.0...",5536-43363-0014
3,audio-5536-43363-0001.npy,mel-5536-43363-0001.npy,embed-5536-43363-0001.npy,103680,519,THEREFORE HE COURTS DEATH IN BATTLE ON THE OTH...,"[[-3.9332054, -4.0, -3.2723053, -3.0335386, -3...","[0.0, 0.0, 0.06820358, 0.0, 0.0, 0.0, 0.034092...",5536-43363-0001
4,audio-5536-43363-0017.npy,mel-5536-43363-0017.npy,embed-5536-43363-0017.npy,134400,673,AT ANOTHER TIME WHEN I WAS FOURTEEN YEARS OLD ...,"[[-3.8848734, -3.7270896, -3.6729581, -3.31327...","[0.0, 0.011425146, 0.12177499, 0.0, 0.00867847...",5536-43363-0017


In [3]:
df.shape

(12613, 9)

In [4]:
from glob import glob

mapper = pd.DataFrame(glob("../data/LibriSpeech/dev-clean/*/*/*") + glob("../data/LibriSpeech/train-clean-100/*/*/*"))

mapper["split"] = mapper[0].apply(lambda x:x.split("/")[3])

mapper["id"] = mapper[0].apply(lambda x:x.split("/")[-1].split(".")[0])

mapper.drop(0, axis=1, inplace=True)

mapper

Unnamed: 0,split,id
0,dev-clean,5536-43363-0006
1,dev-clean,5536-43363-0010
2,dev-clean,5536-43363-0007
3,dev-clean,5536-43363-0014
4,dev-clean,5536-43363-0001
...,...,...
31919,train-clean-100,839-130898-0069
31920,train-clean-100,839-130898-0046
31921,train-clean-100,839-130898-0049
31922,train-clean-100,839-130898-0097


In [7]:
tmp = df.merge(mapper)
tmp.head()

Unnamed: 0,audio_path,mels_path,embeds_path,wav_len,mel_len,text,mels,embeds,id,split
0,audio-5536-43363-0010.npy,mel-5536-43363-0010.npy,embed-5536-43363-0010.npy,156480,783,NO DOUBT MANY PREDICTIONS HAVE BEEN COLORED TO...,"[[-3.113764, -3.4454412, -4.0, -3.9672627, -3....","[0.0, 0.011862858, 0.072159685, 0.0, 0.0046737...",5536-43363-0010,dev-clean
1,audio-5536-43363-0007.npy,mel-5536-43363-0007.npy,embed-5536-43363-0007.npy,148320,742,AT EVERY MEAL TIME A DISH OF FOOD WAS PLACED U...,"[[-2.687537, -3.5373487, -4.0, -3.3967917, -3....","[0.0, 0.0, 0.11203939, 0.0, 0.0, 0.0, 0.075544...",5536-43363-0007,dev-clean
2,audio-5536-43363-0014.npy,mel-5536-43363-0014.npy,embed-5536-43363-0014.npy,171840,860,AT THE AGE OF ABOUT SEVENTY FIVE YEARS HE SAVE...,"[[-2.7382286, -3.8348107, -4.0, -3.4696324, -2...","[0.0, 0.00030589534, 0.06469827, 0.0, 0.0, 0.0...",5536-43363-0014,dev-clean
3,audio-5536-43363-0001.npy,mel-5536-43363-0001.npy,embed-5536-43363-0001.npy,103680,519,THEREFORE HE COURTS DEATH IN BATTLE ON THE OTH...,"[[-3.9332054, -4.0, -3.2723053, -3.0335386, -3...","[0.0, 0.0, 0.06820358, 0.0, 0.0, 0.0, 0.034092...",5536-43363-0001,dev-clean
4,audio-5536-43363-0017.npy,mel-5536-43363-0017.npy,embed-5536-43363-0017.npy,134400,673,AT ANOTHER TIME WHEN I WAS FOURTEEN YEARS OLD ...,"[[-3.8848734, -3.7270896, -3.6729581, -3.31327...","[0.0, 0.011425146, 0.12177499, 0.0, 0.00867847...",5536-43363-0017,dev-clean


In [23]:
tmp.shape

(12613, 10)

In [8]:
tmp.split.value_counts()

train-clean-100    10280
dev-clean           2333
Name: split, dtype: int64

In [9]:
data = tmp[tmp.split=="dev-clean"][["text", "mels", "embeds", "mel_len"]].to_numpy()
np.savez("data/synthesizer_librispeech_valid.npz", data=data)

data = tmp[tmp.split!="dev-clean"][["text", "mels", "embeds", "mel_len"]].to_numpy()
np.savez("data/synthesizer_librispeech_train.npz", data=data)