In [1]:
import os
import pandas as pd

In [2]:
# Project layout, resolved from the directory the notebook is run in:
#   this_dir : current working directory
#   work_dir : its parent directory
#   data_dir : <work_dir>/data, where all generated files are written
this_dir = os.getcwd()
work_dir = os.path.split(this_dir)[0]
data_dir = os.path.join(work_dir, 'data')

# Create the output directory up front; an existing directory is left alone.
os.makedirs(data_dir, exist_ok=True)

In [3]:
##### setting area #####

# Consonant to synthesise, written with its file-name transcription:
#   tʂ: ts
#   tɕ: tc
#   ʂ: s
#   ɕ: c
consonant = 'c'

# Means of cog and frication duration for every consonant, keyed by the
# file-name transcription: (cog, fri_dur).
CONSONANT_MEANS = {
    'ts': (4000, 96),
    'tc': (7000, 96),
    's': (4000, 174),
    'c': (7000, 174),
}

# mean values of the consonant
# Looked up from the table so they cannot drift out of sync with `consonant`;
# an unknown key raises KeyError instead of silently using stale numbers.
cog, fri_dur = CONSONANT_MEANS[consonant]

# Vowel to synthesise, written with its file-name transcription:
#   i: i *
#   ɪ: L
#   e: e *
#   ɛ: F
#   u: u *
#   ʊ: W
#   o: o *
#   ɔ: D
#   *: used in training
vowel = 'i'

# Formant means for every vowel, keyed by the file-name transcription.
# Each entry is ordered [f3, f2, f1].
VOWEL_FORMANTS = {
    'i': [3372, 2761, 437],
    'L': [3053, 2365, 483],
    'e': [3047, 2530, 536],
    'F': [2979, 2058, 731],
    'u': [2735, 1105, 459],
    'W': [2827, 1225, 519],
    'o': [2828, 1035, 555],
    'D': [2824, 1136, 781],
}

# formants
# Copied so that later edits to f_vals do not modify the lookup table.
f_vals = list(VOWEL_FORMANTS[vowel]) # f3, f2, f1

# The synthesised word is vowel-consonant-vowel with the same vowel twice.
word = vowel + consonant + vowel

# no. of tokens for each word
sample_size = 8000

### Consonant synthesis


In [4]:
import numpy as np
from scipy.stats import truncnorm

In [5]:
# Means and standard deviations of the two consonant-specific features
# (cog, fri_dur); they occupy the first two columns of the sampled array.
c_f_means = np.array([cog, fri_dur])
c_f_stds = np.array([500, 13])

# Features whose means are the same for every consonant; their standard
# deviation is 5 % of the mean.
shared_con_means = np.array([200, 0.5, 1, 50, 60]) # sta_dev, skewness, kurtosis, bur_int, fri_int

con_means = np.concatenate((c_f_means, shared_con_means))
con_stds = np.concatenate((c_f_stds, shared_con_means * 0.05))

# One column per feature, each drawn from a normal distribution truncated at
# two standard deviations either side of its mean. truncnorm takes its bounds
# in standard-deviation units, so they are -2 and 2 for every feature.
consonants = np.zeros((sample_size, len(con_means)))
for col, (mu, sigma) in enumerate(zip(con_means, con_stds)):
    consonants[:, col] = truncnorm.rvs(-2.0, 2.0, loc=mu, scale=sigma, size=sample_size)

In [6]:
# total duration of fixed value 200 for consonants
# total duration of fixed value 200 for consonants
con_dur = np.full((sample_size, 1), 200)

# zero values for all other features
zeros = np.full((sample_size, 9), 0)

# all concatenated
# Only the sampled columns are taken from `consonants`, so running this cell
# again rebuilds the same array instead of appending another 10 columns.
n_sampled_con = len(con_means)
consonants = np.hstack((consonants[:, :n_sampled_con], con_dur, zeros))

# Sampled features + total duration + zero padding.
assert consonants.shape == (sample_size, n_sampled_con + 1 + 9)

print(consonants[0])

[6.39202153e+03 1.59413894e+02 1.80659129e+02 5.12548945e-01
 1.00966245e+00 5.17780215e+01 5.43419749e+01 2.00000000e+02
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 0.00000000e+00]


### Vowel synthesis

In [7]:
# Feature means shared by every vowel, followed by the formants of the
# selected vowel; every standard deviation is 5 % of its mean.
shared_vow_means = np.array([80, 200, 170, 110, 90]) # voc_int, f0, b3, b2, b1
vow_means = np.append(shared_vow_means, f_vals) # *, f3, f2, f1
vow_stds = vow_means * 0.05

# One column per feature, each drawn from a normal distribution truncated at
# two standard deviations either side of its mean. truncnorm takes its bounds
# in standard-deviation units, so they are -2 and 2 for every feature.
vowels = np.zeros((sample_size, len(vow_means)))
for col, (mu, sigma) in enumerate(zip(vow_means, vow_stds)):
    vowels[:, col] = truncnorm.rvs(-2.0, 2.0, loc=mu, scale=sigma, size=sample_size)

In [8]:
# vocalic duration and total duration of fixed value 400 for vowels
# vocalic duration and total duration of fixed value 400 for vowels
vow_dur = np.full((sample_size, 2), 400)

# zero values for all other features
zeros = np.full((sample_size, 7), 0)

# all concatenated
# The sampled features are the trailing columns; slicing them out keeps the
# cell idempotent, so running it again does not prepend another 9 columns.
n_sampled_vow = len(vow_means)
vowels = np.hstack((zeros, vow_dur, vowels[:, -n_sampled_vow:]))

# Zero padding + two durations + sampled features.
assert vowels.shape == (sample_size, 7 + 2 + n_sampled_vow)

print(vowels[0])

[   0.            0.            0.            0.            0.
    0.            0.          400.          400.           80.58371291
  194.5556988   174.093556    114.74691289   93.93733349 3193.8479763
 2855.30020509  415.27899351]


### A CSV file as a guideline to the feature layout

In [9]:
# One row per feature, in the column order of the 17-value feature vectors:
# (element, explanation, mean, random)
feature_rows = [
    ('cog', 'center of gravity', '4000, 7000', 'gaussian'),
    ('fri_dur', 'frication duration', '96, 174', 'gaussian'),
    ('sta_dev', 'standard deviation', '200', 'gaussian'),
    ('skewness', 'skewness', '0.5', 'gaussian'),
    ('kurtosis', 'kurtosis', '1', 'gaussian'),
    ('bur_int', 'burst intensity', '50', 'gaussian'),
    ('fri_int', 'frication intensity', '60', 'gaussian'),
    ('tot_dur', 'total duration', '200 for con, 400 for vow', 'fixed'),
    ('voc_dur', 'vocalic duration', '0 for con, 400 for vow', 'fixed'),
    ('voc_int', 'vocalic intensity', '0 for con, 80 for vow', 'gaussian'),
    ('f0', 'fundamental frequency', '200', 'gaussian'),
    ('b3', 'bandwidth of f3', '170', 'gaussian'),
    ('b2', 'bandwidth of f2', '110', 'gaussian'),
    ('b1', 'bandwidth of f1', '90', 'gaussian'),
    ('f3', 'f3', '', 'gaussian'),
    ('f2', 'f2', '', 'gaussian'),
    ('f1', 'f1', '', 'gaussian'),
]

# Column-wise views of the table, kept under their original names.
element, explanation, mean, random = (list(column) for column in zip(*feature_rows))

# Describe every feature next to the first sampled consonant and vowel row,
# together with the consonant and vowel those rows were generated for.
structure = pd.DataFrame(
    feature_rows,
    columns=['element', 'explanation', 'mean', 'random'],
).assign(
    consonant_sample=consonants[0],
    consonant=consonant,
    vowel_sample=vowels[0],
    vowel=vowel,
)

file_name = os.path.join(data_dir, 'structure_sample.csv')
print(file_name)
structure.to_csv(file_name, index=True)

/mnt/data/Projects/phongen/data/structure_sample.csv


### Save each token as a 3×17 .npy file

In [10]:
# Metadata rows for every token of the currently selected word.
metadata = []

subdata_dir = os.path.join(data_dir, word)
os.makedirs(subdata_dir, exist_ok=True)

for i in range(sample_size):
    # Token ids are 1-based and zero-padded to four digits.
    uid = word + f'_{i+1:04d}'
    filename = f'{uid}.npy'
    save_path = os.path.join(subdata_dir, filename)

    # Vowel-consonant-vowel token: one 17-value row per segment.
    vcv = np.vstack([vowels[i], consonants[i], vowels[i]])

    # np.save(save_path, vcv)

    save_path_rel = os.path.relpath(save_path, start=work_dir)

    # The sampled values are read straight from the consonant row. They are
    # deliberately not assigned to `cog` / `fri_dur`: those names hold the
    # configured means, and overwriting them here would make a re-run of the
    # sampling cell use the last token's values as its means.
    metadata.append({
        'uid': uid,
        'path': save_path_rel,
        'cog': vcv[1][0],
        'fri_dur': vcv[1][1],
        'word': word
    })

metaframe = pd.DataFrame(metadata)

csv_name = word + '_meta.csv'
csv_path = os.path.join(data_dir, csv_name)
# metaframe.to_csv(csv_path, index=False)

### All data

In [11]:
import os
import pandas as pd
import numpy as np
from scipy.stats import truncnorm

# Seed NumPy's global generator, which scipy's rvs() draws from by default,
# so the generated data set is repeatable.
np.random.seed(42)

# Output location: <parent of the working directory>/data
this_dir = os.getcwd()
work_dir = os.path.dirname(this_dir)
data_dir = os.path.join(work_dir, 'data')

os.makedirs(data_dir, exist_ok=True)

# Consonants (file-name transcription) with their mean cog and mean
# frication duration; the three lists are index-aligned.
consonant_list = ['ts', 'tc', 's', 'c']
consonant_cog = [4000, 7000, 4000, 7000]
consonant_fd = [96, 96, 174, 174]

# Vowels (file-name transcription) with their mean formants [f3, f2, f1];
# the two lists are index-aligned.
vowel_list = ['i', 'L', 'e', 'F', 'u', 'W', 'o', 'D']
vowel_formants = [
    [3372, 2761, 437],
    [3053, 2365, 483],
    [3047, 2530, 536],
    [2979, 2058, 731],
    [2735, 1105, 459],
    [2827, 1225, 519],
    [2828, 1035, 555],
    [2824, 1136, 781]
]

# no. of tokens for each word
sample_size = 8000

metadata = []

for index, consonant in enumerate(consonant_list):
    cog = consonant_cog[index]
    fri_dur = consonant_fd[index]

    for jndex, vowel in enumerate(vowel_list):
        f_vals = vowel_formants[jndex]

        word = vowel + consonant + vowel

        # Marked as training data: consonants at positions 0 and 3 combined
        # with vowels at positions 0, 2, 4 and 6.
        istrain = 'yes' if (index in (0, 3) and jndex in (0, 2, 4, 6)) else 'no'

        # ---- consonant rows -------------------------------------------
        c_f_means = np.array([cog, fri_dur])
        c_f_stds = np.array([500, 13])

        shared_con_means = np.array([200, 0.5, 1, 50, 60]) # sta_dev, skewness, kurtosis, bur_int, fri_int

        con_means = np.concatenate((c_f_means, shared_con_means))
        con_stds = np.concatenate((c_f_stds, shared_con_means * 0.05))

        # Columns are drawn one after another from normals truncated at
        # +/- 2 standard deviations (truncnorm bounds are in std units).
        sampled_con = np.column_stack([
            truncnorm.rvs(-2.0, 2.0, loc=mu, scale=sigma, size=sample_size)
            for mu, sigma in zip(con_means, con_stds)
        ])

        # total duration of fixed value 200 for consonants
        con_dur = np.full((sample_size, 1), 200)

        # zero values for all other features
        zeros = np.full((sample_size, 9), 0)

        consonants = np.hstack((sampled_con, con_dur, zeros))

        # ---- vowel rows -----------------------------------------------
        shared_vow_means = np.array([80, 200, 170, 110, 90]) # voc_int, f0, b3, b2, b1
        vow_means = np.append(shared_vow_means, f_vals) # *, f3, f2, f1
        vow_stds = vow_means * 0.05

        sampled_vow = np.column_stack([
            truncnorm.rvs(-2.0, 2.0, loc=mu, scale=sigma, size=sample_size)
            for mu, sigma in zip(vow_means, vow_stds)
        ])

        # vocalic duration and total duration of fixed value 400 for vowels
        vow_dur = np.full((sample_size, 2), 400)

        # zero values for all other features
        zeros = np.full((sample_size, 7), 0)

        vowels = np.hstack((zeros, vow_dur, sampled_vow))

        # ---- one .npy file per token ----------------------------------
        subdata_dir = os.path.join(data_dir, word)
        os.makedirs(subdata_dir, exist_ok=True)

        for token_no, (vowel_row, consonant_row) in enumerate(zip(vowels, consonants), start=1):
            uid = word + f'_{token_no:04d}'
            filename = f'{uid}.npy'
            save_path = os.path.join(subdata_dir, filename)

            # Rows: vowel, consonant, vowel (the same vowel row is used twice).
            vcv = np.vstack([vowel_row, consonant_row, vowel_row])

            np.save(save_path, vcv)

            metadata.append({
                'uid': uid,
                'path': os.path.relpath(save_path, start=work_dir),
                'cog': vcv[1][0],
                'fri_dur': vcv[1][1],
                'word': word,
                'consonant': consonant,
                'vowel': vowel,
                'train': istrain
            })

# One metadata row per saved token.
csv_name = 'metadata.csv'
csv_path = os.path.join(data_dir, csv_name)
metaframe = pd.DataFrame(metadata)
metaframe.to_csv(csv_path, index=False)


In [12]:
# Round-trip check: reload the array stored at `save_path` (whichever path was
# assigned to that name most recently) and show its contents.
savetest = np.load(save_path)

print(savetest)

[[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 4.00000000e+02
  4.00000000e+02 8.29308996e+01 1.99760273e+02 1.79552239e+02
  1.14265773e+02 9.15326853e+01 2.98373450e+03 1.16668291e+03
  7.48867907e+02]
 [6.65778618e+03 1.70221202e+02 1.81112683e+02 4.60560255e-01
  1.00853328e+00 4.72338549e+01 6.08201071e+01 2.00000000e+02
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 4.00000000e+02
  4.00000000e+02 8.29308996e+01 1.99760273e+02 1.79552239e+02
  1.14265773e+02 9.15326853e+01 2.98373450e+03 1.16668291e+03
  7.48867907e+02]]


In [16]:
# Spot-check one saved token. The path is built from data_dir, like the other
# loads in this notebook, instead of a hard-coded "./../data" string.
x = np.load(os.path.join(data_dir, 'itsi', 'itsi_0001.npy'))
x.shape

(3, 17)

## All Data (one file)

In [18]:
import os
import pandas as pd
import numpy as np
from scipy.stats import truncnorm

# Seed NumPy's global generator, which scipy's rvs() draws from by default,
# so the generated data set is repeatable.
np.random.seed(42)

this_dir = os.getcwd()
work_dir = os.path.dirname(this_dir)
data_dir = os.path.join(work_dir, 'data')

# Both outputs of this cell are written directly into data_dir, so create it
# explicitly. No per-word sub-directories are needed because no per-token
# files are saved here.
os.makedirs(data_dir, exist_ok=True)

# Consonants (file-name transcription) with their mean cog and mean
# frication duration; the three lists are index-aligned.
consonant_list = ['ts', 'tc', 's', 'c']
consonant_cog = [4000, 7000, 4000, 7000]
consonant_fd = [96, 96, 174, 174]

# Vowels (file-name transcription) with their mean formants [f3, f2, f1];
# the two lists are index-aligned.
vowel_list = ['i', 'L', 'e', 'F', 'u', 'W', 'o', 'D']
vowel_formants = [
    [3372, 2761, 437],
    [3053, 2365, 483],
    [3047, 2530, 536],
    [2979, 2058, 731],
    [2735, 1105, 459],
    [2827, 1225, 519],
    [2828, 1035, 555],
    [2824, 1136, 781]
]

# no. of tokens for each word
sample_size = 8000


def _sample_truncated(means, stds, n_rows):
    """Draw ``n_rows`` samples per feature from truncated normal distributions.

    Each feature is truncated at two standard deviations either side of its
    mean (truncnorm takes its bounds in standard-deviation units). Features
    are drawn one after another, in the order given.

    Args:
        means: 1-D sequence of feature means.
        stds: 1-D sequence of feature standard deviations, aligned with ``means``.
        n_rows: number of samples to draw for every feature.

    Returns:
        Array of shape ``(n_rows, len(means))`` with one column per feature.
    """
    return np.column_stack([
        truncnorm.rvs(-2.0, 2.0, loc=mu, scale=sigma, size=n_rows)
        for mu, sigma in zip(means, stds)
    ])


metadata = []
vcv_list = []
for index in range(len(consonant_list)):
    for jndex in range(len(vowel_list)):

        consonant = consonant_list[index]
        cog = consonant_cog[index]
        fri_dur = consonant_fd[index]

        vowel = vowel_list[jndex]
        f_vals = vowel_formants[jndex]

        word = vowel + consonant + vowel

        # Marked as training data: consonants at positions 0 and 3 combined
        # with vowels at positions 0, 2, 4 and 6.
        if (index in [0, 3]) and (jndex in [0, 2, 4, 6]):
            istrain = 'yes'
        else:
            istrain = 'no'

        c_f_means = np.array([cog, fri_dur])
        c_f_stds = np.array([500, 13])

        con_means = np.array([200, 0.5, 1, 50, 60]) # sta_dev, skewness, kurtosis, bur_int, fri_int
        con_stds = con_means * 0.05

        con_means = np.concatenate((c_f_means, con_means))
        con_stds = np.concatenate((c_f_stds, con_stds))

        # total duration of fixed value 200 for consonants
        con_dur = np.full((sample_size, 1), 200)

        # zero values for all other features
        zeros = np.full((sample_size, 9), 0)

        # all concatenated
        consonants = np.hstack((
            _sample_truncated(con_means, con_stds, sample_size), con_dur, zeros
        ))

        vow_means = np.array([80, 200, 170, 110 ,90]) # voc_int, f0, b3, b2, b1
        vow_means = np.append(vow_means, f_vals) # *, f3, f2, f1
        vow_stds = vow_means * 0.05

        # vocalic duration and total duration of fixed value 400 for vowels
        vow_dur = np.full((sample_size, 2), 400)

        # zero values for all other features
        zeros = np.full((sample_size, 7), 0)

        # all concatenated
        vowels = np.hstack((
            zeros, vow_dur, _sample_truncated(vow_means, vow_stds, sample_size)
        ))

        for i in range(sample_size):
            uid = word + f'_{i+1:04d}'

            # Rows: vowel, consonant, vowel (the same vowel row is used twice).
            vcv = np.vstack([vowels[i], consonants[i], vowels[i]])

            # Position this token will have along axis 0 of the combined
            # array; stored in the 'path' column in place of a file path.
            outer_idx = len(vcv_list)
            vcv_list.append(vcv)

            metadata.append({
                'uid': uid,
                'path': outer_idx,
                'cog': vcv[1][0],
                'fri_dur': vcv[1][1],
                'word': word,
                'consonant': consonant,
                'vowel': vowel,
                'train': istrain
            })

# NOTE: this is the same data_dir/metadata.csv that the per-file generation
# cell writes, so it replaces that file; here 'path' holds integer indices
# into all_data.npy rather than relative file paths.
csv_name = 'metadata.csv'
csv_path = os.path.join(data_dir, csv_name)
metaframe = pd.DataFrame(metadata)
metaframe.to_csv(csv_path, index=False)

vcv_all = np.stack(vcv_list, axis=0)   # shape (n_words * sample_size, 3, 17)

# Every metadata row must point at exactly one slice of the combined array.
assert vcv_all.shape == (len(metadata), 3, 17)

save_path = os.path.join(data_dir, "all_data.npy")
np.save(save_path, vcv_all)

In [21]:
# Reload the combined array from data_dir to confirm it was written.
x = np.load(os.path.join(data_dir, "all_data.npy"))

In [22]:
# Dimensions of the reloaded array.
x.shape

(256000, 3, 17)