In [5]:
import torchaudio
import os
from multiprocessing import Pool, cpu_count
import math


from paths import *

In [6]:
def collaboration_single_work(my_work_pool, my_det_dir): 
    """Print the names of recordings in one work chunk that hold no audio frames.

    Args:
        my_work_pool: Sequence of file names, each relative to ``my_det_dir``.
            May be empty, in which case nothing is printed.
        my_det_dir: Directory that contains the files named in ``my_work_pool``.

    Returns:
        None. Results are reported through ``print``: a start message, the name
        of every file whose ``torchaudio.info(...).num_frames`` is <= 0, and an
        end message.
    """
    if len(my_work_pool) == 0:
        # An empty chunk has no first/last item to report; indexing it would
        # raise IndexError, so there is simply nothing to do.
        return

    first_name, last_name = my_work_pool[0], my_work_pool[-1]
    print("Working from {} to {}".format(first_name, last_name))
    for rec_name in my_work_pool: 
        rec_path = os.path.join(my_det_dir, rec_name)
        if torchaudio.info(rec_path).num_frames <= 0: 
            print(rec_name)
    print("Work from {} to {} ends".format(first_name, last_name))

In [7]:
class MultiprocessManager: 
    """Split the entries of a directory into chunks and process them in a worker pool.

    Each chunk is handed to ``fun(chunk, my_det_dir)`` in a separate process.
    """

    def __init__(self, fun, my_det_dir, num_workers=4): 
        """Store the worker function, the directory to scan and the pool size.

        Args:
            fun: Callable invoked as ``fun(chunk, my_det_dir)`` in a worker process.
            my_det_dir: Directory whose entries (``os.listdir``) form the work list.
            num_workers: Number of processes in the pool and the target number
                of chunks.
        """
        self.fun = fun
        self.my_det_dir = my_det_dir
        self.num_workers = num_workers
    
    def divide_work(self, work):
        """Slice ``work`` into consecutive chunks of ``ceil(len(work) / num_workers)`` items.

        Args:
            work: Sliceable sequence of work items.

        Returns:
            List of chunks. It may hold FEWER than ``num_workers`` chunks
            (e.g. 5 items over 4 workers gives 3 chunks) and is empty when
            ``work`` is empty.
        """
        if len(work) == 0:
            # A chunk size of 0 would make range() raise "arg 3 must not be zero".
            return []

        # determine the number of items per worker
        items_per_worker = math.ceil(len(work) / self.num_workers)

        # divide the work into chunks
        return [work[i:i + items_per_worker] for i in range(0, len(work), items_per_worker)]

    @staticmethod
    def _report_worker_error(exc):
        """Print an exception raised inside a worker so failures are not silently lost."""
        print("Worker failed: {!r}".format(exc))
    
    def collaboration_work(self): 
        """List ``my_det_dir``, chunk the entries and run ``fun`` on every chunk in a pool.

        Blocks until all submitted chunks have finished. Return values of
        ``fun`` are discarded; exceptions raised by ``fun`` are printed.
        """
        flat_tasks = os.listdir(self.my_det_dir)
        task_pools = self.divide_work(flat_tasks)
        print(self.num_workers)
        # The context manager terminates the pool if anything below raises;
        # after close() + join() that final terminate() is a no-op.
        with Pool(self.num_workers) as p:
            # Iterate over the chunks that actually exist: there can be fewer
            # chunks than workers, so indexing by range(num_workers) is unsafe.
            for task_pool in task_pools:
                p.apply_async(
                    self.fun,
                    args=(task_pool, self.my_det_dir,),
                    error_callback=self._report_worker_error,
                )
            print('Waiting for all subprocesses done...')
            p.close()
            p.join()
        print('All subprocesses done.')

In [8]:
# Disabled driver cell: when uncommented it runs collaboration_single_work over
# every entry of word_seg_anno_path, using one worker process per CPU core.
# NOTE(review): the "random sampling" remark below does not match what the
# code does (no sampling is performed) — looks like a leftover; confirm.
# n_worker = cpu_count()
# # random sampling
# mpm = MultiprocessManager(collaboration_single_work, 
#                           word_seg_anno_path, 
#                           num_workers=n_worker)

# mpm.collaboration_work()

In [12]:
# Load a single clip from the word-segment directory for inspection; the shape
# displayed afterwards is [1, 0], i.e. this file contains no samples.
waveform, sample_rate = torchaudio.load(os.path.join(word_seg_anno_path, "s1901b_00001431.wav"))

In [14]:
waveform.shape

torch.Size([1, 0])

In [25]:
import torch.nn as nn
import torch
import torchaudio
import os

from paths import *

In [26]:
# Settings passed to MyTransform, which forwards them to torchaudio's MelSpectrogram.
REC_SAMPLE_RATE = 16000  # sample rate given to the transform (presumably Hz and equal to the recordings' rate — confirm)
N_FFT = 400  # value passed as n_fft

In [27]:
class MyTransform(nn.Module): 
    """Mel-spectrogram front end that returns features time-major: (L, F).

    Wraps ``torchaudio.transforms.MelSpectrogram`` and rearranges its
    (..., F, L) output so that the frame axis comes before the mel-bin axis.
    """

    def __init__(self, sample_rate, n_fft): 
        """Build the underlying MelSpectrogram.

        Args:
            sample_rate: Sample rate passed to ``MelSpectrogram``.
            n_fft: FFT size passed to ``MelSpectrogram``.
        """
        super().__init__()
        self.transform = torchaudio.transforms.MelSpectrogram(sample_rate, n_fft=n_fft)
    
    def forward(self, waveform): 
        """Compute the mel spectrogram of ``waveform`` and swap its last two axes.

        Args:
            waveform: Audio tensor accepted by ``MelSpectrogram``, e.g. the
                (1, T) tensor returned by ``torchaudio.load`` for a mono file.

        Returns:
            Tensor of shape (L, F) for single-channel input. Inputs with more
            than one channel keep their leading dimensions: (..., L, F).
        """
        mel_spec = self.transform(waveform)
        # Drop only LEADING singleton dims (the mono channel axis). A bare
        # squeeze() would also remove a length-1 time axis, which happens for
        # clips that yield a single frame and then breaks the axis swap below.
        while mel_spec.dim() > 2 and mel_spec.size(0) == 1:
            mel_spec = mel_spec.squeeze(0)
        mel_spec = mel_spec.transpose(-1, -2) # (F, L) -> (L, F)
        return mel_spec

In [28]:
# Load one full recording from wav_path; its shape is displayed further down as [1, 9969854].
waveform, sample_rate = torchaudio.load(os.path.join(wav_path, "s0101a.wav"))

In [29]:
# NOTE(review): the transform is built from the REC_SAMPLE_RATE constant, not
# from the sample_rate returned by torchaudio.load — confirm the two agree for
# the files being processed.
t = MyTransform(sample_rate=REC_SAMPLE_RATE, n_fft=N_FFT)



In [30]:
# Mel features of the loaded recording; MyTransform.forward returns them as (L, F).
mel = t(waveform)

In [31]:
waveform.shape

torch.Size([1, 9969854])

In [32]:
mel

tensor([[0.0000e+00, 3.9037e-09, 2.1019e-08,  ..., 1.6256e-06, 7.4967e-06,
         9.0985e-06],
        [0.0000e+00, 2.2318e-08, 1.2016e-07,  ..., 2.3912e-06, 2.7047e-06,
         3.8634e-06],
        [0.0000e+00, 4.6319e-08, 2.4939e-07,  ..., 1.4409e-06, 2.3818e-06,
         2.8968e-06],
        ...,
        [0.0000e+00, 8.7722e-05, 4.7232e-04,  ..., 5.4108e-06, 2.6126e-06,
         1.5923e-06],
        [0.0000e+00, 1.5115e-04, 8.1386e-04,  ..., 2.8069e-06, 5.8884e-06,
         2.5544e-06],
        [0.0000e+00, 1.1784e-03, 6.3448e-03,  ..., 1.2396e-06, 2.0887e-06,
         2.2797e-06]])

In [33]:
# Serialize the mel tensor to "save.pt" (a relative path, so it lands in the
# current working directory and replaces any existing file of that name).
torch.save(mel, "save.pt")

In [1]:
import torchaudio
import os
from multiprocessing import Pool, cpu_count
import math


from paths import *
from preproc_samplecut import open_and_cut

In [2]:
# Inputs for a single-recording trial call of open_and_cut.
my_wave_dir = wav_path  # directory joined with rec_name to locate the audio file
rec_name = "s0101a.wav"
my_anno_dir = words_extract_path  # directory joined with rec_raw + ".csv" to locate the annotation file
rec_raw = "s0101a"  # same recording name as rec_name, without the extension
my_params = None  # third argument to open_and_cut; what None means there is not visible here — TODO confirm


In [3]:
# Run open_and_cut on the recording's wav file and its matching annotation CSV;
# the second returned value (corr_df) is the table displayed afterwards.
cut_recs, corr_df = open_and_cut(
    os.path.join(my_wave_dir, rec_name),
    os.path.join(my_anno_dir, f"{rec_raw}.csv"),
    my_params,
)

In [4]:
corr_df

Unnamed: 0,rec,idx,start_time,end_time,token,duration,n_frames
0,s0101a,00000000,32.216575,32.622045,okay,0.405470,6488
1,s0101a,00000001,44.617996,44.946848,um,0.328852,5262
2,s0101a,00000002,45.355708,45.501487,i'm,0.145779,2333
3,s0101a,00000003,47.307796,47.530873,lived,0.223077,3569
4,s0101a,00000004,47.530873,47.658958,in,0.128085,2049
...,...,...,...,...,...,...,...
993,s0101a,00000993,621.416324,621.606854,i,0.190530,3049
994,s0101a,00000994,621.606854,621.751955,i,0.145101,2321
995,s0101a,00000995,621.751955,622.197430,preferred,0.445475,7128
996,s0101a,00000996,622.197430,622.767665,family,0.570235,9124
