In [1]:

import os
import math
import torch
import torchaudio
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt


In [None]:

# Select the GPU used by this notebook.
GPU_NUM = 4  # index of the GPU to use
device = torch.device(f'cuda:{GPU_NUM}' if torch.cuda.is_available() else 'cpu')

# torch.cuda.set_device / current_device only accept a CUDA context; calling
# them after falling back to the CPU raises, so guard on the device type.
if device.type == 'cuda':
    torch.cuda.set_device(device) # change allocation of current GPU
    print ('Current cuda device ', torch.cuda.current_device()) # check
else:
    print('CUDA is not available; using device', device)


In [4]:
# Dataset directory (relative to the notebook) and the name of one recording.
file_path, file = '../data/', 'audio_000200.wav'

# Signal-processing constants used by later cells.
# NFFT is the FFT size; Freq is presumably the sample rate in Hz -- TODO confirm.
NFFT, Freq = 512, 16000


### zero padding 부분 제거

In [10]:
# Find the column at which the trailing zero padding of a spectrogram starts.
def binary_search( stft, min_col=65 ):
    """Return the index of the last non-zero column before the zero padding.

    The search assumes the layout "signal columns, then all-zero columns" and
    bisects over the column index, probing pairs of adjacent columns with
    `column`.

    Parameters
    ----------
    stft : 2-D array indexed as [row, column].
    min_col : lowest column index considered (default 65, the value that was
        originally hard-coded; the reason for that value is not visible here).

    Returns
    -------
    int
        Index `c` such that column `c` contains a non-zero value and column
        `c + 1` is entirely zero.  If the search runs off the right edge
        (no trailing padding at all) the last column index is returned.

    Raises
    ------
    ValueError
        If the array has too few columns to search, or if no boundary exists
        at or above `min_col` (e.g. everything from `min_col` on is zero).
    """
    n_cols = stft.shape[1]
    lo, hi = min_col, n_cols - 2      # `hi` leaves room for the (mid, mid+1) pair
    if hi < lo:
        raise ValueError(
            f'need at least {min_col + 2} columns to search, got {n_cols}')

    while lo <= hi:
        mid = (lo + hi) // 2
        verdict = column(stft, mid)   # probe once per step
        if verdict == 'true':
            return mid
        if verdict == 'up':
            lo = mid + 1
        else:                         # 'down'
            hi = mid - 1

    # `lo` can only reach n_cols - 1 after an 'up' verdict at n_cols - 2,
    # which means the last column still carries signal: nothing to trim.
    if lo == n_cols - 1:
        return n_cols - 1
    raise ValueError(
        f'no zero-padding boundary found at or after column {min_col}')


def column( stft, col ):
    """Classify column `col` relative to the start of the zero padding.

    Looks at columns `col` and `col + 1` and returns

    * 'true' -- `col` has a non-zero value and `col + 1` is all zero
                (the boundary is exactly here);
    * 'up'   -- `col + 1` has a non-zero value, so the boundary lies further
                right.  This also covers an all-zero `col` followed by a
                non-zero `col + 1`, a case that previously returned None;
    * 'down' -- both columns are all zero, so the boundary lies further left.

    `col + 1` must be a valid column index.
    """
    pair = stft[:, col:col+2]

    first_has_signal  = bool(np.any(pair[:, 0] != 0))
    second_has_signal = bool(np.any(pair[:, 1] != 0))

    if second_has_signal:   return 'up'   #target > mid
    if first_has_signal:    return 'true' #target = mid
    return 'down'                         #target < mid
    
    
    
    
# Pack real/imaginary parts into the layout fed to torchaudio.transforms.TimeStretch.
def timestretch_input(real, imag):
    """Stack `real` and `imag` along a new trailing axis and return a tensor.

    Parameters
    ----------
    real, imag : 2-D arrays of identical shape (rows, cols).

    Returns
    -------
    torch.Tensor
        Tensor of shape (rows, cols, 2) where `[..., 0]` holds `real` and
        `[..., 1]` holds `imag`.  The dtype follows numpy's promotion of the
        two inputs.
    """
    # One vectorised stack replaces the former per-row Python loop and the
    # (very slow) conversion of a list of ndarrays into a tensor.
    packed = np.stack([np.asarray(real), np.asarray(imag)], axis=-1)
    return torch.as_tensor(packed)
    

In [11]:

# File names of the four input arrays, in the order
# left / right spectrogram, then left / right phase.
x_data = ['S_left.npy', 'S_right.npy', 'S_left_phase.npy','S_right_phase.npy']

# Array stored in angle.npy (presumably the per-sample target angles -- TODO confirm).
y_data = np.load( file_path + 'angle.npy' )

# Load the four arrays in the order listed in x_data.
S_left, S_right, S_left_phase, S_right_phase = [
    np.load( file_path + name ) for name in x_data
]


 S_left, S_right (magnitude) 와 S_left_phase, S_right_phase (phase) 를 결합하여
 STFT_left, STFT_right 를 만든다.
 
 즉, mag 와 phase 로부터 STFT 를 복원한 뒤 (실수부/허수부로 분리하여) TimeStretch 의 input size 에 맞춘다.

In [12]:

# Rebuild the per-sample STFT (real / imaginary parts) from magnitude and
# phase, trim the zero-padded tail, and move the result to `device`.
STFT_left  = []
STFT_right = []
columns = []   # number of columns kept for each sample


for idx in range(y_data.shape[0]):

    # Drop the zero-padded tail: keep columns [0, col).  The cut point is
    # located on the left channel only and reused for the right channel.
    col = binary_search(S_left[:,:,idx]) + 1
    columns.append(col)

    # --- magnitude and phase ---------------------------------------------
    # R (magnitude)
    radius_left  = S_left[:,:col,idx]
    radius_right = S_right[:,:col,idx]

    # theta (phase); its real / imaginary parts are used as cos / sin, i.e.
    # the phase arrays are presumably complex unit phasors -- TODO confirm.
    theta_left  = S_left_phase[:,:col,idx]
    theta_right = S_right_phase[:,:col,idx]

    cos_left = np.real( theta_left )
    sin_left = np.imag( theta_left )

    cos_right = np.real( theta_right )
    sin_right = np.imag( theta_right )

    # --- STFT = R * theta, split into real and imaginary parts -------------
    stft_left_real = radius_left  * cos_left
    stft_left_imag = radius_left  * sin_left

    stft_right_real = radius_right * cos_right
    stft_right_imag = radius_right * sin_right

    left  = timestretch_input(stft_left_real, stft_left_imag)
    right = timestretch_input(stft_right_real, stft_right_imag)

    # Use the device chosen in the GPU cell instead of a hard-coded 'cuda:4',
    # so the loop also runs on machines with a different GPU layout.
    STFT_left.append(left.to(device))
    STFT_right.append(right.to(device))


print('done...')


done...


### time stretch 

In [None]:
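# 1. Remove the zero-padded columns
# 2. shape -> (257, cols)
# 3. fixed_rate = 0.95 * cols / 382, rounded up to two decimals
#    (kept slightly below cols / 382 so the stretched output is expected to reach 382 frames)

# 4. output size -> (257, 382 + remain)
# 5. drop the remainder -> (257, 382)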

In [15]:
#time stretch

# These aliases are imported in this cell, as in the original notebook, in
# case other cells rely on `nn` / `transforms`.
import torch.nn as nn
import torchaudio.transforms as transforms


N_BINS = NFFT // 2 + 1   # frequency bins of a one-sided STFT (257 for NFFT = 512)
TARGET_FRAMES = 382      # every stretched spectrogram is cut to this many frames
HOP_LENGTH = 250         # hop length handed to TimeStretch -- presumably the STFT hop; TODO confirm


# Row 0 of each strchd_* tensor is a dummy placeholder, kept so that code
# which skips it with `[1:]` keeps working; real samples start at row 1.
# Rows are collected in lists and concatenated once after the loop, instead
# of re-copying the growing tensor with torch.cat on every iteration.
left_mag_rows    = [torch.zeros([1, N_BINS, TARGET_FRAMES]).to(device)]
right_mag_rows   = [torch.zeros([1, N_BINS, TARGET_FRAMES]).to(device)]
left_phase_rows  = [torch.zeros([1, N_BINS, TARGET_FRAMES]).to(device)]
right_phase_rows = [torch.zeros([1, N_BINS, TARGET_FRAMES]).to(device)]


for idx in range(y_data.shape[0]):

    # The time axis is stretched by a factor of 1 / fixed_rate.  The rate is
    # 95 % of cols / TARGET_FRAMES, rounded up to two decimals.
    fixed_rate = math.ceil( columns[idx] / TARGET_FRAMES * 100 * 0.95) / 100

    # timestretch; n_freq must equal the number of frequency bins of the
    # input (NFFT // 2 + 1), not NFFT // 2 - 1.
    aug1 = transforms.TimeStretch( hop_length=HOP_LENGTH, n_freq=N_BINS,
                                   fixed_rate=fixed_rate ).to(device)

    out_left  = aug1(STFT_left[idx])
    out_right = aug1(STFT_right[idx])

    # stft --> mag, phase
    m_left,  ph_left  = torchaudio.functional.magphase(out_left)
    m_right, ph_right = torchaudio.functional.magphase(out_right)

    n_frames = m_left.shape[1]
    if n_frames < TARGET_FRAMES:
        # Fail with a clear message instead of a shape mismatch at concat time.
        raise ValueError(
            f'sample {idx}: stretched output has {n_frames} frames, '
            f'fewer than the required {TARGET_FRAMES} '
            f'(cols={columns[idx]}, fixed_rate={fixed_rate})')

    ml = m_left.reshape(1, N_BINS, n_frames)
    pl = ph_left.reshape(1, N_BINS, n_frames)
    mr = m_right.reshape(1, N_BINS, n_frames)
    pr = ph_right.reshape(1, N_BINS, n_frames)

    # Drop the surplus frames so every sample is (1, N_BINS, TARGET_FRAMES).
    left_mag_rows.append(   ml[:,:,:TARGET_FRAMES] )
    right_mag_rows.append(  mr[:,:,:TARGET_FRAMES] )
    left_phase_rows.append( pl[:,:,:TARGET_FRAMES] )
    right_phase_rows.append(pr[:,:,:TARGET_FRAMES] )


# (1 + number of samples, 257, 382) -- row 0 is the placeholder.
strchd_left  = torch.cat( left_mag_rows,  dim=0 )
strchd_right = torch.cat( right_mag_rows, dim=0 )
strchd_left_phase  = torch.cat( left_phase_rows,  dim=0 )
strchd_right_phase = torch.cat( right_phase_rows, dim=0 )


print('done...')

done...


In [17]:
# Write the four stretched tensors to disk.  Row 0 of each tensor is skipped
# with `[1:]` before saving.
_stretched_outputs = [
    ('../data/strchd_left.pt',        strchd_left),
    ('../data/strchd_right.pt',       strchd_right),
    ('../data/strchd_left_phase.pt',  strchd_left_phase),
    ('../data/strchd_right_phase.pt', strchd_right_phase),
]

for _out_path, _stacked in _stretched_outputs:
    torch.save( _stacked[1:], _out_path )