In [1]:
import librosa
import numpy as np
import pandas as pd
import parselmouth
from parselmouth.praat import call
from scipy.signal import find_peaks, lfilter, hamming
from scipy.io import wavfile
from scipy.fftpack import fft
import Preprocessing.features as features  # features.py 파일을 import
# from Preprocessing.label import labeling
import os
import torch
from torch.utils.data import Dataset, DataLoader, random_split

# 셀에 함수 정의해서 테스트

In [1]:
class WAVDataset(Dataset):
    def __init__(self, wav_path, label_path, max_length):
        """
        초기화 메서드
        :param wav_path: 처리할 WAV 파일의 경로 리스트
        :param max_length: 각 샘플의 최대 길이
        """
        self.wav_path = wav_path
        self.label_path = label_path
        self.max_length = max_length

    def __len__(self):
        """
        데이터셋의 길이 반환
        :return: 데이터셋에 있는 WAV 파일의 수
        """
        return len(self.wav_path)

    def __getitem__(self, idx):
        """
        인덱스에 해당하는 데이터 반환
        :param idx: 인덱스
        ====================================================
        - Preprocessing (features.py 이용)
        - 피처 병합 (merge_features 메서드 이용)
        - 피처 데이터(X)와 라벨(y) 분리
        ====================================================
        :return: 피처 데이터(X)와 라벨(y)을 텐서 형태로 반환
        """
        wav_path = self.wav_path[idx]
        y, sr = librosa.load(wav_path, sr=44100)
        label_path = self.label_path[idx]

        # Preprocessing 과정
        mfcc = features.extract_mfcc(y, sr)
        pitch = features.extract_pitch(y, sr)
        f0_pyworld = features.extract_f0_pyworld(y, sr)
        spectral_flux = features.extract_spectral_flux(y, sr)
        spectral_entropy = features.extract_spectral_entropy(y, sr)
        labeled = labeling(label_path, y, sr)

        # 추출된 feature 병합한 dataframe을 concated_df로 선언 후, return
        # 피처 병합
        features_dict = {
            'mfcc': mfcc,
            'pitch': pitch,
            'f0_pyworld': f0_pyworld,
            'spectral_flux': spectral_flux,
            'spectral_entropy': spectral_entropy,
            'label': labeled
        }
        concatenated_df = self.merge_features(features_dict)

        # 패딩 또는 자르기 적용
        X = self.pad_or_truncate(concatenated_df.values)

        # 라벨과 나머지 데이터 분리
        y = X[:, -1]  # 마지막 열이 라벨 (수정금지)
        X = X[:, :-1]  # 마지막 열을 제외한 나머지가 피처 데이터

        # X와 y를 텐서로 변환
        X = torch.tensor(X, dtype=torch.float32)
        y = torch.tensor(y, dtype=torch.float32)

        return X, y

    def merge_features(self, features_dict):
        # 피처들을 하나의 데이터프레임으로 병합
        df_list = []
        for key, df in features_dict.items():
            # 각 DataFrame의 행 수를 통일
            df_list.append(df)

        # 열 방향으로 병합
        concatenated_df = pd.concat(df_list, axis=1)
        return concatenated_df

    def pad_or_truncate(self, features):
        length, feature_dim = features.shape
        if length > self.max_length:
            return features[:self.max_length]
        elif length < self.max_length:
            pad_width = self.max_length - length
            padding = np.zeros((pad_width, feature_dim))
            return np.vstack((features, padding))
        return features

NameError: name 'Dataset' is not defined

In [11]:
def create_dataloader(wav_path, label_path, max_length, batch_size, shuffle=True):
    dataset = WAVDataset(wav_path, label_path, max_length)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    return dataloader

In [12]:
wav_dir_train = '/Users/imdohyeon/Documents/PythonWorkspace/Lieon-ai/Dataset/Train/Audio'
wav_path_train = [os.path.join(wav_dir_train, file) for file in os.listdir(wav_dir_train) if file.endswith('.wav')]

wav_dir_test = '/Users/imdohyeon/Documents/PythonWorkspace/Lieon-ai/Dataset/Test/Audio'
wav_path_test = [os.path.join(wav_dir_test, file) for file in os.listdir(wav_dir_test) if file.endswith('.wav')]

label_dir_train = '/Users/imdohyeon/Documents/PythonWorkspace/Lieon-ai/Dataset/Train/Label'
label_path_train = [os.path.join(label_dir_train, file) for file in os.listdir(label_dir_train) if file.endswith('.csv')]

label_dir_test = '/Users/imdohyeon/Documents/PythonWorkspace/Lieon-ai/Dataset/Test/Label'
label_path_test = [os.path.join(label_dir_test, file) for file in os.listdir(label_dir_test) if file.endswith('.csv')]

In [13]:
train_dataloader = create_dataloader(wav_path_train, label_path_train, 1000, 32)
train_dataloader

<torch.utils.data.dataloader.DataLoader at 0x7fd1ea876150>

In [14]:
train_data = WAVDataset(wav_path_train, label_path_train, 1000)

In [15]:
train_data.label_path[0]

'/Users/imdohyeon/Documents/PythonWorkspace/Lieon-ai/Dataset/Train/Label/label1.csv'

In [16]:
train_fortest = train_data.__getitem__(0)

NameError: name 'labeling' is not defined

In [None]:
train_fortest

In [None]:
data = pd.DataFrame(train_fortest[0])
target = pd.DataFrame(train_fortest[1])

In [None]:
data

In [None]:
target

# label.py 불러와서 직접 테스트

In [2]:
import os
from Preprocessing.preprocessing import WAVDataset, create_dataloader
from Preprocessing.label import labeling

In [3]:
wav_dir_train = '/Users/imdohyeon/Documents/PythonWorkspace/Lieon-ai/Dataset/Train/Audio'
wav_path_train = [os.path.join(wav_dir_train, file) for file in os.listdir(wav_dir_train) if file.endswith('.wav')]

label_dir_train = '/Users/imdohyeon/Documents/PythonWorkspace/Lieon-ai/Dataset/Train/Label'
label_path_train = [os.path.join(label_dir_train, file) for file in os.listdir(label_dir_train) if file.endswith('.csv')]

In [4]:
train_data = WAVDataset(wav_path_train, label_path_train, 1000)

In [5]:
train_fortest = train_data.__getitem__(0)
train_fortest

(tensor([[-5.3997e+02,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
           1.5655e-01,  3.1167e+00],
         [-5.3417e+02,  7.7929e+00,  6.6953e+00,  ...,  1.1458e+02,
           2.0104e-01,  3.4554e+00],
         [-5.2573e+02,  1.6729e+01,  9.7042e+00,  ...,  1.2662e+02,
           2.6027e-01,  3.3969e+00],
         ...,
         [-2.1423e+02,  2.7582e+02, -6.4803e+00,  ...,  1.8113e+02,
           4.9372e+01,  2.8634e+00],
         [-2.2243e+02,  2.7098e+02, -2.3358e+00,  ...,  1.8348e+02,
           6.0851e+01,  3.0236e+00],
         [-2.3712e+02,  2.6463e+02,  1.1535e+01,  ...,  1.7402e+02,
           7.4148e+01,  3.2468e+00]]),
 tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0630, 0.3130, 0.5630, 0.8130, 1.0000, 1.0000,
         1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
         1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
         1.0000, 1.0000, 1.000

In [6]:
data = pd.DataFrame(train_fortest[0])
target = pd.DataFrame(train_fortest[1])

In [7]:
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,tensor(-539.9658),tensor(0.),tensor(0.),tensor(0.),tensor(0.),tensor(0.),tensor(0.),tensor(0.),tensor(0.),tensor(0.),...,tensor(0.),tensor(0.),tensor(0.),tensor(0.),tensor(0.),tensor(0.),tensor(330.9056),tensor(0.),tensor(0.1566),tensor(3.1167)
1,tensor(-534.1747),tensor(7.7929),tensor(6.6953),tensor(5.1128),tensor(3.2563),tensor(1.2652),tensor(-0.7444),tensor(-2.5944),tensor(-4.0361),tensor(-4.8531),...,tensor(-1.1971),tensor(0.2479),tensor(1.6276),tensor(2.7093),tensor(3.3262),tensor(3.4689),tensor(348.3561),tensor(114.5800),tensor(0.2010),tensor(3.4554)
2,tensor(-525.7291),tensor(16.7289),tensor(9.7042),tensor(5.1404),tensor(5.4425),tensor(7.0061),tensor(4.6290),tensor(-2.3610),tensor(-9.1165),tensor(-10.4396),...,tensor(-4.6807),tensor(-1.2163),tensor(4.9206),tensor(8.7436),tensor(7.5005),tensor(3.3866),tensor(360.1469),tensor(126.6197),tensor(0.2603),tensor(3.3969)
3,tensor(-522.8682),tensor(19.1810),tensor(9.0041),tensor(2.9566),tensor(4.9821),tensor(9.7694),tensor(8.9470),tensor(0.5100),tensor(-8.7918),tensor(-10.6177),...,tensor(-3.3780),tensor(-1.2019),tensor(6.2268),tensor(11.5355),tensor(9.3180),tensor(1.7576),tensor(1483.6013),tensor(125.8328),tensor(0.3216),tensor(2.9543)
4,tensor(-516.1061),tensor(26.3599),tensor(11.6610),tensor(3.5414),tensor(6.9101),tensor(13.1156),tensor(10.8554),tensor(-0.6965),tensor(-11.3380),tensor(-11.4047),...,tensor(-0.7451),tensor(1.8441),tensor(9.4612),tensor(14.4770),tensor(11.4162),tensor(2.5383),tensor(505.3890),tensor(126.4195),tensor(0.2264),tensor(2.7023)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,tensor(-229.2042),tensor(271.9182),tensor(9.0550),tensor(-53.7930),tensor(-7.3586),tensor(-20.3147),tensor(-36.1866),tensor(-4.1240),tensor(-0.1540),tensor(-24.4205),...,tensor(9.9180),tensor(-1.8271),tensor(-23.3174),tensor(-7.4465),tensor(5.0248),tensor(-17.3009),tensor(596.6304),tensor(178.8656),tensor(62.1505),tensor(2.9832)
996,tensor(-215.7849),tensor(276.2311),tensor(-2.9505),tensor(-58.0659),tensor(-3.3253),tensor(-23.3470),tensor(-42.3777),tensor(-4.1005),tensor(0.5059),tensor(-27.1802),...,tensor(9.3201),tensor(-2.8646),tensor(-24.8155),tensor(-8.1596),tensor(4.0647),tensor(-18.9013),tensor(599.8652),tensor(179.1306),tensor(39.8304),tensor(2.8326)
997,tensor(-214.2324),tensor(275.8203),tensor(-6.4803),tensor(-58.4869),tensor(1.0009),tensor(-22.7233),tensor(-48.1421),tensor(-6.3076),tensor(4.1171),tensor(-28.1062),...,tensor(8.3781),tensor(1.1161),tensor(-24.1796),tensor(-12.4065),tensor(3.1314),tensor(-16.0518),tensor(600.8179),tensor(181.1261),tensor(49.3715),tensor(2.8634)
998,tensor(-222.4265),tensor(270.9829),tensor(-2.3358),tensor(-55.2374),tensor(-0.3386),tensor(-25.6526),tensor(-51.5149),tensor(-7.8101),tensor(6.6106),tensor(-26.9766),...,tensor(8.1067),tensor(5.4501),tensor(-21.1428),tensor(-15.0327),tensor(0.7161),tensor(-15.0517),tensor(600.8066),tensor(183.4815),tensor(60.8515),tensor(3.0236)


In [8]:
target

Unnamed: 0,0
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0
...,...
995,2.0
996,2.0
997,2.0
998,2.0
