### 라이브러리

In [1]:
import pandas as pd
import numpy as np
import os
import shutil
from tqdm import tqdm
from glob import glob
import librosa
import warnings
warnings.filterwarnings("ignore")

import json

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Convolution2D, BatchNormalization, Flatten,
                                     Dropout, Dense, AveragePooling2D, Add)
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

### 데이터 input / 전처리

In [3]:
def load_data(paths):
    """Load every audio file in ``paths`` as a waveform resampled to 16 kHz.

    Args:
        paths: Iterable of audio file paths readable by ``librosa.load``.

    Returns:
        A NumPy array with one entry per path. When every clip has the same
        number of samples this is a regular numeric array; when the lengths
        differ it is a 1-D object array whose elements are the individual
        waveforms (trim them with ``set_length`` before stacking).
    """
    clips = []
    for path in tqdm(paths):
        # sr=16000 means each second of audio is sampled as 16,000 values.
        data, _ = librosa.load(path, sr=16000)
        clips.append(data)

    if len({len(clip) for clip in clips}) <= 1:
        # If memory is tight, an explicit dtype can be requested here,
        # e.g. np.array(clips, dtype=np.float32).
        return np.array(clips)

    # Clips of different lengths cannot form a rectangular array, and recent
    # NumPy releases raise ValueError instead of silently falling back to an
    # object array. Build that object array explicitly, element by element,
    # so NumPy never tries to broadcast the clips.
    ragged = np.empty(len(clips), dtype=object)
    for index, clip in enumerate(clips):
        ragged[index] = clip
    return ragged

In [4]:
def get_feature(data, sr=16000, n_fft=256, win_length=200, hop_length=160, n_mels=64):
    """Compute a log-scaled (dB) mel spectrogram of ``data``.

    Args:
        data: Audio samples passed to ``librosa.feature.melspectrogram`` as ``y``.
        sr: Sampling rate of ``data`` in Hz.
        n_fft: FFT size.
        win_length: Window length in samples.
        hop_length: Number of samples between successive frames.
        n_mels: Number of mel bands.

    Returns:
        The mel spectrogram converted to decibels relative to its maximum
        value, with one extra leading axis of size 1 in front of the shape
        returned by librosa.
    """
    # The signal must be passed by keyword: newer librosa releases made ``y``
    # keyword-only, so a positional waveform raises TypeError there.
    mel = librosa.feature.melspectrogram(
        y=data,
        sr=sr,
        n_fft=n_fft,
        win_length=win_length,
        hop_length=hop_length,
        n_mels=n_mels,
    )
    # Wrap in a one-element list to add the leading axis callers rely on.
    mel = np.array([mel])
    return librosa.power_to_db(mel, ref=np.max)

In [5]:
# Bring all clips to a common length.
def set_length(data, d_mini):
    """Truncate every clip in ``data`` to at most ``d_mini`` samples.

    Args:
        data: Iterable of sliceable sequences (e.g. 1-D waveforms).
        d_mini: Maximum number of leading samples to keep from each clip.

    Returns:
        A NumPy array built from the truncated clips. Clips that are already
        shorter than ``d_mini`` are kept as they are (no padding is applied).
    """
    return np.array([clip[:d_mini] for clip in data])

In [6]:
# Load every .wav file in the current directory as a 16 kHz waveform array.
voice_path = np.array(load_data(glob("./*.wav")))

# Number of samples kept per clip (12320 samples = 0.77 s at 16 kHz).
# NOTE(review): presumably the clip length used when the model was trained - confirm.
mini = 12320
voice_path = set_length(voice_path, mini)

# Log-mel spectrogram features for the trimmed clips.
result = get_feature(voice_path)

100%|██████████| 1/1 [00:00<00:00, 62.67it/s]


In [7]:
# Reshape the features into (samples, 64 mel bands, 78 frames, 1 channel);
# -1 lets NumPy infer the number of samples.
result = np.reshape(result, (-1, 64, 78, 1))
result.shape

(1, 64, 78, 1)

### 기존 모델 호출

In [8]:
# Load the existing saved Keras model from 'voice.h5' in the working directory.
model = tf.keras.models.load_model('voice.h5')

In [None]:
# Run the loaded model on the reshaped features; the return value is not
# stored, so in a notebook it is only shown as the cell output.
model.predict(result)