특징 추출 및 모델 학습

In [1]:
# Mount Google Drive into the Colab runtime; the dataset and output paths used
# further down in this notebook are all located under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

MessageError: Error: credential propagation was unsuccessful

In [None]:
# Feature extraction: MFCC + chroma (CQT) + tonnetz

import librosa
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv1D, MaxPooling1D
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
import glob
import os
from imblearn.over_sampling import SMOTE

def get_files_from_dir(path, file_format):
    """Return every file with the given extension under ``path``, recursively.

    Args:
        path: Root directory to search; all sub-directories are searched too.
        file_format: File extension, with or without a leading dot
            (``'wav'`` and ``'.wav'`` are treated the same).

    Returns:
        A sorted list of matching paths, each one prefixed with ``path``.
        An empty list is returned when nothing matches.
    """
    # Tolerate a leading dot so '.wav' does not silently build '*..wav'.
    extension = file_format.lstrip('.')
    pattern = os.path.join(path, f'**/*.{extension}')
    # glob yields entries in arbitrary filesystem order; sorting makes the
    # file list (and everything derived from it) reproducible across runs.
    return sorted(glob.glob(pattern, recursive=True))

def augment_data(y, sr, rate=0.81, n_steps=4):
    """Create an augmented copy of a signal by time-stretching then pitch-shifting.

    Args:
        y: Audio time series (as returned by ``librosa.load``).
        sr: Sampling rate of ``y``.
        rate: Stretch factor forwarded to ``librosa.effects.time_stretch``.
            Defaults to 0.81, the value this notebook has always used.
        n_steps: Number of steps forwarded to ``librosa.effects.pitch_shift``.
            Defaults to 4, the value this notebook has always used.

    Returns:
        The stretched and pitch-shifted signal.
    """
    stretched = librosa.effects.time_stretch(y, rate=rate)
    return librosa.effects.pitch_shift(stretched, sr=sr, n_steps=n_steps)

def _extract_feature_vector(y, sr):
    """Return one flat feature vector (MFCC, chroma-CQT, tonnetz) for a signal."""
    # Each librosa feature is averaged over axis 1 so that every signal,
    # whatever its duration, is summarised by one value per coefficient.
    # n_fft / hop_length are deliberately smaller than librosa's defaults.
    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_fft=1024, hop_length=256), axis=1)
    chroma_cqt = np.mean(librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=256), axis=1)
    tonnetz = np.mean(librosa.feature.tonnetz(y=y, sr=sr), axis=1)
    return np.hstack([mfccs, chroma_cqt, tonnetz])


def extract_features(audio_files):
    """Extract features for every audio file and for an augmented copy of it.

    For each readable file two rows are produced, in this order: the features
    of the original signal, then the features of ``augment_data(y, sr)``.

    Args:
        audio_files: Iterable of audio file paths.

    Returns:
        ``np.array`` built from the collected feature vectors. With no input
        files (or when every file fails) this is an empty 1-D array.
    """
    features = []

    for audio_file in audio_files:
        try:
            y, sr = librosa.load(audio_file)

            # Row for the untouched recording.
            features.append(_extract_feature_vector(y, sr))

            # Row for the time-stretched / pitch-shifted copy. If augmentation
            # fails, the original row appended above is still kept.
            features.append(_extract_feature_vector(augment_data(y, sr), sr))

        except Exception as e:
            # Best-effort by design: report the unreadable/corrupt file and
            # carry on with the rest of the dataset.
            print(f"Error processing {audio_file}: {str(e)}")

    return np.array(features)

# Locations of the two classes of recordings on the mounted drive.
AI_AUDIO_DIR = '/content/drive/MyDrive/음성데이터셋/03fin48_ai_koen_padding_cut_no_noise'
HUMAN_AUDIO_DIR = '/content/drive/MyDrive/음성데이터셋/03fin48_human_koen_padding_cut_no_noise'

ai_audio_files = get_files_from_dir(AI_AUDIO_DIR, 'wav')
human_audio_files = get_files_from_dir(HUMAN_AUDIO_DIR, 'wav')

# AI voices: extract features and label them as class 0.
ai_features_train_test_fullset = extract_features(ai_audio_files)
ai_labels_train_test_fullset = np.zeros(len(ai_features_train_test_fullset))

# Human voices: extract features and label them as class 1.
human_features_train_test_fullset = extract_features(human_audio_files)
human_labels_train_test_fullset = np.ones(len(human_features_train_test_fullset))

all_features = np.concatenate([ai_features_train_test_fullset, human_features_train_test_fullset])
all_labels = np.concatenate([ai_labels_train_test_fullset, human_labels_train_test_fullset])

# NOTE(review): extract_features emits an original row and an augmented row per
# recording, and this split is row-wise, so both versions of one recording can
# end up on opposite sides of the split. Validation scores may therefore be
# optimistic; consider splitting by file before augmenting.
X_train, X_val, y_train, y_val = train_test_split(
    all_features,
    all_labels,
    test_size=0.2,
    stratify=all_labels,  # keep the AI/human ratio identical in both subsets
    random_state=42,      # fixed seed so the split is reproducible
)

In [None]:
# CNN model

from keras.layers import BatchNormalization, Conv1D
from keras.utils import to_categorical

# One-hot encode the labels to match the 2-unit softmax output layer.
y_train = to_categorical(y_train, num_classes=2)
y_val = to_categorical(y_val, num_classes=2)

# Conv1D expects 3-D input, so append a trailing axis of size 1 to the
# feature matrices.
X_train = np.expand_dims(X_train, axis=2)
X_val = np.expand_dims(X_val, axis=2)

model = Sequential()

# Convolutional stage 1.
model.add(Conv1D(64, kernel_size=3, activation='relu', padding='same', input_shape=(X_train.shape[1], 1)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.25))

# Convolutional stage 2.
model.add(Conv1D(128, kernel_size=3, activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.25))

model.add(Flatten())

# Dense classification head.
model.add(Dense(256, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(Dense(2, activation='softmax'))

optimizer = Adam(learning_rate=0.001)

model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None").
model.summary()

early_stopping = EarlyStopping(monitor='val_loss', patience=10)

# The callback has to be handed to fit(); previously it was created but never
# used, so training always ran for the full 100 epochs.
history = model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 38, 64)            256       
                                                                 
 batch_normalization (Batch  (None, 38, 64)            256       
 Normalization)                                                  
                                                                 
 max_pooling1d (MaxPooling1  (None, 19, 64)            0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 19, 64)            0         
                                                                 
 conv1d_1 (Conv1D)           (None, 19, 128)           24704     
                                                                 
 batch_normalization_1 (Bat  (None, 19, 128)           5

In [None]:
import pickle

MODEL_PATH = '/content/drive/MyDrive/코드/model_fin.h5'
HISTORY_PATH = '/content/drive/MyDrive/코드/history.pkl'

# Persist the trained model; the target file uses the .h5 extension.
model.save(MODEL_PATH)

# Persist the metrics dictionary collected by fit() (history.history) with pickle.
with open(HISTORY_PATH, 'wb') as history_file:
    pickle.dump(history.history, history_file)

  saving_api.save_model(
