# Drive Mount

In [None]:
from google.colab import drive

# Attach the user's Google Drive to the Colab filesystem so the dataset path
# used later in the notebook resolves.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# Import Libraries

In [None]:
import numpy as np
import pandas as pd
import os
import librosa
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import IPython
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation,Reshape,MaxPooling2D, Dropout, Conv2D, MaxPool2D, Flatten
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import *

# Preprocessing

In [None]:
def extract_melspectrograms(audio_path, sr: int = 16000, n_mels: int = 128, hop_length: int = 512) -> tuple:
    """Build one-second log-mel spectrogram samples from labelled audio folders.

    Every immediate sub-directory of ``audio_path`` whose name starts with
    ``'Fake'`` (label 1) or ``'Real'`` (label 0) is scanned. Each file is
    loaded at ``sr`` Hz and cut into consecutive, non-overlapping one-second
    segments; a trailing remainder shorter than one second is dropped, so
    files shorter than one second contribute nothing. Each segment is turned
    into a mel spectrogram expressed in dB relative to its own maximum.

    Features and labels are only recorded once a file has been processed
    completely, so a file that fails part-way never leaves the two outputs
    misaligned.

    Args:
        audio_path: Directory containing the ``Fake*`` / ``Real*`` folders.
        sr: Sample rate used for loading; also the number of samples per
            one-second segment.
        n_mels: Number of mel bands per spectrogram.
        hop_length: Hop length passed to the mel spectrogram computation.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays. ``features`` has shape
        ``(num_segments, n_mels, frames, 1)`` and ``labels`` has shape
        ``(num_segments,)`` with 1 for fake and 0 for real.

    Raises:
        ValueError: If no segment could be extracted at all.
    """
    features = []
    labels = []
    samples_per_segment = sr  # one second of audio

    # Sorted traversal keeps the sample order (and therefore any seeded
    # train/test split made from it) reproducible across filesystems.
    for folder in sorted(os.listdir(audio_path)):
        if 'Fake' not in folder and 'Real' not in folder:
            continue
        folder_path = os.path.join(audio_path, folder)
        if not os.path.isdir(folder_path):
            # A stray file such as "Fake_readme.txt" is not a class folder.
            continue

        # Decide the label up front; a folder that matches the filter but has
        # no recognised prefix cannot be labelled, so it is skipped instead of
        # contributing unlabeled features.
        if folder.startswith('Fake'):
            label = 1  # 1 = fake
        elif folder.startswith('Real'):
            label = 0  # 0 = real
        else:
            print(f"Skipping folder without a 'Fake'/'Real' prefix: {folder}")
            continue

        print(folder)
        for file in tqdm(sorted(os.listdir(folder_path))):
            file_path = os.path.join(folder_path, file)
            try:
                # Load the audio file.
                audio, _ = librosa.load(file_path, sr=sr)

                file_specs = []
                last_start = len(audio) - samples_per_segment
                for start in range(0, last_start + 1, samples_per_segment):
                    segment = audio[start:start + samples_per_segment]

                    # Extract the mel spectrogram and convert it to dB.
                    mel_spec = librosa.feature.melspectrogram(
                        y=segment, sr=sr, n_mels=n_mels, hop_length=hop_length
                    )
                    file_specs.append(librosa.power_to_db(mel_spec, ref=np.max))
            except Exception as e:
                # Best effort: report the unreadable file and move on.
                print(f"파일을 파싱하는 동안 오류 발생: {file_path} ({e})")
                continue

            # Commit features and labels together, only for fully parsed files.
            features.extend(file_specs)
            labels.extend([label] * len(file_specs))

    if not features:
        raise ValueError(
            f"No audio segments could be extracted from {audio_path!r}: expected "
            "sub-folders starting with 'Fake' or 'Real' that contain audio files "
            "of at least one second."
        )

    # Pad every spectrogram to the longest one. All segments hold the same
    # number of samples, so this is normally a no-op safety net.
    max_length = max(spec.shape[1] for spec in features)
    padded = [
        np.pad(spec, ((0, 0), (0, max_length - spec.shape[1])), mode='constant')
        for spec in features
    ]

    # Add the trailing channel axis expected by Conv2D.
    return np.expand_dims(np.array(padded), axis=-1), np.array(labels)

# Root folder of the dataset on the mounted Drive.
audio_paths = '/content/drive/MyDrive/KT_BP/Data/DeepVoiceDataset'

# Turn every labelled recording into spectrogram samples and their labels.
X, y = extract_melspectrograms(audio_paths)

# Quick sanity check of the resulting array shapes.
for caption, array in (("Features shape:", X), ("Labels shape:", y)):
    print(caption, array.shape)

In [None]:
# Hold out 20% of the samples for testing, then 20% of the remainder for
# validation. Both splits are stratified by label and share one fixed seed.
# NOTE(review): the split is per segment, so segments cut from the same
# recording can land in different subsets - confirm this is acceptable.
split_options = dict(test_size=.2, random_state=202405)
x_train, x_test, y_train, y_test = train_test_split(X, y, stratify=y, **split_options)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, stratify=y_train, **split_options)

# Modeling

In [None]:
# Reset Keras' global state so re-running this cell starts from scratch.
tf.keras.backend.clear_session()

# (filters, kernel_size) for the four convolutional stages, in order.
conv_stages = [
    (32, (4, 4)),
    (48, (5, 5)),
    (64, (4, 4)),
    (128, (2, 4)),
]

layer_stack = []
for stage_index, (n_filters, kernel_shape) in enumerate(conv_stages):
    # Only the very first layer declares the input shape.
    first_layer_kwargs = {'input_shape': x_train.shape[1:]} if stage_index == 0 else {}

    # Each stage: same-padded ReLU convolution followed by 2x2 max pooling.
    layer_stack.append(
        Conv2D(n_filters, kernel_shape, strides=(1, 1), padding='same',
               activation='relu', **first_layer_kwargs)
    )
    layer_stack.append(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

# Classifier head: flatten, one hidden dense layer, single sigmoid output.
layer_stack.append(Flatten())
layer_stack.append(Dense(128, activation='relu'))
layer_stack.append(Dense(1, activation='sigmoid'))

model = Sequential(layer_stack)


In [None]:
# Binary classification setup: Adam optimizer, binary cross-entropy loss,
# accuracy reported during training.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Stop once the validation loss has not improved for 5 consecutive epochs and
# roll back to the best weights seen, so the model evaluated afterwards is not
# the one from 5 epochs past the optimum.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Validate on the dedicated (x_val, y_val) split created earlier in the
# notebook. Using validation_split here would ignore that split and carve a
# further 20% out of the training data.
model.fit(x_train, y_train, epochs=100, validation_data=(x_val, y_val), callbacks=[early_stopping])

# Evaluation

In [None]:
# Sigmoid outputs of the model for every test sample.
predictions = model.predict(x_test)

# Threshold at 0.5 to obtain hard 0/1 predictions.
y_pred = (predictions > 0.5).astype(int)

# Performance evaluation: score the hard predictions against the true labels.
accuracy, precision, recall, f1 = [
    score_fn(y_test, y_pred)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
]

for caption, score in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
):
    print(caption, score)