# In [None]:
# Mount Google Drive at /content/drive; the dataset paths used later in this
# file all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

import librosa
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv1D, MaxPooling1D
from keras.utils import to_categorical
import glob
import os
from sklearn.model_selection import train_test_split

# Spectral feature extraction (same as the previous version).
max_length = 15

def extract_features(audio_file, max_length):
    """Return one flat feature vector for a single audio file.

    The file is loaded at its native sample rate, then zero-padded at the
    end or truncated so that it holds exactly ``max_length`` samples.
    MFCC (13 coefficients), chroma and spectral-contrast matrices are
    computed on that signal, flattened and joined into one 1-D array.

    NOTE(review): ``max_length`` is compared against the sample count here,
    not against a duration in seconds -- confirm this is intended.

    Args:
        audio_file: path of the audio file to load.
        max_length: number of samples the signal is forced to.

    Returns:
        1-D numpy array holding the concatenated features.
    """
    signal, rate = librosa.load(audio_file, sr=None)

    # Positive shortfall -> pad with trailing zeros, negative -> cut the tail.
    shortfall = max_length - len(signal)
    if shortfall > 0:
        signal = np.pad(signal, (0, shortfall), 'constant')
    elif shortfall < 0:
        signal = signal[:max_length]

    blocks = (
        librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=13),
        librosa.feature.chroma_stft(y=signal, sr=rate),
        librosa.feature.spectral_contrast(y=signal, sr=rate),
    )
    return np.concatenate([block.flatten() for block in blocks])

def get_files_from_dir(path, file_format):
    """Return the files with extension ``file_format`` under ``path``.

    The search is recursive, so files in ``path`` itself and in any of its
    sub-directories are included.
    """
    pattern = os.path.join(path, f'**/*.{file_format}')
    matches = glob.glob(pattern, recursive=True)
    return matches

#승연부탁
from keras.layers import BatchNormalization

# Model definition
# The number of distinct labels drives the hidden layer width, the output head
# and the loss, so it is computed once instead of re-concatenating the label
# arrays every time it is needed.
output_units = len(set(np.concatenate((ai_labels, human_labels))))
is_binary = output_units == 2

model = Sequential()

model.add(Conv1D(filters=128, kernel_size=5, strides=1, padding="same", activation="relu",
                 input_shape=(X_train.shape[1], 1)))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))

model.add(Conv1D(filters=64, kernel_size=5, strides=1, padding="same", activation="relu"))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))

model.add(Conv1D(filters=32, kernel_size=5, strides=1, padding="same", activation="relu"))
model.add(BatchNormalization())
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))

model.add(Dropout(0.3))
model.add(Flatten())

# Wider dense layer: ten units per class.
model.add(Dense(units=output_units * 10, activation="relu"))
model.add(Dropout(0.3))

# Output head and matching loss.
if is_binary:
    # Two classes: a single sigmoid unit gives the probability of class 1.
    model.add(Dense(units=1, activation='sigmoid'))
    loss_name = 'binary_crossentropy'
else:
    # More classes: one softmax unit per class.
    # NOTE(review): categorical_crossentropy expects one-hot labels; confirm
    # y_train is encoded that way before this branch is used.
    model.add(Dense(units=output_units, activation='softmax'))
    loss_name = 'categorical_crossentropy'

# Compile and train
model.compile(optimizer='adam', loss=loss_name, metrics=['accuracy'])

# Aim for about ten batches per epoch, but never let the integer division
# produce a batch size of 0 when there are fewer than ten training rows.
batch_size = max(1, X_train.shape[0] // 10)

history = model.fit(X_train, y_train, batch_size=batch_size, epochs=30,
                    validation_data=(X_test, y_test))


#===================================================================================


#조용한 부분 삭제 + 슬라이딩 + loss 해결
import librosa
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv1D, MaxPooling1D
from keras.utils import to_categorical
import glob
import os
from sklearn.model_selection import train_test_split
import numpy as np
from keras.optimizers import SGD

# Spectral feature extraction (sliding-window version).
max_length = 5  # length of each sliding window (seconds)
window_hop = 0.5   # distance between consecutive window starts (seconds)

def extract_features(audio_files, max_length, window_hop):
    """Turn audio files into one flat feature vector per sliding window.

    Every file is loaded at its native sample rate and cut into windows of
    ``max_length`` seconds whose starts are ``window_hop`` seconds apart.
    Only complete windows are used, so a file shorter than one window
    contributes nothing.  For each window the MFCC (13 coefficients),
    chroma and spectral-contrast matrices are flattened and joined.

    A file that raises while being processed is reported on stdout and
    skipped; windows already extracted from that file are kept.

    Args:
        audio_files: audio file paths to process (also printed once).
        max_length: window length in seconds.
        window_hop: hop between window starts in seconds.

    Returns:
        ``np.array`` built from the list of window vectors; an empty 1-D
        array when no window was produced.
    """
    window_vectors = []
    print(audio_files)
    for path in audio_files:
        try:
            signal, rate = librosa.load(path, sr=None)
            # Convert the window length and the hop from seconds to samples.
            samples_per_window = int(rate * max_length)
            samples_per_hop = int(rate * window_hop)
            final_start = len(signal) - samples_per_window
            for start in range(0, final_start + 1, samples_per_hop):
                segment = signal[start:start + samples_per_window]
                blocks = (
                    librosa.feature.mfcc(y=segment, sr=rate, n_mfcc=13),
                    librosa.feature.chroma_stft(y=segment, sr=rate),
                    librosa.feature.spectral_contrast(y=segment, sr=rate),
                )
                window_vectors.append(np.concatenate([block.flatten() for block in blocks]))
        except Exception as err:
            print(f"Error processing {path}: {str(err)}")

    # Convert the collected vectors to an array.
    return np.array(window_vectors)



def get_files_from_dir(path, file_format):
    """Return the files with extension ``file_format`` under ``path``.

    The search is recursive, so files in ``path`` itself and in any of its
    sub-directories are included.
    """
    pattern = os.path.join(path, f'**/*.{file_format}')
    matches = glob.glob(pattern, recursive=True)
    return matches


from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler

# Gather the AI-generated clips from each speaker sub-folder.
ai_subfolders = ['sm', 'sy', 'mh']
ai_audio_files = []
for folder in ai_subfolders:
    ai_audio_files.extend(get_files_from_dir(f'/content/drive/MyDrive/PBL/ai_voice/ai_padding/{folder}', 'mp3'))

# Gather the human recordings the same way.
human_subfolders = ['cy', 'sy', 'hs']
human_audio_files = []
for folder in human_subfolders:
    human_audio_files.extend(get_files_from_dir(f'/content/drive/MyDrive/PBL/ai_voice/human_padding/{folder}', 'mp3'))

# AI windows: features plus label 0.
ai_features_train_test_fullset = extract_features(ai_audio_files, max_length=max_length, window_hop=window_hop)
ai_labels_train_test_fullset = np.zeros(len(ai_features_train_test_fullset))

# Human windows: features, then label 1 (assigned after the width fix below).
human_features_train_test_fullset = extract_features(human_audio_files, max_length=max_length, window_hop=window_hop)

# Both classes need the same number of feature columns before stacking.
# np.resize refills the target from the flattened source, so values would
# spill from one window's row into the next; crop or zero-pad each row
# independently so every row still describes exactly one window.
feature_width = ai_features_train_test_fullset.shape[1]
width_gap = feature_width - human_features_train_test_fullset.shape[1]
if width_gap < 0:
    human_features_train_test_fullset = human_features_train_test_fullset[:, :feature_width]
elif width_gap > 0:
    human_features_train_test_fullset = np.pad(
        human_features_train_test_fullset, ((0, 0), (0, width_gap)), mode='constant')

human_labels_train_test_fullset = np.ones(len(human_features_train_test_fullset))

# Merge both classes into one feature matrix and one label vector.
X_train_test = np.vstack((ai_features_train_test_fullset, human_features_train_test_fullset))
y_train_test = np.concatenate((ai_labels_train_test_fullset, human_labels_train_test_fullset))

print(X_train_test.shape)
print(y_train_test.shape)
# Hold out 20% of the windows for validation.
X_train, X_test, y_train, y_test = train_test_split(X_train_test, y_train_test, test_size=0.2)


# Model definition
num_classes = len(set(y_train_test))

model = Sequential()
model.add(Conv1D(filters=64, kernel_size=5, strides=1,
                 padding="same", activation="relu", input_shape=(X_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))
model.add(Dropout(0.25))
model.add(Conv1D(filters=64, kernel_size=5, strides=1,
                 padding="same", activation="relu"))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(units=num_classes, activation="softmax"))

# Stop training once val_loss has not improved for 10 epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=10)

# Standardise the features; the scaler is fitted on the training split only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Compile once, with the configuration that is actually used for training.
# The earlier SGD/categorical_crossentropy compile was overwritten before
# fit() ran, and its SGD(lr=...) argument is rejected by newer Keras releases.
# sparse_categorical_crossentropy takes the integer class labels directly, so
# no to_categorical conversion is needed.
opt = Adam(learning_rate=0.0001)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

history = model.fit(X_train_scaled, y_train, batch_size=16,
                    epochs=30,
                    validation_data=(X_test_scaled, y_test),
                    callbacks=[early_stopping])


#===================================================================================


#조용한 부분 삭제 + 슬라이딩 + loss 해결
import librosa
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv1D, MaxPooling1D
from keras.utils import to_categorical
import glob
import os
from sklearn.model_selection import train_test_split
import numpy as np
from keras.optimizers import SGD

# Spectral feature extraction (sliding-window version).
max_length = 5  # length of each sliding window (seconds)
window_hop = 0.5   # distance between consecutive window starts (seconds)

def extract_features(audio_files, max_length, window_hop):
    """Turn audio files into one flat feature vector per sliding window.

    Every file is loaded at its native sample rate and cut into windows of
    ``max_length`` seconds whose starts are ``window_hop`` seconds apart.
    Only complete windows are used, so a file shorter than one window
    contributes nothing.  For each window the MFCC (13 coefficients),
    chroma and spectral-contrast matrices are flattened and joined.

    A file that raises while being processed is reported on stdout and
    skipped; windows already extracted from that file are kept.

    Args:
        audio_files: audio file paths to process (also printed once).
        max_length: window length in seconds.
        window_hop: hop between window starts in seconds.

    Returns:
        ``np.array`` built from the list of window vectors; an empty 1-D
        array when no window was produced.
    """
    window_vectors = []
    print(audio_files)
    for path in audio_files:
        try:
            signal, rate = librosa.load(path, sr=None)
            # Convert the window length and the hop from seconds to samples.
            samples_per_window = int(rate * max_length)
            samples_per_hop = int(rate * window_hop)
            final_start = len(signal) - samples_per_window
            for start in range(0, final_start + 1, samples_per_hop):
                segment = signal[start:start + samples_per_window]
                blocks = (
                    librosa.feature.mfcc(y=segment, sr=rate, n_mfcc=13),
                    librosa.feature.chroma_stft(y=segment, sr=rate),
                    librosa.feature.spectral_contrast(y=segment, sr=rate),
                )
                window_vectors.append(np.concatenate([block.flatten() for block in blocks]))
        except Exception as err:
            print(f"Error processing {path}: {str(err)}")

    # Convert the collected vectors to an array.
    return np.array(window_vectors)



def get_files_from_dir(path, file_format):
    """Return the files with extension ``file_format`` under ``path``.

    The search is recursive, so files in ``path`` itself and in any of its
    sub-directories are included.
    """
    pattern = os.path.join(path, f'**/*.{file_format}')
    matches = glob.glob(pattern, recursive=True)
    return matches


from keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler

# Gather the AI-generated clips from each speaker sub-folder.
ai_subfolders = ['sm', 'sy', 'mh']
ai_audio_files = []
for folder in ai_subfolders:
    ai_audio_files.extend(get_files_from_dir(f'/content/drive/MyDrive/PBL/ai_voice/ai_padding/{folder}', 'mp3'))

# Gather the human recordings the same way.
human_subfolders = ['cy', 'sy', 'hs']
human_audio_files = []
for folder in human_subfolders:
    human_audio_files.extend(get_files_from_dir(f'/content/drive/MyDrive/PBL/ai_voice/human_padding/{folder}', 'mp3'))

# AI windows: features plus label 0.
ai_features_train_test_fullset = extract_features(ai_audio_files, max_length=max_length, window_hop=window_hop)
ai_labels_train_test_fullset = np.zeros(len(ai_features_train_test_fullset))

# Human windows: features, then label 1 (assigned after the width fix below).
human_features_train_test_fullset = extract_features(human_audio_files, max_length=max_length, window_hop=window_hop)

# Both classes need the same number of feature columns before stacking.
# np.resize refills the target from the flattened source, so values would
# spill from one window's row into the next; crop or zero-pad each row
# independently so every row still describes exactly one window.
feature_width = ai_features_train_test_fullset.shape[1]
width_gap = feature_width - human_features_train_test_fullset.shape[1]
if width_gap < 0:
    human_features_train_test_fullset = human_features_train_test_fullset[:, :feature_width]
elif width_gap > 0:
    human_features_train_test_fullset = np.pad(
        human_features_train_test_fullset, ((0, 0), (0, width_gap)), mode='constant')

human_labels_train_test_fullset = np.ones(len(human_features_train_test_fullset))

# Merge both classes into one feature matrix and one label vector.
X_train_test = np.vstack((ai_features_train_test_fullset, human_features_train_test_fullset))
y_train_test = np.concatenate((ai_labels_train_test_fullset, human_labels_train_test_fullset))

print(X_train_test.shape)
print(y_train_test.shape)
# Hold out 20% of the windows for validation.
X_train, X_test, y_train, y_test = train_test_split(X_train_test, y_train_test, test_size=0.2)


# Model definition
num_classes = len(set(y_train_test))

model = Sequential()
model.add(Conv1D(filters=64, kernel_size=5, strides=1, padding="same", activation="relu",
                 input_shape=(X_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))
model.add(Conv1D(filters=64, kernel_size=5, strides=1, padding="same", activation="relu"))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))
model.add(Dropout(0.5))  # heavier dropout
model.add(Conv1D(filters=32, kernel_size=5, strides=1, padding="same", activation="relu"))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))
model.add(Dropout(0.5))  # heavier dropout
model.add(Flatten())
model.add(Dense(units=num_classes, activation="softmax"))

# Standardise the features; the scaler is fitted on the training split only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Compile once, with the configuration that is actually used for training.
# The earlier SGD/categorical_crossentropy compile was overwritten before
# fit() ran, and its SGD(lr=...) argument is rejected by newer Keras releases.
# sparse_categorical_crossentropy takes the integer class labels directly, so
# no to_categorical conversion is needed.
opt = Adam(learning_rate=0.0001)
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

history = model.fit(X_train_scaled, y_train, batch_size=16, epochs=30,
                    validation_data=(X_test_scaled, y_test))


#=================================================================================


# AI voice files
ai_subfolders = ['sm', 'sy', 'mh']
ai_audio_files = [
    found
    for folder in ai_subfolders
    for found in get_files_from_dir(f'/content/drive/MyDrive/PBL/ai_voice/ai/{folder}', 'mp3')
]

# Human voice files
human_subfolders = ['cy', 'sy', 'hs']
human_audio_files = [
    found
    for folder in human_subfolders
    for found in get_files_from_dir(f'/content/drive/MyDrive/PBL/ai_voice/human/{folder}', 'mp3')
]

# One feature vector per file.
ai_features = []
for audio_file in ai_audio_files:
    ai_features.append(extract_features(audio_file, max_length))

human_features = []
for audio_file in human_audio_files:
    human_features.append(extract_features(audio_file, max_length))

# Label 0 = AI, label 1 = human.
ai_labels = np.zeros(len(ai_features))
human_labels = np.ones(len(human_features))

# Stack the per-file vectors into one matrix, AI rows first.
X_train_test = np.vstack([*ai_features, *human_features])
y_train_test = np.concatenate((ai_labels, human_labels))

print("Shape of X_train_test:", X_train_test.shape)
print("Shape of y_train_test:", y_train_test.shape)

# Split into training and test data (20% held out).
X_train, X_test, y_train, y_test = train_test_split(X_train_test, y_train_test, test_size=0.2)


#==============================================================================


#조용한 부분삭제 버전+슬라이딩
import librosa
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv1D, MaxPooling1D
from keras.utils import to_categorical
import glob
import os
from sklearn.model_selection import train_test_split
import numpy as np

# Spectral feature extraction (sliding-window version).
max_length = 5  # length of each sliding window (seconds)
window_hop = 0.5   # distance between consecutive window starts (seconds)

def extract_features(audio_files, max_length, window_hop):
    """Turn audio files into one flat feature vector per sliding window.

    Every file is loaded at its native sample rate and cut into windows of
    ``max_length`` seconds whose starts are ``window_hop`` seconds apart.
    Only complete windows are used, so a file shorter than one window
    contributes nothing.  For each window the MFCC (13 coefficients),
    chroma and spectral-contrast matrices are flattened and joined.

    A file that raises while being processed is reported on stdout and
    skipped; windows already extracted from that file are kept.

    Args:
        audio_files: audio file paths to process (also printed once).
        max_length: window length in seconds.
        window_hop: hop between window starts in seconds.

    Returns:
        ``np.array`` built from the list of window vectors; an empty 1-D
        array when no window was produced.
    """
    window_vectors = []
    print(audio_files)
    for path in audio_files:
        try:
            signal, rate = librosa.load(path, sr=None)
            # Convert the window length and the hop from seconds to samples.
            samples_per_window = int(rate * max_length)
            samples_per_hop = int(rate * window_hop)
            final_start = len(signal) - samples_per_window
            for start in range(0, final_start + 1, samples_per_hop):
                segment = signal[start:start + samples_per_window]
                blocks = (
                    librosa.feature.mfcc(y=segment, sr=rate, n_mfcc=13),
                    librosa.feature.chroma_stft(y=segment, sr=rate),
                    librosa.feature.spectral_contrast(y=segment, sr=rate),
                )
                window_vectors.append(np.concatenate([block.flatten() for block in blocks]))
        except Exception as err:
            print(f"Error processing {path}: {str(err)}")

    # Convert the collected vectors to an array.
    return np.array(window_vectors)



def get_files_from_dir(path, file_format):
    """Return the files with extension ``file_format`` under ``path``.

    The search is recursive, so files in ``path`` itself and in any of its
    sub-directories are included.
    """
    pattern = os.path.join(path, f'**/*.{file_format}')
    matches = glob.glob(pattern, recursive=True)
    return matches


# Gather the AI-generated clips from each speaker sub-folder.
ai_subfolders = ['sm', 'sy', 'mh']
ai_audio_files = []
for folder in ai_subfolders:
    ai_audio_files.extend(get_files_from_dir(f'/content/drive/MyDrive/PBL/ai_voice/ai_padding/{folder}', 'mp3'))

# Gather the human recordings the same way.
human_subfolders = ['cy', 'sy', 'hs']
human_audio_files = []
for folder in human_subfolders:
    human_audio_files.extend(get_files_from_dir(f'/content/drive/MyDrive/PBL/ai_voice/human_padding/{folder}', 'mp3'))

# AI windows: features plus label 0.
ai_features_train_test_fullset = extract_features(ai_audio_files, max_length=max_length, window_hop=window_hop)
ai_labels_train_test_fullset = np.zeros(len(ai_features_train_test_fullset))

# Human windows: features, then label 1 (assigned after the width fix below).
human_features_train_test_fullset = extract_features(human_audio_files, max_length=max_length, window_hop=window_hop)

# Both classes need the same number of feature columns before stacking.
# np.resize refills the target from the flattened source, so values would
# spill from one window's row into the next; crop or zero-pad each row
# independently so every row still describes exactly one window.
feature_width = ai_features_train_test_fullset.shape[1]
width_gap = feature_width - human_features_train_test_fullset.shape[1]
if width_gap < 0:
    human_features_train_test_fullset = human_features_train_test_fullset[:, :feature_width]
elif width_gap > 0:
    human_features_train_test_fullset = np.pad(
        human_features_train_test_fullset, ((0, 0), (0, width_gap)), mode='constant')

human_labels_train_test_fullset = np.ones(len(human_features_train_test_fullset))

# Merge both classes into one feature matrix and one label vector.
X_train_test = np.vstack((ai_features_train_test_fullset, human_features_train_test_fullset))
y_train_test = np.concatenate((ai_labels_train_test_fullset, human_labels_train_test_fullset))

print(X_train_test.shape)
print(y_train_test.shape)
# Hold out 20% of the windows for validation.
X_train, X_test, y_train, y_test = train_test_split(X_train_test, y_train_test, test_size=0.2)


# Model definition and compilation
num_classes = len(set(y_train_test))

model = Sequential()
model.add(Conv1D(filters=64, kernel_size=5, strides=1, padding="same", activation="relu",
                 input_shape=(X_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))
model.add(Conv1D(filters=32, kernel_size=5, strides=1, padding="same", activation="relu"))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(units=num_classes, activation="softmax"))  # one unit per class

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Conv1D consumes (samples, steps, channels): add a trailing channel axis.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# One-hot encode with an explicit width so both splits match the output
# layer even if one split happens to miss the highest class index.
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

history = model.fit(X_train, y_train, batch_size=16, epochs=30, validation_data=(X_test, y_test))


#==============================================================================


#슬라이딩거
import librosa
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv1D, MaxPooling1D
from keras.utils import to_categorical
import glob
import os
from sklearn.model_selection import train_test_split
import numpy as np

# Spectral feature extraction (sliding-window version).
max_length = 5  # length of each sliding window (seconds)
window_hop = 0.5   # distance between consecutive window starts (seconds)

def extract_features(audio_files, max_length, window_hop):
    """Turn audio files into one flat feature vector per sliding window.

    Every file is loaded at its native sample rate and cut into windows of
    ``max_length`` seconds whose starts are ``window_hop`` seconds apart.
    Only complete windows are used, so a file shorter than one window
    contributes nothing.  For each window the MFCC (13 coefficients),
    chroma and spectral-contrast matrices are flattened and joined.

    A file that raises while being processed is reported on stdout and
    skipped; windows already extracted from that file are kept.

    Args:
        audio_files: audio file paths to process (also printed once).
        max_length: window length in seconds.
        window_hop: hop between window starts in seconds.

    Returns:
        ``np.array`` built from the list of window vectors; an empty 1-D
        array when no window was produced.
    """
    window_vectors = []
    print(audio_files)
    for path in audio_files:
        try:
            signal, rate = librosa.load(path, sr=None)
            # Convert the window length and the hop from seconds to samples.
            samples_per_window = int(rate * max_length)
            samples_per_hop = int(rate * window_hop)
            final_start = len(signal) - samples_per_window
            for start in range(0, final_start + 1, samples_per_hop):
                segment = signal[start:start + samples_per_window]
                blocks = (
                    librosa.feature.mfcc(y=segment, sr=rate, n_mfcc=13),
                    librosa.feature.chroma_stft(y=segment, sr=rate),
                    librosa.feature.spectral_contrast(y=segment, sr=rate),
                )
                window_vectors.append(np.concatenate([block.flatten() for block in blocks]))
        except Exception as err:
            print(f"Error processing {path}: {str(err)}")

    # Convert the collected vectors to an array.
    return np.array(window_vectors)



def get_files_from_dir(path, file_format):
    """Return the files with extension ``file_format`` under ``path``.

    The search is recursive, so files in ``path`` itself and in any of its
    sub-directories are included.
    """
    pattern = os.path.join(path, f'**/*.{file_format}')
    matches = glob.glob(pattern, recursive=True)
    return matches


# Gather the AI-generated clips from each speaker sub-folder.
ai_subfolders = ['sm', 'sy', 'mh']
ai_audio_files = []
for folder in ai_subfolders:
    ai_audio_files.extend(get_files_from_dir(f'/content/drive/MyDrive/PBL/ai_voice/ai/{folder}', 'mp3'))

# Gather the human recordings the same way.
human_subfolders = ['cy', 'sy', 'hs']
human_audio_files = []
for folder in human_subfolders:
    human_audio_files.extend(get_files_from_dir(f'/content/drive/MyDrive/PBL/ai_voice/human/{folder}', 'mp3'))

# AI windows: features plus label 0.
ai_features_train_test_fullset = extract_features(ai_audio_files, max_length=max_length, window_hop=window_hop)
ai_labels_train_test_fullset = np.zeros(len(ai_features_train_test_fullset))

# Human windows: features, then label 1 (assigned after the width fix below).
human_features_train_test_fullset = extract_features(human_audio_files, max_length=max_length, window_hop=window_hop)

# Both classes need the same number of feature columns before stacking.
# np.resize refills the target from the flattened source, so values would
# spill from one window's row into the next; crop or zero-pad each row
# independently so every row still describes exactly one window.
feature_width = ai_features_train_test_fullset.shape[1]
width_gap = feature_width - human_features_train_test_fullset.shape[1]
if width_gap < 0:
    human_features_train_test_fullset = human_features_train_test_fullset[:, :feature_width]
elif width_gap > 0:
    human_features_train_test_fullset = np.pad(
        human_features_train_test_fullset, ((0, 0), (0, width_gap)), mode='constant')

human_labels_train_test_fullset = np.ones(len(human_features_train_test_fullset))

# Merge both classes into one feature matrix and one label vector.
X_train_test = np.vstack((ai_features_train_test_fullset, human_features_train_test_fullset))
y_train_test = np.concatenate((ai_labels_train_test_fullset, human_labels_train_test_fullset))

print(X_train_test.shape)
print(y_train_test.shape)
# Hold out 20% of the windows for validation.
X_train, X_test, y_train, y_test = train_test_split(X_train_test, y_train_test, test_size=0.2)


# Model definition and compilation
num_classes = len(set(y_train_test))

model = Sequential()
model.add(Conv1D(filters=64, kernel_size=5, strides=1, padding="same", activation="relu",
                 input_shape=(X_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))
model.add(Conv1D(filters=32, kernel_size=5, strides=1, padding="same", activation="relu"))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(units=num_classes, activation="softmax"))  # one unit per class

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Conv1D consumes (samples, steps, channels): add a trailing channel axis.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# One-hot encode with an explicit width so both splits match the output
# layer even if one split happens to miss the highest class index.
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

history = model.fit(X_train, y_train, batch_size=16, epochs=30, validation_data=(X_test, y_test))


#=========================================================================


import librosa
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv1D, MaxPooling1D
from keras.utils import to_categorical
import glob
import os
from sklearn.model_selection import train_test_split
import numpy as np

# Spectral feature extraction (sliding-window version).
max_length = 5  # length of each sliding window (seconds)
window_hop = 0.5   # distance between consecutive window starts (seconds)

def extract_features(audio_files, max_length, window_hop):
    """Turn audio files into one flat feature vector per sliding window.

    Every file is loaded at its native sample rate and cut into windows of
    ``max_length`` seconds whose starts are ``window_hop`` seconds apart.
    Only complete windows are used, so a file shorter than one window
    contributes nothing.  For each window the MFCC (13 coefficients),
    chroma and spectral-contrast matrices are flattened and joined.

    A file that raises while being processed is reported on stdout and
    skipped; windows already extracted from that file are kept.

    Args:
        audio_files: audio file paths to process (also printed once).
        max_length: window length in seconds.
        window_hop: hop between window starts in seconds.

    Returns:
        ``np.array`` built from the list of window vectors; an empty 1-D
        array when no window was produced.
    """
    window_vectors = []
    print(audio_files)
    for path in audio_files:
        try:
            signal, rate = librosa.load(path, sr=None)
            # Convert the window length and the hop from seconds to samples.
            samples_per_window = int(rate * max_length)
            samples_per_hop = int(rate * window_hop)
            final_start = len(signal) - samples_per_window
            for start in range(0, final_start + 1, samples_per_hop):
                segment = signal[start:start + samples_per_window]
                blocks = (
                    librosa.feature.mfcc(y=segment, sr=rate, n_mfcc=13),
                    librosa.feature.chroma_stft(y=segment, sr=rate),
                    librosa.feature.spectral_contrast(y=segment, sr=rate),
                )
                window_vectors.append(np.concatenate([block.flatten() for block in blocks]))
        except Exception as err:
            print(f"Error processing {path}: {str(err)}")

    # Convert the collected vectors to an array.
    return np.array(window_vectors)

def get_files_from_dir(path, file_format):
    """Return the files with extension ``file_format`` under ``path``.

    The search is recursive, so files in ``path`` itself and in any of its
    sub-directories are included.
    """
    pattern = os.path.join(path, f'**/*.{file_format}')
    matches = glob.glob(pattern, recursive=True)
    return matches

# Gather the AI-generated clips from each speaker sub-folder.
ai_subfolders = ['sm', 'sy', 'mh']
ai_audio_files = []
for folder in ai_subfolders:
    ai_audio_files.extend(get_files_from_dir(f'/content/drive/MyDrive/PBL/ai_voice/ai_padding/{folder}', 'mp3'))

# Gather the human recordings the same way.
# The directory name was spelled 'human_paddiing' here; every other cell in
# this file reads from 'human_padding', so the misspelling matched no files.
human_subfolders = ['cy', 'sy', 'hs']
human_audio_files = []
for folder in human_subfolders:
    human_audio_files.extend(get_files_from_dir(f'/content/drive/MyDrive/PBL/ai_voice/human_padding/{folder}', 'mp3'))

# AI windows: features plus label 0.
ai_features_train_test_fullset = extract_features(ai_audio_files, max_length=max_length, window_hop=window_hop)
ai_labels_train_test_fullset = np.zeros(len(ai_features_train_test_fullset))

# Human windows: features, then label 1 (assigned after the width fix below).
human_features_train_test_fullset = extract_features(human_audio_files, max_length=max_length, window_hop=window_hop)

# Both classes need the same number of feature columns before stacking.
# np.resize refills the target from the flattened source, so values would
# spill from one window's row into the next; crop or zero-pad each row
# independently so every row still describes exactly one window.
feature_width = ai_features_train_test_fullset.shape[1]
width_gap = feature_width - human_features_train_test_fullset.shape[1]
if width_gap < 0:
    human_features_train_test_fullset = human_features_train_test_fullset[:, :feature_width]
elif width_gap > 0:
    human_features_train_test_fullset = np.pad(
        human_features_train_test_fullset, ((0, 0), (0, width_gap)), mode='constant')

human_labels_train_test_fullset = np.ones(len(human_features_train_test_fullset))

# Merge both classes into one feature matrix and one label vector.
X_train_test = np.vstack((ai_features_train_test_fullset, human_features_train_test_fullset))
y_train_test = np.concatenate((ai_labels_train_test_fullset, human_labels_train_test_fullset))

print(X_train_test.shape)
print(y_train_test.shape)
# Hold out 20% of the windows for validation.
X_train, X_test, y_train, y_test = train_test_split(X_train_test, y_train_test, test_size=0.2)

# Model definition and compilation
num_classes = len(set(y_train_test))

model = Sequential()
model.add(Conv1D(filters=32, kernel_size=5, strides=1, padding="same", activation="relu",
                 input_shape=(X_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))
model.add(Flatten())
model.add(Dense(units=num_classes, activation="softmax"))  # one unit per class

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Conv1D consumes (samples, steps, channels): add a trailing channel axis.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# One-hot encode with an explicit width so both splits match the output
# layer even if one split happens to miss the highest class index.
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

# Train the model
history = model.fit(X_train, y_train, batch_size=16, epochs=30, validation_data=(X_test, y_test))


#==========================================================================


import librosa
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv1D, MaxPooling1D
from keras.utils import to_categorical
import glob
import os
from sklearn.model_selection import train_test_split

# Spectral feature extraction (operates on already-decoded waveforms).
max_length = 5  # length of each sliding window (seconds)
window_hop = 0.5   # distance between consecutive window starts (seconds)

def extract_features(audio_data, sr=22050):
    """Compute one flat feature vector per in-memory waveform.

    For every waveform the MFCC (13 coefficients), chroma and
    spectral-contrast matrices are computed, flattened and concatenated.
    A waveform that fails validation or raises during extraction is
    reported on stdout and skipped.

    The previous version called ``librosa.get_samplerate`` on the waveform
    itself; that function expects a file path, so every clip raised and was
    skipped.  The sample rate is now an explicit argument.

    Args:
        audio_data: iterable of 1-D sample arrays.
        sr: sample rate of the waveforms in Hz.  Defaults to 22050, which is
            librosa's default rate; pass the real rate when the clips were
            decoded at their native rate (``sr=None``).

    Returns:
        ``np.array`` built from the list of feature vectors; an empty 1-D
        array when nothing could be extracted.
    """
    features = []
    for audio in audio_data:
        try:
            # Reject buffers librosa cannot work with before extracting.
            if not librosa.util.valid_audio(audio):
                print(f"Invalid audio file: {audio}")
                continue

            mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
            chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
            spectral_contrast = librosa.feature.spectral_contrast(y=audio, sr=sr)
            audio_features = np.concatenate((mfccs.flatten(), chroma.flatten(), spectral_contrast.flatten()))
            features.append(audio_features)
        except Exception as e:
            print(f"Error processing audio: {str(e)}")

    # Convert the collected vectors to an array.
    features_array = np.array(features)
    return features_array


def get_files_from_dir(path, file_format):
    """Return the files with extension ``file_format`` under ``path``.

    The search is recursive, so files in ``path`` itself and in any of its
    sub-directories are included.
    """
    pattern = os.path.join(path, f'**/*.{file_format}')
    matches = glob.glob(pattern, recursive=True)
    return matches

# AI-generated clips, one sub-folder per speaker.
ai_subfolders = ['sm', 'sy', 'mh']
ai_audio_files = [
    found
    for folder in ai_subfolders
    for found in get_files_from_dir(f'/content/drive/MyDrive/PBL/ai_voice/ai/{folder}', 'mp3')
]

# Human recordings, one sub-folder per speaker.
human_subfolders = ['cy', 'sy', 'hs']
human_audio_files = [
    found
    for folder in human_subfolders
    for found in get_files_from_dir(f'/content/drive/MyDrive/PBL/ai_voice/human/{folder}', 'mp3')
]

import librosa
import numpy as np
import glob
import os

# librosa.output.write_wav no longer exists in current librosa releases, so
# the cropped clips are written with scipy instead.
from scipy.io import wavfile


def _center_crop(samples, length):
    """Return the middle ``length`` samples of ``samples``."""
    start = (len(samples) - length) // 2
    return samples[start:start + length]


# Decode every file once, keeping (samples, native sample rate) pairs.
human_clips = [librosa.load(audio_file, sr=None) for audio_file in human_audio_files]
ai_clips = [librosa.load(audio_file, sr=None) for audio_file in ai_audio_files]
human_rates = [rate for _, rate in human_clips]
ai_rates = [rate for _, rate in ai_clips]

# Use ONE target length for both classes: the shortest clip overall.
# Separate per-class minimums give the two classes different feature widths,
# which cannot be concatenated into a single training matrix.
min_length = min((len(samples) for samples, _ in human_clips + ai_clips), default=0)

# Centre-crop every clip to the common length.  The shortest clip is kept
# as-is; the old strict '>' comparison silently dropped it.
cut_human_audio_data = [_center_crop(samples, min_length) for samples, _ in human_clips]
cut_ai_audio_data = [_center_crop(samples, min_length) for samples, _ in ai_clips]

# The full-length waveforms are no longer needed.
del human_clips, ai_clips

# Save the cropped human clips as WAV, each with its own sample rate
# (previously every file was written with the rate of the last loaded clip).
for i, (audio, sr) in enumerate(zip(cut_human_audio_data, human_rates)):
    output_path = f'human_audio_{i}.wav'
    wavfile.write(output_path, int(sr), audio)

# Save the cropped AI clips as WAV.
for i, (audio, sr) in enumerate(zip(cut_ai_audio_data, ai_rates)):
    output_path = f'ai_audio_{i}.wav'
    wavfile.write(output_path, int(sr), audio)

print(cut_ai_audio_data)
# AI features; in this cell AI is label 1.
ai_features_train_test_fullset = extract_features(cut_ai_audio_data)
ai_labels_train_test_fullset = np.ones(ai_features_train_test_fullset.shape[0])

# Human features; in this cell human is label 0.
human_features_train_test_fullset = extract_features(cut_human_audio_data)
human_labels_train_test_fullset = np.zeros(human_features_train_test_fullset.shape[0])

# Merge both classes.
X = np.concatenate((ai_features_train_test_fullset, human_features_train_test_fullset))
y = np.concatenate((ai_labels_train_test_fullset, human_labels_train_test_fullset))
print(human_labels_train_test_fullset.shape)
print(ai_features_train_test_fullset.shape)
# Stratified, reproducible 80/20 train/test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True, stratify=y)

# Conv1D consumes (samples, steps, channels): add the channel axis explicitly,
# as the other training cells in this file do.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Build the model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train, holding out 10% of the training rows for validation.
model.fit(X_train, y_train, epochs=10, batch_size=16, validation_split=0.1)

# Evaluate on the held-out test split.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy*100:.2f}%")


#=========================================================================


#자르는것
import librosa
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv1D, MaxPooling1D
from keras.utils import to_categorical
import glob
import os
from sklearn.model_selection import train_test_split
import numpy as np

# Cut or pad audio files to a fixed number of samples.
def preprocess_audio(audio_files, target_length):
    """Load audio files and force each waveform to ``target_length`` samples.

    Each file is loaded at its native sample rate.  A longer waveform keeps
    only its central ``target_length`` samples; a shorter one is zero-padded
    on both sides, with the extra sample (when the gap is odd) going to the
    end.  A file that raises is reported on stdout and skipped.

    Args:
        audio_files: audio file paths to load.
        target_length: required length of every waveform, in samples.

    Returns:
        List of 1-D sample arrays, one per file that was processed.
    """
    fitted = []
    for path in audio_files:
        try:
            samples, _ = librosa.load(path, sr=None)

            # Positive excess -> crop around the centre, negative -> pad.
            excess = len(samples) - target_length
            if excess > 0:
                head = excess // 2
                samples = samples[head:head + target_length]
            elif excess < 0:
                missing = -excess
                lead = missing // 2
                samples = np.pad(samples, (lead, missing - lead), mode='constant')

            fitted.append(samples)
        except Exception as err:
            print(f"Error processing {path}: {str(err)}")

    return fitted

# Spectral feature extraction (uses clips cut or padded to a fixed length).
max_length = 5  # target length of every clip (seconds)

def extract_features(audio_files, max_length):
    """Compute one flat feature vector per audio file.

    Every file is cut or padded to ``max_length`` seconds by
    ``preprocess_audio``; the MFCC (13 coefficients), chroma and
    spectral-contrast matrices of the result are flattened and joined.
    A file that raises is reported on stdout and skipped.

    Args:
        audio_files: audio file paths to process.
        max_length: clip length in seconds.

    Returns:
        2-D array with one row per processed clip; shape ``(0, 0)`` when no
        clip could be processed.
    """
    features = []
    for audio_file in audio_files:
        try:
            # Look up this file's own sample rate.  The function used to read
            # an ``sr`` that it never defined, so it only worked when another
            # cell had happened to leave a global ``sr`` behind.
            sr = librosa.get_samplerate(audio_file)
            audios = preprocess_audio([audio_file], int(max_length * sr))

            for y in audios:
                mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
                chroma = librosa.feature.chroma_stft(y=y, sr=sr)
                spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)

                window_features = np.concatenate((mfccs.flatten(), chroma.flatten(), spectral_contrast.flatten()))

                features.append(window_features)
        except Exception as e:
            print(f"Error processing {audio_file}: {str(e)}")

    # No usable clip: return an empty 2-D array.  Expanding the empty 1-D
    # array used to yield shape (1, 0), which reports one row that does not
    # exist.
    if not features:
        return np.empty((0, 0))

    # Convert the collected vectors to an array.
    features_array = np.array(features)

    # Kept from the original: a 1-D result is promoted to a single row.
    # NOTE(review): with equal-length vectors the array is already 2-D, so
    # this looks reachable only for ragged rows -- confirm it is still needed.
    if features_array.ndim == 1:
        features_array = np.expand_dims(features_array, axis=0)

    return features_array



# 나머지 코드는 그대로 사용합니다.

# Recursive lookup of files with a given extension.
def get_files_from_dir(path, file_format):
    """Return the paths under ``path`` (searched recursively) matching ``*.<file_format>``."""
    pattern = os.path.join(path, "**/*.{}".format(file_format))
    matches = glob.glob(pattern, recursive=True)
    return matches

# Sub-folders holding the AI-generated and the human recordings.
ai_subfolders = ['sm', 'sy', 'mh']
human_subfolders = ['cy', 'sy', 'hs']

# Gather every mp3 found under each AI sub-folder.
ai_audio_files = []
for folder in ai_subfolders:
    ai_audio_files += get_files_from_dir(f'/content/drive/MyDrive/PBL/ai_voice/ai/{folder}', 'mp3')

# Gather every mp3 found under each human sub-folder.
human_audio_files = []
for folder in human_subfolders:
    human_audio_files += get_files_from_dir(f'/content/drive/MyDrive/PBL/ai_voice/human/{folder}', 'mp3')

# Extract AI-voice features; AI clips are labelled 0.
ai_features_train_test_fullset = extract_features(ai_audio_files, max_length=max_length)
ai_labels_train_test_fullset = np.zeros(len(ai_features_train_test_fullset))

# Extract human-voice features; human clips are labelled 1.
human_features_train_test_fullset = extract_features(human_audio_files, max_length=max_length)

# Make the human feature matrix as wide as the AI one so the two can be
# stacked. Each row is truncated or zero-padded on the right; np.resize would
# instead reflow the flattened data and mix values between clips.
ai_width = ai_features_train_test_fullset.shape[1]
human_width = human_features_train_test_fullset.shape[1]
if human_width > ai_width:
    human_features_train_test_fullset = human_features_train_test_fullset[:, :ai_width]
elif human_width < ai_width:
    human_features_train_test_fullset = np.pad(
        human_features_train_test_fullset,
        ((0, 0), (0, ai_width - human_width)),
        mode='constant',
    )

human_labels_train_test_fullset = np.ones(len(human_features_train_test_fullset))

# Combine both classes into a single feature matrix and label vector.
X_train_test = np.vstack((ai_features_train_test_fullset, human_features_train_test_fullset))
y_train_test = np.concatenate((ai_labels_train_test_fullset, human_labels_train_test_fullset))

# Split into training data and held-out data (20%).
X_train, X_test, y_train, y_test = train_test_split(X_train_test, y_train_test, test_size=0.2)

# Add a trailing channel axis so the data matches the model's
# input_shape=(features, 1).
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Number of distinct labels across the whole data set.
num_classes = len(set(np.concatenate((ai_labels_train_test_fullset, human_labels_train_test_fullset))))

# One-hot encode with an explicit width: without num_classes the width is
# inferred per split, so a split that happens to miss a class would no longer
# match the softmax layer.
y_train = to_categorical(y_train, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

# Model definition: one Conv1D/MaxPooling stage feeding a softmax classifier.
model = Sequential()
model.add(Conv1D(filters=32, kernel_size=5, strides=1, padding="same", activation="relu", input_shape=(X_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))
model.add(Flatten())
model.add(Dense(units=num_classes, activation="softmax"))  # one unit per class

# Compile the model.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model; the held-out split is used as validation data.
history = model.fit(X_train, y_train, batch_size=16, epochs=30,
                    validation_data=(X_test, y_test))


#==============================================================================


# Add a trailing channel axis so the data matches the model's
# input_shape=(features, 1). Reshaping data that already has it is a no-op.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# One-hot encode only while the labels are still 1-D class ids. Encoding
# targets that are already one-hot would add a third axis and break fit().
if np.ndim(y_train) == 1:
    y_train = to_categorical(y_train)
if np.ndim(y_test) == 1:
    y_test = to_categorical(y_test, num_classes=y_train.shape[1])

# The output layer needs one unit per class, i.e. the one-hot width.
num_classes = y_train.shape[1]

# Model definition: two Conv1D/MaxPooling stages, dropout, softmax classifier.
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=5, strides=1, padding="same", activation="relu", input_shape=(X_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))
model.add(Conv1D(filters=32, kernel_size=5, strides=1, padding="same", activation="relu"))
model.add(MaxPooling1D(pool_size=5, strides=2, padding='same'))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(units=num_classes, activation="softmax"))

# Compile and train the model.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train, batch_size=16, epochs=30, validation_data=(X_test, y_test))

# Predict on a single file and print the result.
test_audio_file = "/content/drive/MyDrive/PBL/ai_voice/ai/ai_phone_call_noise_cancel.wav"
# extract_features takes a list of paths; a bare string would be iterated
# character by character.
test_features = extract_features([test_audio_file], max_length)
# Shape as one sample with a trailing channel axis.
# NOTE(review): the feature count must equal X_train.shape[1]; confirm the test
# clip yields the same number of features as the training clips.
test_features = test_features.reshape(1, -1, 1)
prediction = model.predict(test_features)
predicted_class = np.argmax(prediction)  # class with the highest probability

confidence = prediction[0][predicted_class] * 100  # probability of that class, in percent

if predicted_class == 0:
    print(f"테스트 음성은 AI 음성입니다. (확신도: {confidence:.2f}%)")
else:
    print(f"테스트 음성은 인간 음성입니다. (확신도: {confidence:.2f}%)")


#=============================================================================


import matplotlib.pyplot as plt

# Draw training vs. validation curves side by side: accuracy on the left,
# loss on the right.
plt.figure(figsize=(12, 4))

panels = (
    ('accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'Model loss', 'Loss'),
)
for position, (metric, panel_title, y_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric])           # training curve
    plt.plot(history.history['val_' + metric])  # validation curve
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')

# Tidy up the spacing between the two panels.
plt.tight_layout()