In [None]:
import json
import math
import os
import random
import shutil

import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow.keras as keras
from sklearn.model_selection import train_test_split

In [None]:
# Root directory of the ASVspoof 2019 LA data as mounted on Kaggle.
BASE_PATH = "/kaggle/input/asvpoof-2019-dataset/LA/LA"

In [None]:
# Training protocol file: space-separated, no header row.
train_protocol_path = f'{BASE_PATH}/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt'
train_df = pd.read_csv(train_protocol_path, header=None, sep=" ")

In [None]:
# Preview the raw protocol rows (head() shows 5 rows by default).
train_df.head()

In [None]:
# Name the five protocol fields, then discard the unused 'null' field.
protocol_columns = ['speaker_id', 'filename', 'system_id', 'null', 'class_name']
train_df.columns = protocol_columns
train_df = train_df.drop(columns=['null'])
train_df.head(5)

In [None]:
# Full path of every training clip, plus a binary target (1 = 'spoof', 0 = anything else).
flac_dir = f'{BASE_PATH}/ASVspoof2019_LA_train/flac/'
train_df['filepath'] = flac_dir + train_df['filename'] + '.flac'
train_df['target'] = train_df['class_name'].eq('spoof').astype('int32')
print('len Train', len(train_df))
train_df.head(5)

In [None]:
# Class folders the training clips are sorted into; the folder name is the
# integer target ('0' = not spoof, '1' = spoof).
output_folder_0 = './0'
output_folder_1 = './1'

# exist_ok=True creates the folders when missing and is a no-op on re-runs.
for class_folder in (output_folder_0, output_folder_1):
    os.makedirs(class_folder, exist_ok=True)

# Copy every training clip into the folder named after its target.
# Iterating the two columns directly avoids DataFrame.iterrows(), which
# builds a full Series object for each row. The target is always 0 or 1,
# so both destination folders already exist at this point.
for source_filepath, target in zip(train_df['filepath'], train_df['target']):
    target_folder = f'./{target}'
    target_filepath = os.path.join(target_folder, os.path.basename(source_filepath))
    shutil.copy(source_filepath, target_filepath)

# Report how many files ended up in each class folder.
print(f'0 Klasöründe {len(os.listdir(output_folder_0))} dosya var.')
print(f'1 Klasöründe {len(os.listdir(output_folder_1))} dosya var.')


In [None]:
# Count the clips currently stored in the class-0 folder.
folder_path = './0'
files = os.listdir(folder_path)
total_files = len(files)

print(f"Klasörde toplam {total_files} dosya var.")


In [None]:
# Count the clips currently stored in the class-1 folder.
folder_path = './1'  # folder to inspect
files = os.listdir(folder_path)
total_files = len(files)

print(f"Klasörde toplam {total_files} dosya var.")


In [None]:
# Randomly thin out the class-0 folder.
# NOTE(review): this cell is not idempotent - every execution removes up to
# `num_files_to_delete` further files, so run it once per fresh copy.
folder_path = './0'  # folder to thin out

# os.listdir() returns entries in arbitrary order; sorting first means the
# seeded sample below selects the same files on every fresh run.
files = sorted(os.listdir(folder_path))

total_files = len(files)

# Number of files to delete
num_files_to_delete = 1580

# A dedicated seeded generator keeps the retained subset reproducible
# without touching the global `random` state.
files_to_delete = random.Random(42).sample(files, min(num_files_to_delete, total_files))

for file_name in files_to_delete:
    file_path = os.path.join(folder_path, file_name)
    try:
        os.remove(file_path)
    except OSError as e:
        # Report the failure and keep going; only filesystem errors are expected here.
        print(f"Hata silinirken: {e}")

remaining_files = os.listdir(folder_path)
print(f"Klasörde kalan toplam {len(remaining_files)} dosya var.")

In [None]:
# Randomly thin out the class-1 folder.
# NOTE(review): this cell is not idempotent - every execution removes up to
# `num_files_to_delete` further files, so run it once per fresh copy.
folder_path = './1'

# os.listdir() returns entries in arbitrary order; sorting first means the
# seeded sample below selects the same files on every fresh run.
files = sorted(os.listdir(folder_path))

total_files = len(files)

# Number of files to delete
num_files_to_delete = 21800

# A dedicated seeded generator keeps the retained subset reproducible
# without touching the global `random` state.
files_to_delete = random.Random(42).sample(files, min(num_files_to_delete, total_files))

for file_name in files_to_delete:
    file_path = os.path.join(folder_path, file_name)
    try:
        os.remove(file_path)
    except OSError as e:
        # Report the failure and keep going; only filesystem errors are expected here.
        print(f"Hata silinirken: {e}")

remaining_files = os.listdir(folder_path)
print(f"Klasörde kalan toplam {len(remaining_files)} dosya var.")

In [None]:
# Class folders to relocate
source_folders = ['0', '1']

# Name of the destination folder
target_folder = 'audio'

if not os.path.exists(target_folder):
    os.makedirs(target_folder)

for source_folder in source_folders:
    source_folder_path = os.path.join('.', source_folder)
    target_folder_path = os.path.join(target_folder, source_folder)

    # Nothing to move when the class folder does not exist.
    if not os.path.exists(source_folder_path):
        print(f"{source_folder} klasörü bulunamadı.")
        continue

    # Create audio/<class> on first use.
    if not os.path.exists(target_folder_path):
        os.makedirs(target_folder_path)

    # Move every entry of the class folder into audio/<class>.
    for file_name in os.listdir(source_folder_path):
        move_from = os.path.join(source_folder_path, file_name)
        move_to = os.path.join(target_folder_path, file_name)

        try:
            shutil.move(move_from, move_to)
            print(f"{file_name} dosyası {source_folder} klasöründen {target_folder}/{source_folder} klasörüne taşındı.")
        except Exception as e:
            print(f"Hata taşınırken: {e}")

print(f"İşlem tamamlandı. '{target_folder}' klasöründe şu dosyalar var:")
print(os.listdir(target_folder))


In [None]:
DATASET_PATH = "/kaggle/working/audio"
JSON_PATH = "data.json"
SAMPLE_RATE = 22050
# Nominal track length in seconds. It is only used to derive the segment
# length (SAMPLES_PER_TRACK / num_segments); clips may be shorter.
TRACK_DURATION = 30
SAMPLES_PER_TRACK = SAMPLE_RATE * TRACK_DURATION


def save_mfcc(dataset_path, json_path, num_mfcc=13, n_fft=2048, hop_length=512, num_segments=5):
    """Extract MFCCs for every audio file under `dataset_path` and save them as JSON.

    Each sub-directory of `dataset_path` is treated as one class. Every file is
    cut into consecutive segments of SAMPLES_PER_TRACK / num_segments samples;
    a segment is stored only when its MFCC matrix has the expected number of
    frames, so trailing partial segments are dropped.

    :param dataset_path (str): root folder containing one sub-folder per class
    :param json_path (str): file the result is written to
    :param num_mfcc (int): number of MFCC coefficients per frame
    :param n_fft (int): FFT window size in samples
    :param hop_length (int): hop between frames in samples
    :param num_segments (int): number of segments the nominal track is divided into
    :return: None; writes {"mapping", "labels", "mfcc"} to `json_path`, where
        labels[k] is the index into "mapping" of the class of mfcc[k]
    """
    data = {
        "mapping": [],
        "labels": [],
        "mfcc": []
    }

    samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    num_mfcc_vectors_per_segment = math.ceil(samples_per_segment / hop_length)

    for dirpath, dirnames, filenames in os.walk(dataset_path):

        # os.walk visits sub-directories in arbitrary filesystem order.
        # Sorting in place fixes the traversal order, so the integer label of
        # a class no longer depends on the filesystem (folder '0' -> label 0).
        dirnames.sort()

        # The root folder holds the class folders, not audio of its own.
        if dirpath == dataset_path:
            continue

        semantic_label = os.path.basename(dirpath)
        data["mapping"].append(semantic_label)
        # The label is, by construction, the position of the class in "mapping".
        label = len(data["mapping"]) - 1
        print("\nProcessing: {}".format(semantic_label))

        for f in sorted(filenames):

            file_path = os.path.join(dirpath, f)
            signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE)

            for d in range(num_segments):

                start = samples_per_segment * d
                # The clip ended before this segment starts: this slice and
                # all later ones would be empty, so stop instead of handing
                # empty audio to librosa.
                if start >= len(signal):
                    break
                finish = start + samples_per_segment

                # extract the MFCCs of this segment (frames x coefficients after transpose)
                mfcc = librosa.feature.mfcc(y=signal[start:finish], sr=sample_rate, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
                mfcc = mfcc.T

                # keep only segments with the expected number of frames
                if len(mfcc) == num_mfcc_vectors_per_segment:
                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(label)
                    print("{}, segment:{}".format(file_path, d+1))

    # save the MFCCs as JSON
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)




In [None]:
if __name__ == "__main__":
    # Extract the MFCCs with 10 segments per nominal track and write them to JSON_PATH.
    save_mfcc(dataset_path=DATASET_PATH, json_path=JSON_PATH, num_segments=10)

In [None]:


# JSON file read by load_data() via prepare_datasets().
DATA_PATH = '/kaggle/working/data.json'


def load_data(data_path):
    """Read the MFCC dataset stored as JSON at `data_path`.

    :param data_path (str): path of a JSON file with "mfcc" and "labels" entries
    :return X (ndarray): array built from the "mfcc" entry
    :return y (ndarray): array built from the "labels" entry
    """
    with open(data_path, "r") as json_file:
        payload = json.load(json_file)

    features, targets = (np.array(payload[key]) for key in ("mfcc", "labels"))
    return features, targets


def plot_history(history):
    """Plot training and validation accuracy/loss per epoch in two stacked subplots.

    :param history: object whose `.history` dict contains the "accuracy",
        "val_accuracy", "loss" and "val_loss" series (the value returned by
        model.fit in this notebook)
    """
    fig, axs = plt.subplots(2)

    # Accuracy subplot. The val_* series are measured on the validation data
    # passed to fit(), not on the held-out test set, and are labelled as such.
    axs[0].plot(history.history["accuracy"], label="train accuracy")
    axs[0].plot(history.history["val_accuracy"], label="validation accuracy")
    axs[0].set_ylabel("Accuracy")
    axs[0].legend(loc="lower right")
    axs[0].set_title("Accuracy eval")

    # Loss subplot
    axs[1].plot(history.history["loss"], label="train error")
    axs[1].plot(history.history["val_loss"], label="validation error")
    axs[1].set_ylabel("Error")
    axs[1].set_xlabel("Epoch")
    axs[1].legend(loc="upper right")
    axs[1].set_title("Error eval")

    plt.show()


def prepare_datasets(test_size, validation_size, random_state=None):
    """Load the MFCC data and split it into train, validation and test sets.

    :param test_size (float): share of the whole data set held out for testing
    :param validation_size (float): share of the remaining training data used
        for validation
    :param random_state: seed forwarded to both train_test_split calls; the
        default None keeps the previous unseeded behaviour, pass an int for a
        reproducible split
    :return: X_train, X_validation, X_test, y_train, y_validation, y_test;
        every X array gets one trailing axis of size 1 appended
    """
    X, y = load_data(DATA_PATH)

    # NOTE(review): rows are split individually, so segments cut from the same
    # audio file may land in different splits - confirm this is acceptable.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_train, y_train, test_size=validation_size, random_state=random_state)

    # Conv2D expects a channel axis: append one of size 1.
    X_train = X_train[..., np.newaxis]
    X_validation = X_validation[..., np.newaxis]
    X_test = X_test[..., np.newaxis]

    return X_train, X_validation, X_test, y_train, y_validation, y_test


def build_model(input_shape):
    """Assemble the CNN classifier: three conv blocks, a dense head and a 2-way softmax.

    :param input_shape (tuple): shape of one input sample, forwarded to the
        first Conv2D layer
    :return model: the uncompiled keras.Sequential network
    """
    layers = keras.layers

    # (convolution kernel, pooling window) of the three convolutional blocks
    block_specs = [((3, 3), (3, 3)), ((3, 3), (3, 3)), ((2, 2), (2, 2))]

    model = keras.Sequential()

    for position, (kernel, pool) in enumerate(block_specs):
        # only the very first layer declares the input shape
        first_layer_kwargs = {"input_shape": input_shape} if position == 0 else {}
        model.add(layers.Conv2D(32, kernel, activation='relu', **first_layer_kwargs))
        model.add(layers.MaxPooling2D(pool, strides=(2, 2), padding='same'))
        model.add(layers.BatchNormalization())

    # flatten, dense layer with dropout, then the softmax output layer
    head = (
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(2, activation='softmax'),
    )
    for head_layer in head:
        model.add(head_layer)

    return model


def predict(model, X, y):
    """Run the model on a single sample and print the target next to the prediction.

    :param model: object exposing predict(batch)
    :param X (ndarray): one input sample, without batch axis
    :param y: expected label of the sample, printed as-is
    """
    # model.predict works on batches: prepend a batch axis of size 1,
    # e.g. (130, 13, 1) -> (1, 130, 13, 1)
    batch = X[np.newaxis, ...]

    scores = model.predict(batch)

    # index of the highest score for every row of the batch
    predicted_index = np.argmax(scores, axis=1)

    message = "Target: {}, Predicted label: {}".format(y, predicted_index)
    print(message)


In [None]:
if __name__ == "__main__":

    # 25% of the data is held out for testing, 20% of the rest for validation
    X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(0.25, 0.2)

    # Take the input shape from the data instead of hard-coding it, so the
    # network stays consistent with whatever segment/MFCC settings produced
    # the data set.
    input_shape = X_train.shape[1:]
    model = build_model(input_shape)

    # compile the network
    optimiser = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=optimiser,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    # train
    history = model.fit(X_train, y_train, validation_data=(X_validation, y_validation), batch_size=32, epochs=30)

    # plot accuracy/loss for training and validation
    plot_history(history)

    # evaluate on the held-out test set
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
    print('\nTest accuracy:', test_acc)

    # Spot-check one test sample; clamp the index so a test split with
    # fewer than 101 rows does not raise IndexError.
    sample_index = min(100, len(X_test) - 1)
    X_to_predict = X_test[sample_index]
    y_to_predict = y_test[sample_index]

    predict(model, X_to_predict, y_to_predict)



In [None]:
# Modeli kaydet
# Save the trained model to a .h5 file.
model.save('/kaggle/working/cnn_audio.h5')

In [None]:
# Show the layer-by-layer summary of the trained model.
model.summary()

In [None]:
from tensorflow.keras.models import load_model
import tensorflow as tf

In [None]:
def predict_voice(model, audio_file_path, genre_mapping,
                  sample_rate=22050, num_mfcc=13, n_fft=2048, hop_length=512,
                  num_frames=130):
    """Classify one audio file and return the mapped class name.

    The feature settings were previously hard-coded; they are now parameters
    whose defaults reproduce the old values. They must match the settings the
    model was trained with.

    :param model: trained model exposing predict(batch)
    :param audio_file_path (str): audio file to classify
    :param genre_mapping: maps the predicted class index to a display name
    :param sample_rate (int): sampling rate the file is loaded at
    :param num_mfcc (int): number of MFCC coefficients per frame
    :param n_fft (int): FFT window size in samples
    :param hop_length (int): hop between frames in samples
    :param num_frames (int): number of MFCC frames fed to the model
    :return: genre_mapping entry of the class with the highest score
    """
    signal, sr = librosa.load(audio_file_path, sr=sample_rate)

    # MFCCs of the whole clip, transposed to (frames, coefficients)
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
    mfcc = mfcc.T

    # Bring the MFCCs to the fixed model input size. np.resize keeps the
    # first `num_frames` frames of a longer clip and fills a shorter one
    # with repeated copies of its own frames, starting again from the first.
    mfcc = np.resize(mfcc, (num_frames, num_mfcc, 1))

    # prepend the batch axis expected by model.predict
    mfcc = mfcc[np.newaxis, ...]

    prediction = model.predict(mfcc)
    predicted_index = np.argmax(prediction, axis=1)

    genre_label = genre_mapping[predicted_index[0]]
    print("Raw prediction:", prediction)

    return genre_label


# Reload the saved network from disk.
model_path = "/kaggle/working/cnn_audio.h5"
model = load_model(model_path)

# Clip to classify.
audio_file_path = "/kaggle/input/asvpoof-2019-dataset/LA/LA/ASVspoof2019_LA_eval/flac/LA_E_1001232.flac"

# Predicted class index -> display name.
genre_mapping = dict(enumerate(["real", "fake"]))

predicted_voice = predict_voice(model, audio_file_path, genre_mapping)
print("Predicted genre:", predicted_voice)
