In [None]:
from google.colab import drive
drive.mount("/content/drive")

!pip install unrar
!unrar x /content/drive/MyDrive/data.rar

import os
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
# --- Configuration -----------------------------------------------------------
DATASET_PATH = "/content/data"   # root folder; one sub-folder per class
NUM_CLASSES = 2                  # width of the one-hot labels / softmax output
SAMPLE_RATE = 16000              # `sr` passed to librosa.load and melspectrogram
DURATION = 5                     # `duration` passed to librosa.load
N_MELS = 128                     # number of mel bands per spectrogram
max_time_steps = 109             # every spectrogram is padded/truncated to this many frames


def build_label_map(dataset_path):
    """Map each class folder directly under ``dataset_path`` to an integer label.

    Only immediate sub-folders are considered, hidden folders (names starting
    with ".") are ignored, and the names are sorted so the same folder always
    receives the same index regardless of filesystem listing order.

    Args:
        dataset_path: Directory whose immediate sub-folders are the classes.

    Returns:
        dict mapping folder name -> class index (0..n-1 in sorted-name order).
        Empty if ``dataset_path`` does not exist or has no sub-folders.
    """
    # The first item yielded by os.walk describes the top level only; the
    # default covers a missing directory, for which os.walk yields nothing.
    _, top_level_dirs, _ = next(os.walk(dataset_path), (dataset_path, [], []))
    class_names = sorted(name for name in top_level_dirs if not name.startswith("."))
    return {name: index for index, name in enumerate(class_names)}


labels = build_label_map(DATASET_PATH)
class_index = len(labels)  # number of classes discovered

# The labels are later one-hot encoded with NUM_CLASSES columns, so a mismatch
# here means the dataset layout is not what the rest of the notebook expects.
if len(labels) != NUM_CLASSES:
    print(
        f"Warning: expected {NUM_CLASSES} class folders under {DATASET_PATH}, "
        f"found {len(labels)}: {sorted(labels)}"
    )
# Build the training set: one fixed-size log-mel spectrogram per audio file,
# labelled by the name of the folder that contains the file.
X = []
y = []
skipped_unlabelled = 0  # files found in folders that have no entry in `labels`

for root, dirs, files in os.walk(DATASET_PATH):
    dirs.sort()  # in-place sort makes os.walk visit sub-folders in a reproducible order
    folder_name = os.path.basename(root)

    for file_name in sorted(files):
        if folder_name not in labels:
            # e.g. a stray file sitting directly in DATASET_PATH; previously a KeyError
            skipped_unlabelled += 1
            continue

        file_path = os.path.join(root, file_name)

        # Same per-file error handling as the evaluation cell: report the
        # unreadable file and keep going instead of aborting the whole run.
        try:
            audio, _ = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)

            mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=SAMPLE_RATE, n_mels=N_MELS)
            mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
        except Exception as e:
            print(f"Error processing file {file_name}: {e}")
            continue

        # Force a fixed width of max_time_steps frames along the time axis.
        # NOTE(review): np.pad's default constant is 0, which on this dB scale
        # (ref=np.max, so the maximum is 0 dB) is the loudest possible value.
        # Left unchanged because the evaluation cell pads the same way and the
        # two must stay consistent.
        if mel_spectrogram.shape[1] < max_time_steps:
            mel_spectrogram = np.pad(mel_spectrogram, ((0, 0), (0, max_time_steps - mel_spectrogram.shape[1])), mode='constant')
        else:
            mel_spectrogram = mel_spectrogram[:, :max_time_steps]

        X.append(mel_spectrogram)
        label = labels[folder_name]
        y.append(label)

if skipped_unlabelled:
    print(f"Skipped {skipped_unlabelled} file(s) located in folders without a class label")

# Stack the per-file features/labels into arrays and one-hot encode the labels.
X = np.array(X)
y = np.array(y)

y_encoded = to_categorical(y, NUM_CLASSES)

# The samples were collected folder by folder, so they are grouped by class.
# Slicing off the last 20% without shuffling would produce a validation set
# dominated by whichever class was read last. Shuffle with a fixed seed so the
# split mixes classes and is reproducible across runs.
SPLIT_SEED = 42
shuffled_order = np.random.default_rng(SPLIT_SEED).permutation(len(X))

split_index = int(0.8 * len(X))
train_indices = shuffled_order[:split_index]
val_indices = shuffled_order[split_index:]

# Index instead of reordering X / y_encoded in place, so re-running this cell
# always yields the same split.
X_train, X_val = X[train_indices], X[val_indices]
y_train, y_val = y_encoded[train_indices], y_encoded[val_indices]

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Extracting  data/real/LJ017-0147.wav                                      75%  OK 
Extracting  data/real/LJ017-0148.wav                                      75%  OK 
Extracting  data/real/LJ017-0149.wav                                      75%  OK 
Extracting  data/real/LJ017-0150.wav                                      75%  OK 
Extracting  data/real/LJ017-0151.wav                                      75%  OK 
Extracting  data/real/LJ017-0152.wav                                      75%  OK 
Extracting  data/real/LJ017-0153.wav                                      75%  OK 
Extracting  data/real/LJ017-0154.wav                                      75%  OK 
Extracting  data/real/LJ017-0155.wav                                      75%  OK 
Extracting  data/real/LJ017-0156.wav                                      75%  OK 
Extracting  dat

In [None]:
# Mount Google Drive so that archives stored under MyDrive are readable from
# the cells below.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
from keras.layers import Conv2D,MaxPool2D,AveragePooling2D,Dense,Flatten,ZeroPadding2D,BatchNormalization,Activation,Add,Input,Dropout,GlobalAveragePooling2D
# --- Model: two Conv2D + MaxPooling2D stages followed by a dense head --------
# Input is a single-channel (N_MELS x max_time_steps) spectrogram.
model_input = Input(shape=(N_MELS, max_time_steps, 1))
x = Conv2D(32, kernel_size=(3, 3), activation='relu')(model_input)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Conv2D(64, kernel_size=(3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)

# One softmax unit per class; pairs with the one-hot labels and
# categorical cross-entropy loss below.
model_output = Dense(NUM_CLASSES, activation='softmax')(x)

model = Model(inputs=model_input, outputs=model_output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Keep the returned History object so the per-epoch metrics remain available
# for inspection after training.
history = model.fit(X_train, y_train, batch_size=32, epochs=10, validation_data=(X_val, y_val))

# Save under the exact file name the evaluation cell loads (MODEL_PATH =
# "Deepfake_audio.h5"). The previous name "Deepfake_audiog.h5" did not match,
# so a fresh run could not find the model it had just trained.
model.save("Deepfake_audio.h5")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
!unzip /content/drive/MyDrive/TestEvaluation.zip > /dev/null

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import os
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras.models import load_model
# --- Evaluation configuration (must match the values used for training) ------
TEST_DATASET_PATH = "/content/TestEvaluation"
MODEL_PATH = "Deepfake_audio.h5"
SAMPLE_RATE = 16000
DURATION = 5
N_MELS = 128
MAX_TIME_STEPS = 109
AUDIO_EXTENSIONS = ('.wav', '.mp3', '.flac')

model = load_model(MODEL_PATH)
X_test = []
test_file_names = []  # names of the files that actually produced a row in X_test

# Sorted so the order of the predictions is reproducible between runs.
test_files = sorted(os.listdir(TEST_DATASET_PATH))
for file_name in test_files:
    # Case-insensitive so e.g. "clip.WAV" is not silently ignored.
    if not file_name.lower().endswith(AUDIO_EXTENSIONS):
        continue

    file_path = os.path.join(TEST_DATASET_PATH, file_name)

    try:
        audio, _ = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)

        mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=SAMPLE_RATE, n_mels=N_MELS)
        mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

        # Pad or truncate along the time axis to the fixed width the model expects.
        if mel_spectrogram.shape[1] < MAX_TIME_STEPS:
            mel_spectrogram = np.pad(mel_spectrogram, ((0, 0), (0, MAX_TIME_STEPS - mel_spectrogram.shape[1])), mode='constant')
        else:
            mel_spectrogram = mel_spectrogram[:, :MAX_TIME_STEPS]
    except Exception as e:
        print(f"Error processing file {file_name}: {e}")
        continue

    # Append feature and name together so row i of X_test always corresponds
    # to test_file_names[i], even when other files were skipped or failed.
    X_test.append(mel_spectrogram)
    test_file_names.append(file_name)

if not X_test:
    raise ValueError(f"No audio files could be loaded from {TEST_DATASET_PATH}")

X_test = np.array(X_test)

y_pred = model.predict(X_test)

# Index of the highest-probability class for each file.
y_pred_classes = np.argmax(y_pred, axis=1)

print("Predicted classes:", y_pred_classes)

# Per-file view: the bare array above cannot be mapped back to files on its own.
for file_name, predicted_class in zip(test_file_names, y_pred_classes):
    print(f"{file_name}: {predicted_class}")

Predicted classes: [0 1 1 0 1 1 1 0 1 0]
