In [1]:
from google.colab import files
files.upload()  # This will prompt you to upload the kaggle.json file


Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"shadowmonarchsunjin","key":"42bc0f9d1cc262c501d04c8a6d0550fc"}'}

In [2]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [3]:
!kaggle datasets download -d birdy654/deep-voice-deepfake-voice-recognition

Dataset URL: https://www.kaggle.com/datasets/birdy654/deep-voice-deepfake-voice-recognition
License(s): other
Downloading deep-voice-deepfake-voice-recognition.zip to /content
100% 3.67G/3.69G [00:46<00:00, 162MB/s]
100% 3.69G/3.69G [00:46<00:00, 85.1MB/s]


In [4]:
!unzip deep-voice-deepfake-voice-recognition

Archive:  deep-voice-deepfake-voice-recognition.zip
  inflating: DEMONSTRATION/DEMONSTRATION/linus-original-DEMO.mp3  
  inflating: DEMONSTRATION/DEMONSTRATION/linus-to-musk-DEMO.mp3  
  inflating: KAGGLE/AUDIO/FAKE/Obama-to-Biden.wav  
  inflating: KAGGLE/AUDIO/FAKE/Obama-to-Trump.wav  
  inflating: KAGGLE/AUDIO/FAKE/biden-to-Obama.wav  
  inflating: KAGGLE/AUDIO/FAKE/biden-to-Trump.wav  
  inflating: KAGGLE/AUDIO/FAKE/biden-to-linus.wav  
  inflating: KAGGLE/AUDIO/FAKE/biden-to-margot.wav  
  inflating: KAGGLE/AUDIO/FAKE/biden-to-musk.wav  
  inflating: KAGGLE/AUDIO/FAKE/biden-to-ryan.wav  
  inflating: KAGGLE/AUDIO/FAKE/biden-to-taylor.wav  
  inflating: KAGGLE/AUDIO/FAKE/linus-to-biden.wav  
  inflating: KAGGLE/AUDIO/FAKE/linus-to-margot.wav  
  inflating: KAGGLE/AUDIO/FAKE/linus-to-musk.wav  
  inflating: KAGGLE/AUDIO/FAKE/linus-to-obama.wav  
  inflating: KAGGLE/AUDIO/FAKE/linus-to-ryan.wav  
  inflating: KAGGLE/AUDIO/FAKE/linus-to-taylor.wav  
  inflating: KAGGLE/AUDIO/FAKE/linu

In [5]:
import librosa
import numpy as np
import matplotlib.pyplot as plt

def load_audio(file_path, sr=22050):
    audio, sr = librosa.load(file_path, sr=sr)
    return audio, sr
def audio_to_spectrogram(audio, sr):
    spectrogram = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128)
    spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)
    return spectrogram_db
def plot_spectrogram(spectrogram):
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(spectrogram, sr=sr, x_axis='time', y_axis='mel')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel-frequency spectrogram')
    plt.tight_layout()
    plt.show()
import tensorflow as tf
from tensorflow.keras import layers, models

def create_cnn_model(input_shape):
    model = models.Sequential()

    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2)))

    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))

    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))

    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5))

    model.add(layers.Dense(1, activation='sigmoid'))  # Binary classification

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model


In [15]:
import os
import librosa
import numpy as np

# Set the paths
data_dir = 'KAGGLE/AUDIO/'
classes = ['REAL', 'FAKE']

# Initialize lists for storing data and labels
X = []
y = []

# Loop through each class
for class_label, class_name in enumerate(classes):
    class_dir = os.path.join(data_dir, class_name)

    for filename in os.listdir(class_dir):
        if filename.endswith('.wav'):
            file_path = os.path.join(class_dir, filename)
            audio, sr = librosa.load(file_path, sr=22050)
            spectrogram = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128)
            spectrogram = librosa.power_to_db(spectrogram, ref=np.max)

            # Pad or truncate spectrograms to a fixed length
            max_length = 100 # Replace with your desired fixed length
            if spectrogram.shape[1] < max_length:
                spectrogram = np.pad(spectrogram, ((0, 0), (0, max_length - spectrogram.shape[1])), mode='constant')
            else:
                spectrogram = spectrogram[:, :max_length]

            # Store the data and label
            X.append(spectrogram)
            y.append(class_label)

# Convert lists to numpy arrays
y = np.array(y)
X = np.array(X) # Now X should have a consistent shape

In [17]:
from sklearn.model_selection import train_test_split


# Assuming you have prepared your data and labels


xtrain,xtest,ytrain,ytest = train_test_split(X,y,test_size= 0.2) # Labels (0 for real, 1 for deepfake)

# The input shape for a CNN should be (height, width, channels)
# In this case, the spectrogram is 2D (frequency bins x time frames), so we add a channel dimension of 1
input_shape = (xtrain.shape[1], xtrain.shape[2], 1)

model = create_cnn_model(input_shape)
model.fit(xtrain, ytrain, epochs=10, batch_size=32, validation_split=0.2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 4s/step - accuracy: 0.2021 - loss: 15.1207 - val_accuracy: 1.0000 - val_loss: 1.0606e-33
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.8583 - loss: 6.1200 - val_accuracy: 1.0000 - val_loss: 1.0055e-07
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.8417 - loss: 2.2619 - val_accuracy: 0.0000e+00 - val_loss: 6.4397
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.1750 - loss: 5.0524 - val_accuracy: 0.9091 - val_loss: 0.6256
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.5229 - loss: 1.4625 - val_accuracy: 1.0000 - val_loss: 0.0052
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.8583 - loss: 0.8519 - val_accuracy: 1.0000 - val_loss: 0.0121
Epoch 7/10
[1m2/2[0m [32m━━━━━━

<keras.src.callbacks.history.History at 0x798acf333c40>

In [18]:
loss,accuracy = model.evaluate(xtest,ytest)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 628ms/step - accuracy: 0.8462 - loss: 0.3837


In [19]:
print(accuracy)

0.8461538553237915


In [31]:
model.save('audio.h5')


