<a href="https://colab.research.google.com/github/Bhaskar-459/deep_fake_web/blob/main/Deepfake_audio_resnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install kaggle



In [None]:
from google.colab import files

# files.upload() returns {filename: file bytes}. Binding the result (instead of leaving the
# call as the cell's last expression) keeps the Kaggle API key out of the saved notebook output.
uploaded = files.upload()  # Select kaggle.json
print("Uploaded:", ", ".join(uploaded))

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"[REDACTED]","key":"[REDACTED]"}'}

In [None]:
# Fetch the ASVspoof 2019 archive with the Kaggle CLI; `--unzip` extracts it after download.
# NOTE: this is a large download (the recorded run reports 23.6G).
!kaggle datasets download -d awsaf49/asvpoof-2019-dataset --unzip

Dataset URL: https://www.kaggle.com/datasets/awsaf49/asvpoof-2019-dataset
License(s): ODC Attribution License (ODC-By)
Downloading asvpoof-2019-dataset.zip to /content
100% 23.6G/23.6G [04:40<00:00, 179MB/s]
100% 23.6G/23.6G [04:40<00:00, 90.3MB/s]


In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.utils import to_categorical
from tensorflow.data import Dataset
import librosa.display
import random

# Enable mixed precision for better performance
tf.keras.mixed_precision.set_global_policy("mixed_float16")

# Ensure TensorFlow is using GPU
# `tf.config.list_physical_devices` is the stable spelling of the experimental alias.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Memory growth must be configured before the GPUs are initialised;
        # TensorFlow raises RuntimeError otherwise, which is reported rather than fatal.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU is available and ready!")
    except RuntimeError as e:
        print(e)
else:
    # Make the fallback visible instead of silently training on CPU.
    print("No GPU detected; training will run on CPU.")


# Relative paths to the audio files and the training protocol (label) file.
# Each utterance is opened as DATASET_PATH/<file_name>.flac.
DATASET_PATH = "LA/LA/ASVspoof2019_LA_train/flac"
LABEL_FILE_PATH = "LA/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt"


NUM_CLASSES = 2  # number of units in the softmax output layer
SAMPLE_RATE = 16000  # passed as `sr` to librosa when loading audio and computing the mel-spectrogram
DURATION = 4  # passed as `duration` to librosa.load, which caps how much of each file is read
N_MELS = 128  # passed as `n_mels`; first dimension of the model input
MAX_TIME_STEPS = 120  # spectrograms are padded/cropped to this many frames; second dimension of the model input

# Load labels
# Maps each utterance id (second whitespace-separated field of a protocol line) to
# 1 when the last field is "bonafide" and 0 otherwise.
labels = {}
with open(LABEL_FILE_PATH, 'r') as label_file:
    # Stream the file instead of materialising every line in a list first.
    for line in label_file:
        parts = line.split()
        if len(parts) < 2:
            # Blank or truncated line: there is no utterance id to index by.
            continue
        labels[parts[1]] = 1 if parts[-1] == "bonafide" else 0

# Data augmentation functions
def add_random_noise(audio):
    """Add small Gaussian noise to the audio.

    Args:
        audio: NumPy array of audio samples.

    Returns:
        A new array with the same shape and dtype as ``audio``, with zero-mean
        Gaussian noise (standard deviation 0.005) added. The input is not modified.
    """
    noise = np.random.normal(0, 0.005, audio.shape)
    # np.random.normal yields float64; cast back so float32 audio is not silently
    # promoted to float64 for the rest of the pipeline.
    return (audio + noise).astype(audio.dtype, copy=False)

def time_mask(spec, num_masks=1, mask_max_size=20):
    """Randomly mask time steps in the spectrogram.

    Args:
        spec: 2-D array; the second axis (columns) is treated as time.
        num_masks: Number of masks to apply.
        mask_max_size: Upper bound (inclusive) on the width of each mask.

    Returns:
        A copy of ``spec`` in which each mask has set a randomly placed run of
        columns to 0. The input array is left untouched.
    """
    masked = spec.copy()  # do not mutate the caller's array
    n_steps = masked.shape[1]
    # A mask can never be wider than the spectrogram; without this clamp
    # random.randint(0, n_steps - t) raises ValueError on short inputs.
    max_width = max(0, min(mask_max_size, n_steps))
    for _ in range(num_masks):
        t = random.randint(0, max_width)
        t0 = random.randint(0, n_steps - t)
        masked[:, t0:t0 + t] = 0
    return masked

def freq_mask(spec, num_masks=1, mask_max_size=20):
    """Randomly mask frequency bins in the spectrogram.

    Args:
        spec: 2-D array; the first axis (rows) is treated as frequency.
        num_masks: Number of masks to apply.
        mask_max_size: Upper bound (inclusive) on the height of each mask.

    Returns:
        A copy of ``spec`` in which each mask has set a randomly placed run of
        rows to 0. The input array is left untouched.
    """
    masked = spec.copy()  # do not mutate the caller's array
    n_bins = masked.shape[0]
    # A mask can never be taller than the spectrogram; without this clamp
    # random.randint(0, n_bins - f) raises ValueError on small inputs.
    max_height = max(0, min(mask_max_size, n_bins))
    for _ in range(num_masks):
        f = random.randint(0, max_height)
        f0 = random.randint(0, n_bins - f)
        masked[f0:f0 + f, :] = 0
    return masked

# Feature extraction function
def load_mel_spectrogram(file_name, label, augment=True):
    """Load one utterance and convert it to a fixed-size, 3-channel mel-spectrogram.

    Args:
        file_name: Utterance id; the audio is read from DATASET_PATH/<file_name>.flac.
        label: Integer class label for the utterance.
        augment: When True (the default), randomly apply noise, time masking and
            frequency masking, each with probability 0.5. Pass False for
            validation/inference so the features are deterministic.

    Returns:
        Tuple ``(features, label)`` where ``features`` is a float32 array of shape
        (N_MELS, MAX_TIME_STEPS, 3) with values in [0, 1] and ``label`` is a
        0-d int32 array.
    """
    file_path = os.path.join(DATASET_PATH, file_name + ".flac")
    audio, _ = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)

    if augment and random.random() > 0.5:
        audio = add_random_noise(audio)

    mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=SAMPLE_RATE, n_mels=N_MELS)
    mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # Crop before normalising so the statistics come only from frames that are kept.
    mel_spectrogram = mel_spectrogram[:, :MAX_TIME_STEPS]

    # Min-max scale to [0, 1]. A constant spectrogram (e.g. digital silence) has zero
    # range, which would otherwise divide by zero and fill the features with NaN.
    floor = np.min(mel_spectrogram)
    span = np.max(mel_spectrogram) - floor
    if span > 0:
        mel_spectrogram = (mel_spectrogram - floor) / span
    else:
        mel_spectrogram = np.zeros_like(mel_spectrogram)

    # Mask and pad AFTER normalisation: here 0 is the quietest value. On the dB scale
    # produced by power_to_db(ref=np.max), 0 is the loudest value, so zero masks and
    # zero padding applied there would inject maximum-energy regions.
    if augment:
        if random.random() > 0.5:
            mel_spectrogram = time_mask(mel_spectrogram)
        if random.random() > 0.5:
            mel_spectrogram = freq_mask(mel_spectrogram)

    missing_steps = MAX_TIME_STEPS - mel_spectrogram.shape[1]
    if missing_steps > 0:
        mel_spectrogram = np.pad(mel_spectrogram, ((0, 0), (0, missing_steps)), mode='constant')

    mel_spectrogram = np.expand_dims(mel_spectrogram, axis=-1)
    mel_spectrogram = np.repeat(mel_spectrogram, 3, axis=-1)  # Convert to 3-channel

    return mel_spectrogram.astype(np.float32), np.array(label, dtype=np.int32)


def data_generator(items=None, augment=True):
    """Yield ``(features, label)`` pairs for the given utterances.

    Args:
        items: Iterable of ``(file_name, label)`` pairs. Defaults to every entry
            in the module-level ``labels`` dict.
        augment: Forwarded to ``load_mel_spectrogram``.
    """
    if items is None:
        items = labels.items()
    for file_name, label in items:
        yield load_mel_spectrogram(file_name, label, augment=augment)


_OUTPUT_SIGNATURE = (
    tf.TensorSpec(shape=(N_MELS, MAX_TIME_STEPS, 3), dtype=tf.float32),
    tf.TensorSpec(shape=(), dtype=tf.int32),
)


def _make_dataset(items, augment):
    """Wrap ``data_generator`` over a fixed list of utterances as a tf.data.Dataset."""
    return Dataset.from_generator(
        lambda: data_generator(items, augment=augment),
        output_signature=_OUTPUT_SIGNATURE,
    )


# Split the utterance list itself, once, with a fixed seed. Splitting a shuffled
# generator dataset with take()/skip() does not give disjoint sets: shuffle() draws a
# new order on every iteration, and skip() has to decode the whole training portion
# before yielding the first validation sample. The up-front shuffle also mixes the
# classes, whatever order the protocol file lists them in.
SPLIT_SEED = 42
all_items = list(labels.items())
random.Random(SPLIT_SEED).shuffle(all_items)

train_size = int(0.8 * len(all_items))
train_items = all_items[:train_size]
val_items = all_items[train_size:]

# Unsplit, augmented view of the data, kept under its original name.
dataset = _make_dataset(all_items, augment=True)

train_dataset = (
    _make_dataset(train_items, augment=True)
    .shuffle(1000)
    .batch(16)
    .prefetch(tf.data.AUTOTUNE)
)
# Validation is neither shuffled nor augmented so its metrics are comparable across epochs.
val_dataset = (
    _make_dataset(val_items, augment=False)
    .batch(16)
    .prefetch(tf.data.AUTOTUNE)
)



# Backbone: ImageNet-pretrained ResNet50 without its classification head,
# fed with the 3-channel mel-spectrogram tensor.
input_shape = (N_MELS, MAX_TIME_STEPS, 3)
model_input = Input(shape=input_shape)
base_model = ResNet50(input_tensor=model_input, include_top=False, weights="imagenet")

# Classification head, applied in order on top of the backbone's feature map.
head_layers = (
    GlobalAveragePooling2D(),
    BatchNormalization(),
    Dense(128, activation='relu'),
    Dropout(0.4),
)
x = base_model.output
for head_layer in head_layers:
    x = head_layer(x)

# The output layer is pinned to float32 while the global policy is mixed_float16.
model_output = Dense(NUM_CLASSES, activation='softmax', dtype='float32')(x)

model = Model(inputs=model_input, outputs=model_output)

# Integer labels + softmax output -> sparse categorical cross-entropy.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)  # Reduced learning rate
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)



# Halve the learning rate when validation loss has not improved for 2 epochs.
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1)

# Persist the best model seen so far after each epoch. Without this, an interrupted
# run (e.g. a dropped Colab session) loses every completed epoch, because the only
# save happens after the final one.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "resnet50_audio_classifier.best.keras",
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

# Keep the History object so the training curves can be inspected afterwards.
history = model.fit(
    train_dataset,
    epochs=15,  # More epochs
    validation_data=val_dataset,
    callbacks=[lr_scheduler, checkpoint],
)

# Final-epoch model, saved under the original filename.
model.save("resnet50_audio_classifier.keras")

GPU is available and ready!
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/15
   1269/Unknown [1m413s[0m 234ms/step - accuracy: 0.9647 - loss: 0.1286



[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m717s[0m 475ms/step - accuracy: 0.9647 - loss: 0.1285 - val_accuracy: 1.0000 - val_loss: 0.0014 - learning_rate: 1.0000e-04
Epoch 2/15
[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m591s[0m 456ms/step - accuracy: 0.9573 - loss: 0.3218 - val_accuracy: 0.9994 - val_loss: 0.0025 - learning_rate: 1.0000e-04
Epoch 3/15
[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m669s[0m 494ms/step - accuracy: 0.9502 - loss: 0.4014 - val_accuracy: 1.0000 - val_loss: 6.7829e-04 - learning_rate: 1.0000e-04
Epoch 4/15
[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m619s[0m 478ms/step - accuracy: 0.9535 - loss: 0.3469 - val_accuracy: 1.0000 - val_loss: 3.1993e-04 - learning_rate: 1.0000e-04
Epoch 5/15
[1m1269/1269[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m625s[0m 481ms/step - accuracy: 0.9502 - loss: 0.3433 - val_accuracy: 0.9858 - val_loss: 0.0406 - learning_rate: 1.0000e-04
Epoch 6/15
[1m1269/126

In [None]:
# Send the saved model file from the Colab runtime to the browser as a download.
from google.colab import files
files.download('resnet50_audio_classifier.keras')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>