In [5]:
import numpy as np 
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import tarfile
import os
import shutil
import librosa

In [8]:
tar_file_path = 'C:/Users/ibrah/Downloads/audio_dataset/flac_D_aa.tar'
with tarfile.open(tar_file_path, 'r') as tar:
    tar.extractall(path='C:/Users/ibrah/Downloads/audio_dataset')


  tar.extractall(path='C:/Users/ibrah/Downloads/audio_dataset')


In [None]:
import tensorflow_io as tfio


def time_stretch(audio, rate=1.2):
    audio_np = audio.numpy()
    stretched = librosa.effects.time_stretch(audio_np, rate)
    return tf.convert_to_tensor(stretched, dtype=tf.float32)

def pitch_shift(audio, sample_rate=16000, n_steps=2):
    audio_np = audio.numpy()
    shifted = librosa.effects.pitch_shift(audio_np, sample_rate, n_steps=n_steps)
    return tf.convert_to_tensor(shifted, dtype=tf.float32)

def add_noise(audio, noise_factor=0.005):
    noise = np.random.randn(len(audio)) * noise_factor
    audio_with_noise = audio + noise
    return tf.convert_to_tensor(audio_with_noise, dtype=tf.float32)

def shift_audio(audio, max_shift=0.5, sample_rate=16000):
    shift = np.random.randint(-int(max_shift * sample_rate), int(max_shift * sample_rate))
    audio_shifted = tf.roll(audio, shift, axis=0)
    return audio_shifted

def change_volume(audio, gain_db_range=(-6, 6)):
    gain_db = np.random.uniform(*gain_db_range)
    audio = audio * (10**(gain_db / 20))
    return tf.clip_by_value(audio, -1.0, 1.0)

def augment_audio(audio):
    augmentations = [time_stretch, pitch_shift, add_noise, shift_audio, change_volume]
    augmentation = np.random.choice(augmentations)
    if augmentation == time_stretch:
        aug_audio = time_stretch(audio, rate=np.random.uniform(0.8, 1.2))
    elif augmentation == pitch_shift:
        aug_audio = pitch_shift(audio, n_steps=np.random.randint(-2, 3))
    elif augmentation == add_noise:
        aug_audio = add_noise(audio)
    elif augmentation == shift_audio:
        aug_audio = shift_audio(audio)
    elif augmentation == change_volume:
        aug_audio = change_volume(audio)
    return audio, aug_audio



In [None]:
def to_mel_spectrogram(audio, sample_rate=44100, n_mels=128, fmax=1000):
    y, sr = librosa.load(audio, sr=sample_rate)
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmax=fmax)
    mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
    mel_spectrogram = np.expand_dims(mel_spectrogram, axis=-1)
    return mel_spectrogram


In [None]:
def load_audio(filename, sample_rate=16000):
    audio = tf.io.read_file(filename)
    audio, _ = tf.audio.decode_wav(audio, desired_channels=1)
    audio = tf.squeeze(audio, axis=-1)  # Remove the last dimension
    audio = tf.cast(audio, tf.float32)
    audio = tfio.audio.resample(audio, rate_in=44100, rate_out=sample_rate)  # Resample audio
    return audio

In [None]:
audio_dataset = 'C:/Users/ibrah/Downloads/audio_dataset/flac_D'
audio_files = [f for f in os.listdir(audio_dataset) if f.endswith('.flac')]
aug_dataset = []
for file in audio_files:
    audio_path = os.path.join(audio_dataset, file)
    audio = load_audio(audio_path)
    aug_audio = augment_audio(audio)
    mel_spec = to_mel_spectrogram(aug_audio)
    aug_dataset.append(mel_spec)
spectrograms = np.array(aug_dataset)

In [6]:
labels = pd.read_csv('C:/Users/ibrah/Downloads/audio_dataset/protocols/ASVspoof5.dev.track_1.tsv', sep='\t')
labels = labels.head(47400)
labels

Unnamed: 0,D_0062 D_0000000001 F - - - AC1 A11 spoof -
0,D_0755 D_0000000022 F - - - AC3 A16 spoof -
1,D_0106 D_0000000043 M - - - AC2 A15 spoof -
2,D_5368 D_0000000064 M - - - AC2 A12 spoof -
3,D_3166 D_0000000085 M - - - AC2 A15 spoof -
4,D_4932 D_0000000106 M - - - AC2 A16 spoof -
...,...
47395,D_0375 D_0000995317 M - - - - bonafide bonafide -
47396,D_1956 D_0000995338 F - - - - bonafide bonafide -
47397,D_5214 D_0000995359 M - - - AC1 A13 spoof -
47398,D_2809 D_0000995380 M - - - AC1 A11 spoof -


In [7]:
labels.columns=['labels']
labels['labels'] = labels['labels'].apply(lambda x: 1 if 'spoof' in x.lower() else (0 if 'bonafide' in x.lower() else x))


In [8]:
new_rows = []

for index, row in labels.iterrows():
    new_rows.append(row)  
    new_rows.append(row)  

new_labels = pd.DataFrame(new_rows)

In [None]:
x = spectrograms
y = np.array(new_labels)

In [None]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_normalized = scaler.fit_transform(x.reshape(-1, x.shape[-1])).reshape(x.shape)


In [None]:
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical

x_train_full, x_val, y_train_full, y_val = train_test_split(x_normalized, y, test_size=0.1, random_state=42)
x_train, x_test, y_train, y_test = train_test_split(x_train_full, y_train_full, test_size=0.1, random_state=42)
y_train = to_categorical(y_train, num_classes=2)  
y_val = to_categorical(y_val, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)


In [None]:
specrnet = keras.models.Sequential([
    keras.layers.Conv2D(32, (3,3), initializer='lecun', activation='selu', input_shape=x_train.shape[1:]),
    keras.layers.MaxPooling((2,2)),
    keras.layers.Conv2D(64, (3,3), initializer='lecun', activation='selu', input_shape=x_train.shape[1:]),
    keras.layers.MaxPooling((2,2)),
    keras.layers.Conv2D(128, (3,3), initializer='lecun', activation='selu', input_shape=x_train.shape[1:]),
    keras.layers.MaxPooling((2,2)),
    keras.layers.Conv2D(256, (3,3), initializer='lecun', activation='selu', input_shape=x_train.shape[1:]),
    keras.layers.MaxPooling((2,2)),
    keras.layers.Flatten(),
    keras.layers.Dense(512, activation='selu', initializer='lecun'),
    keras.layers.Dropout(0.45),
    keras.layers.Dense(1, activation='sigmoid')
])

specrnet.compile(optimizer='nadam', loss='binary_crossentropy', metrics=['accuracy'])
specrnet.summary()
history = specrnet.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_val, y_val))
val_loss, val_acc = specrnet.evaluate(x_test, y_test)