In [1]:
# Standard Library Imports
import os
import pandas as pd
import numpy as np
import librosa as lb
import numpy as np
from skimage.transform import resize

In [8]:
base_path = 'D:\sem 7\Bismillah Skripsi\Voice Recog\Data\ASVSPOOF 2019\LA'
protocol_dir = os.path.join(base_path, 'ASVspoof2019_LA_cm_protocols')
train_dir = os.path.join(base_path, 'ASVspoof2019_LA_train', 'flac')
dev_dir = os.path.join(base_path, 'ASVspoof2019_LA_dev', 'flac')
eval_dir = os.path.join(base_path, 'ASVspoof2019_LA_eval', 'flac')

In [9]:
def get_file_path(directory, filename):
    return os.path.join(directory, f'{filename}.flac')

In [10]:
def read_dataset(protocol_path, directory):
    """Reads the dataset from a protocol file and returns a DataFrame."""
    df = pd.read_csv(protocol_path, sep=' ', header=None, names=['speaker_id', 'filename', 'system_id', 'null', 'class_name'])
    df['filepath'] = df['filename'].apply(lambda x: get_file_path(directory, x))
    df.drop('null', axis=1, inplace=True)
    df.dropna(inplace=True)
    return df

def label_to_int(class_name):
    if class_name == 'bonafide':
        return 0
    else:
        return 1

def add_columns(df, subset):
    df['target'] = df['class_name'].apply(label_to_int)
    df['subset'] = subset
    return df

def sample_data(df, n_bonafide, n_spoof):
    bonafide = df[df['class_name'] == 'bonafide'].head(n_bonafide)
    spoof = df[df['class_name'] != 'bonafide'].head(n_spoof)
    return pd.concat([bonafide, spoof])

In [11]:
train_df = read_dataset(os.path.join(protocol_dir, 'ASVspoof2019.LA.cm.train.trn.txt'), train_dir)
dev_df = read_dataset(os.path.join(protocol_dir, 'ASVspoof2019.LA.cm.dev.trl.txt'), dev_dir)
eval_df = read_dataset(os.path.join(protocol_dir, 'ASVspoof2019.LA.cm.eval.trl.txt'), eval_dir)

train_df = add_columns(train_df, 'train')
dev_df = add_columns(dev_df, 'dev')
eval_df = add_columns(eval_df, 'eval')


In [12]:
train_df

Unnamed: 0,speaker_id,filename,system_id,class_name,filepath,target,subset
0,LA_0079,LA_T_1138215,-,bonafide,D:\sem 7\Bismillah Skripsi\Voice Recog\Data\AS...,0,train
1,LA_0079,LA_T_1271820,-,bonafide,D:\sem 7\Bismillah Skripsi\Voice Recog\Data\AS...,0,train
2,LA_0079,LA_T_1272637,-,bonafide,D:\sem 7\Bismillah Skripsi\Voice Recog\Data\AS...,0,train
3,LA_0079,LA_T_1276960,-,bonafide,D:\sem 7\Bismillah Skripsi\Voice Recog\Data\AS...,0,train
4,LA_0079,LA_T_1341447,-,bonafide,D:\sem 7\Bismillah Skripsi\Voice Recog\Data\AS...,0,train
...,...,...,...,...,...,...,...
25375,LA_0098,LA_T_9717580,-,spoof,D:\sem 7\Bismillah Skripsi\Voice Recog\Data\AS...,1,train
25376,LA_0098,LA_T_9779814,-,spoof,D:\sem 7\Bismillah Skripsi\Voice Recog\Data\AS...,1,train
25377,LA_0098,LA_T_9783312,-,spoof,D:\sem 7\Bismillah Skripsi\Voice Recog\Data\AS...,1,train
25378,LA_0098,LA_T_9839348,-,spoof,D:\sem 7\Bismillah Skripsi\Voice Recog\Data\AS...,1,train


In [13]:
def balance_dataset(df, target_column='target', random_state=42):

    # Split the data into two groups based on the target
    class_0 = df[df[target_column] == 0]
    class_1 = df[df[target_column] == 1]

    # Down-sample the majority class to match the size of the minority class
    if len(class_0) > len(class_1):
        class_majority = class_0
        class_minority = class_1
    else:
        class_majority = class_1
        class_minority = class_0

    class_majority_downsampled = class_majority.sample(n=len(class_minority), random_state=random_state)

    # Combine the two classes into one balanced DataFrame
    balanced_df = pd.concat([class_minority, class_majority_downsampled])

    # Shuffle the data to mix the classes
    balanced_df = balanced_df.sample(frac=1, random_state=random_state).reset_index(drop=True)

    # Display the class distribution after down-sampling
    print("Class distribution after balancing:")
    print(balanced_df[target_column].value_counts())

    return balanced_df


In [14]:
train_df = balance_dataset(train_df, target_column='target', random_state=42)
dev_df = balance_dataset(dev_df, target_column='target', random_state=42)

Class distribution after balancing:
target
0    2580
1    2580
Name: count, dtype: int64
Class distribution after balancing:
target
0    2548
1    2548
Name: count, dtype: int64


## DATA TIMESTEP

In [15]:
# Adjusted function to create a Mel spectrogram with time steps
def create_mel_spectrogram_timestep(y, label, sr=22050, n_mels=128, fixed_frames=128, timesteps=32):
    '''
    Create a segmented Mel spectrogram with consistent shape for multiple timesteps.

    Parameters:
        y (np.ndarray): Audio signal.
        label (int): Data label (0 or 1).
        sr (int): Sampling rate of the audio.
        n_mels (int): Number of Mel bands.
        fixed_frames (int): Fixed number of total time frames for padding or truncation.
        timesteps (int): Number of segments to divide the Mel spectrogram into.

    Returns:
        mel_segments (np.ndarray): Mel spectrogram divided into timesteps with shape (timesteps, x-axis, n_mels, 1).
        label (int): The corresponding label for the spectrogram.
    '''
    # Create Mel spectrogram for the full audio
    mel_spectrogram = lb.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    mel_spectrogram_db = lb.power_to_db(mel_spectrogram, ref=np.max)

    # Resize the Mel spectrogram to have a fixed total number of frames
    resized_mel_spectrogram = resize(mel_spectrogram_db, (n_mels, fixed_frames), mode='reflect', anti_aliasing=True)

    # Calculate the frame size for each timestep
    segment_length = fixed_frames // timesteps

    # Split the spectrogram into timesteps
    mel_segments = []
    for i in range(timesteps):
        start_idx = i * segment_length
        end_idx = start_idx + segment_length
        segment = resized_mel_spectrogram[:, start_idx:end_idx]
        mel_segments.append(segment)

    mel_segments = np.array(mel_segments)
    mel_segments = np.expand_dims(mel_segments, axis=-1)  # Add channel dimension

    print(f"...Generated segmented mel spectrogram for label = '{label}' | Shape = {mel_segments.shape}")
    return mel_segments, label


#### TRAIN

In [16]:
# Lists to store results
train_samples_step = []
train_labels = []

# Iterate through the rows of the DataFrame
for index, row in train_df.iterrows():
    try:
        # Load the audio file
        y, sr = lb.load(row["filepath"], sr=22050)
        #y_trimmed, _ = lb.effects.trim(y, top_db=40)

        # Determine the label based on the class_name
        label = 0 if row["target"] == 0 else 1

        # Create a single mel spectrogram for the entire audio
        mel_spectrogram, l = create_mel_spectrogram_timestep(y=y, label=label, sr=sr)

        # Append the single spectrogram and its label
        train_samples_step.append(mel_spectrogram)
        train_labels.append(l)
    except Exception as e:
        print(f"Error processing file {row['filepath']}: {e}")

X_train_step = np.array(train_samples_step)
y_train = np.array(train_labels)

...Generated segmented mel spectrogram for label = '0' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '1' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '0' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '0' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '0' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '0' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '1' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '0' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '0' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '1' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '0' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '0' | Shape = (32, 128, 4, 1)
...Generated segmented mel s

#### DEV

In [17]:
# Lists to store results
dev_samples_step = []
dev_labels = []

# Iterate through the rows of the DataFrame
for index, row in dev_df.iterrows():
    try:
        # Load the audio file
        y, sr = lb.load(row["filepath"], sr=22050)
       #y_trimmed, _ = lb.effects.trim(y, top_db=40)

        # Determine the label based on the class_name
        label = 0 if row["target"] == 0 else 1

        # Create a single mel spectrogram for the entire audio
        mel_spectrogram, l = create_mel_spectrogram_timestep(y=y, label=label, sr=sr)

        # Append the single spectrogram and its label
        dev_samples_step.append(mel_spectrogram)
        dev_labels.append(l)
    except Exception as e:
        print(f"Error processing file {row['filepath']}: {e}")

X_val_step = np.array(dev_samples_step)
y_val = np.array(dev_labels)


...Generated segmented mel spectrogram for label = '0' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '1' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '0' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '1' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '0' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '0' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '0' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '1' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '1' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '0' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '0' | Shape = (32, 128, 4, 1)
...Generated segmented mel spectrogram for label = '0' | Shape = (32, 128, 4, 1)
...Generated segmented mel s

In [18]:
# np.savez("train_16step.npz", data=X_train_step, label=y_train)
# np.savez("val_16step.npz", data=X_val_step, label = y_val)

## DATA FULL

In [5]:
import librosa as lb
import numpy as np
from skimage.transform import resize

# Adjusted function to create a padded Mel spectrogram
def create_mel_spectrogram_full(y, label, sr=22050, n_mels=128, fixed_frames=128):
    '''
    Create a single Mel spectrogram with consistent shape for the full duration of an audio file.

    Parameters:
        y (np.ndarray): Audio signal.
        label (int): Data label (0 or 1).
        sr (int): Sampling rate of the audio.
        n_mels (int): Number of Mel bands.
        fixed_frames (int): Fixed number of time frames for padding or truncation.

    Returns:
        padded_mel_spectrogram (np.ndarray): Mel spectrogram in dB scale with consistent shape.
        label (int): The corresponding label for the spectrogram.
    '''
    # Create Mel spectrogram for the full audio
    mel_spectrogram = lb.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    mel_spectrogram_db = lb.power_to_db(mel_spectrogram, ref=np.max)

    padded_mel_spectrogram = resize(mel_spectrogram_db, (fixed_frames, fixed_frames), mode='reflect', anti_aliasing=True)
    mel_segments = np.array(padded_mel_spectrogram)
    mel_segments = np.expand_dims(mel_segments, axis=-1)

    print(f"...Generated padded mel spectrogram for label = '{label}' | Shape = {mel_segments.shape}")
    return mel_segments, label


#### TRAIN

In [None]:
# Lists to store results
train_samples = []
train_labels = []

# Iterate through the rows of the DataFrame
for index, row in train_df.iterrows():
    try:
        # Load the audio file
        y, sr = lb.load(row["filepath"], sr=22050)
        #y_trimmed, _ = lb.effects.trim(y, top_db=40)

        # Determine the label based on the class_name
        label = 0 if row["target"] == 0 else 1

        # Create a single mel spectrogram for the entire audio
        mel_spectrogram, l = create_mel_spectrogram_full(y=y, label=label, sr=sr)

        # Append the single spectrogram and its label
        train_samples.append(mel_spectrogram)
        train_labels.append(l)
    except Exception as e:
        print(f"Error processing file {row['filepath']}: {e}")

X_train = np.array(train_samples)
y_train = np.array(train_labels)

#### DEV

In [40]:
# Lists to store results
dev_samples = []
dev_labels = []

# Iterate through the rows of the DataFrame
for index, row in dev_df.iterrows():
    try:
        # Load the audio file
        y, sr = lb.load(row["filepath"], sr=22050)
        #y_trimmed, _ = lb.effects.trim(y, top_db=40)

        # Determine the label based on the class_name
        label = 0 if row["target"] == 0 else 1

        # Create a single mel spectrogram for the entire audio
        mel_spectrogram, l = create_mel_spectrogram_full(y=y, label=label, sr=sr)

        # Append the single spectrogram and its label
        dev_samples.append(mel_spectrogram)
        dev_labels.append(l)
    except Exception as e:
        print(f"Error processing file {row['filepath']}: {e}")

X_val = np.array(dev_samples)
y_val = np.array(dev_labels)


...Generated padded mel spectrogram for label = '0' | Shape = (128, 128, 1)
...Generated padded mel spectrogram for label = '1' | Shape = (128, 128, 1)
...Generated padded mel spectrogram for label = '0' | Shape = (128, 128, 1)
...Generated padded mel spectrogram for label = '1' | Shape = (128, 128, 1)
...Generated padded mel spectrogram for label = '0' | Shape = (128, 128, 1)
...Generated padded mel spectrogram for label = '0' | Shape = (128, 128, 1)
...Generated padded mel spectrogram for label = '0' | Shape = (128, 128, 1)
...Generated padded mel spectrogram for label = '1' | Shape = (128, 128, 1)
...Generated padded mel spectrogram for label = '1' | Shape = (128, 128, 1)
...Generated padded mel spectrogram for label = '0' | Shape = (128, 128, 1)
...Generated padded mel spectrogram for label = '0' | Shape = (128, 128, 1)
...Generated padded mel spectrogram for label = '0' | Shape = (128, 128, 1)
...Generated padded mel spectrogram for label = '1' | Shape = (128, 128, 1)
...Generated

In [41]:
np.savez("train_cnn.npz", data=X_train, label=y_train)
np.savez("val_cnn.npz", data=X_val, label = y_val)