In [1]:
import os
import torch
import numpy as np
import librosa
from tqdm import tqdm
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model

In [2]:

# Disabled smoke test input: load a single clip resampled to 16 kHz.
# input_audio, sample_rate = librosa.load("/content/bla.wav",  sr=16000)

# Pretrained checkpoint shared by the extraction cells below, which pass the
# module-level `model` and `feature_extractor` into extract_features().
model_name = "facebook/wav2vec2-large-xlsr-53"
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
model = Wav2Vec2Model.from_pretrained(model_name)
min_duration = 4.0  # Clip length in seconds used for padding/truncation. NOTE: the extraction calls in this notebook do not pass this global; extract_features() falls back to its own default of 4.0.
# Disabled smoke test: run the clip through the model and inspect the outputs.
# i= feature_extractor(input_audio, return_tensors="pt", sampling_rate=sample_rate)
# with torch.no_grad():
#   o= model(i.input_values)
# print(o.keys())
# print(o.last_hidden_state.shape)
# print(o.extract_features.shape)

Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-large-xlsr-53 and are newly initialized: ['wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
# Initialize Wav2Vec2 model and feature extractor
# model_name = "facebook/wav2vec2-large-960h"
# feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
# model = Wav2Vec2Model.from_pretrained(model_name)
# min_duration = 4.0  # Minimum duration for padding/truncation

In [4]:
# Pick the compute device once: CUDA when PyTorch reports a usable GPU,
# otherwise fall back to the CPU. Later cells reuse this `device` object.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [5]:
# Move the model to the device selected above (GPU when available, otherwise CPU).
# Being the last expression in the cell, the returned module is echoed as the cell output.
model.to(device)

Wav2Vec2Model(
  (feature_extractor): Wav2Vec2FeatureEncoder(
    (conv_layers): ModuleList(
      (0): Wav2Vec2LayerNormConvLayer(
        (conv): Conv1d(1, 512, kernel_size=(10,), stride=(5,))
        (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (activation): GELUActivation()
      )
      (1-4): 4 x Wav2Vec2LayerNormConvLayer(
        (conv): Conv1d(512, 512, kernel_size=(3,), stride=(2,))
        (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (activation): GELUActivation()
      )
      (5-6): 2 x Wav2Vec2LayerNormConvLayer(
        (conv): Conv1d(512, 512, kernel_size=(2,), stride=(2,))
        (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (activation): GELUActivation()
      )
    )
  )
  (feature_projection): Wav2Vec2FeatureProjection(
    (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (projection): Linear(in_features=512, out_features=1024, bias=True)
    (dropout)

In [6]:
def extract_features(audio_file_path, device, model, feature_extractor, min_duration=4.0):
    """Extract wav2vec 2.0 features from one audio file fitted to a fixed length.

    The file is loaded at 16 kHz, then zero-padded at the end or truncated so
    that it contains exactly ``round(min_duration * sample_rate)`` samples.
    Giving every clip the same sample count keeps the per-file outputs the same
    shape, which the calling cells rely on when they stack them with
    ``np.array``.

    Args:
        audio_file_path: Path of the audio file, as accepted by ``librosa.load``.
        device: Torch device the input tensor is moved to; ``model`` is expected
            to already live on the same device.
        model: Callable returning an object with ``last_hidden_state`` and
            ``extract_features`` tensors (a ``Wav2Vec2Model`` in this notebook).
        feature_extractor: Callable returning an object with an ``input_values``
            tensor (a ``Wav2Vec2FeatureExtractor`` in this notebook).
        min_duration: Target clip length in seconds. Despite the name, longer
            clips are truncated to this length as well.

    Returns:
        Tuple ``(hidden_states, features_last_cnn_layer)`` of NumPy arrays with
        all size-1 axes (including the batch axis) squeezed out.
    """
    target_sr = 16000
    audio, sr = librosa.load(audio_file_path, sr=target_sr)

    # Work in whole samples instead of float seconds: deriving the pad length
    # from a float duration can truncate to one sample too few, which yields
    # clips of unequal length and ragged feature arrays downstream.
    target_len = int(round(min_duration * sr))
    n_samples = len(audio)
    if n_samples < target_len:
        audio = np.pad(audio, (0, target_len - n_samples), mode='constant')
    elif n_samples > target_len:
        audio = audio[:target_len]

    # The feature extractor is given a batch of one clip.
    audio_reshaped = np.reshape(audio, (1, -1))

    # Normalise / tensorise the waveform with the checkpoint's feature extractor.
    input_values = feature_extractor(audio_reshaped, return_tensors="pt", padding=True, sampling_rate=sr).input_values

    # Move the input to the same device as the model.
    input_values = input_values.to(device)

    with torch.no_grad():
        # Inference only: no gradients are needed for feature extraction.
        outputs = model(input_values)
        hidden_states = outputs.last_hidden_state
        features_last_cnn_layer = outputs.extract_features

    # Bring results back to host memory and drop the batch axis.
    hidden_states = hidden_states.cpu().numpy().squeeze()
    features_last_cnn_layer = features_last_cnn_layer.cpu().numpy().squeeze()

    return hidden_states, features_last_cnn_layer

In [7]:
# Function to read labels
def read_labels(labels_file, label_index=5):
    """Parse a whitespace-separated trial/metadata file into ``{audio_name: label}``.

    Each non-blank line is split on whitespace. Field 1 (the second column) is
    used as the audio file name and field ``label_index`` as the textual label.

    Args:
        labels_file: Path of the text file to read.
        label_index: Zero-based column holding the label. The default of 5
            (the sixth column) matches the original hard-coded behaviour; pass
            a different index for files whose label sits in another column.

    Returns:
        Dict mapping audio name to ``1`` when the label field is exactly
        ``'spoof'`` and ``0`` for any other value. If a name occurs more than
        once, the last occurrence wins.

    Raises:
        IndexError: If a non-blank line has too few fields; the message names
            the file and the offending line number.
    """
    labels_dict = {}
    with open(labels_file, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            parts = line.split()
            if not parts:
                # Blank or whitespace-only line (e.g. a trailing newline): nothing to parse.
                continue
            try:
                audio_name = parts[1]
                label_field = parts[label_index]
            except IndexError:
                raise IndexError(
                    f"{labels_file}: line {line_number} has {len(parts)} field(s); "
                    f"cannot read the file name (field 1) and the label (field {label_index})"
                ) from None
            labels_dict[audio_name] = 1 if label_field == 'spoof' else 0
    return labels_dict

In [None]:
if __name__ == "__main__":
    # ASVspoof 2021 LA evaluation set: audio folder, trial metadata and feature output folder.
    audio_path = "F:\\Awais_data\\Datasets\\ASV21\\ASVspoof2021_LA_eval\\flac"
    labels_file = "F:\\Awais_data\\Datasets\\ASV21\\ASVspoof2021_LA_eval\\ASVspoof2021.LA.cm.eval.trl.txt.txt"
    output_dir = "F:\\Awais_data\\Datasets\\ASV21\\Features\\SSL\\"

    # Fail fast: make sure the destination exists before spending time on
    # extraction, instead of discovering a missing folder at the first save.
    os.makedirs(output_dir, exist_ok=True)

    def _save_chunk(chunk_number, hidden_states_items, cnn_items, label_items):
        """Stack the buffered per-file results and write them as one numbered set of .npy files."""
        np.save(os.path.join(output_dir, f"XLSR_LA_hidden_states_features{chunk_number:02d}.npy"), np.array(hidden_states_items))
        np.save(os.path.join(output_dir, f"XLSR_LA_features_last_cnn_layer{chunk_number:02d}.npy"), np.array(cnn_items))
        np.save(os.path.join(output_dir, f"XLSR_LA_labels{chunk_number:02d}.npy"), np.array(label_items))

    # Read labels
    labels_dict = read_labels(labels_file)

    # Parameters for saving: results are buffered and flushed to disk every
    # `chunk_size` successfully processed files, into files numbered from 01.
    chunk_size = 40000
    file_counter = 1
    hidden_states_list = []
    features_last_cnn_layer_list = []
    labels_list = []

    # Iterate over the labeled audio files
    for audio_name, label in tqdm(labels_dict.items(), desc="Extracting features"):
        audio_file = os.path.join(audio_path, audio_name + ".flac")

        # Entries whose audio file is missing are reported and do not count
        # towards the chunk size.
        if not os.path.exists(audio_file):
            print(f"File '{audio_file}' not found. Skipping...")
            continue

        # Extract features
        hidden_states, features_last_cnn_layer = extract_features(audio_file, device, model, feature_extractor)

        # Buffer the features together with their label so all three lists stay aligned.
        hidden_states_list.append(hidden_states)
        features_last_cnn_layer_list.append(features_last_cnn_layer)
        labels_list.append(label)

        # Flush a full chunk and start a fresh buffer.
        if len(hidden_states_list) >= chunk_size:
            _save_chunk(file_counter, hidden_states_list, features_last_cnn_layer_list, labels_list)
            hidden_states_list = []
            features_last_cnn_layer_list = []
            labels_list = []
            file_counter += 1

    # Save any remaining samples (the final, possibly shorter, chunk).
    if hidden_states_list:
        _save_chunk(file_counter, hidden_states_list, features_last_cnn_layer_list, labels_list)

    print("Feature extraction and saving complete!")

Resuming the feature extraction after an interruption

In [9]:
if __name__ == "__main__":
    # ASVspoof 2021 LA evaluation set: audio folder, trial metadata and feature output folder.
    audio_path = "F:\\Awais_data\\Datasets\\ASV21\\ASVspoof2021_LA_eval\\flac"
    labels_file = "F:\\Awais_data\\Datasets\\ASV21\\ASVspoof2021_LA_eval\\ASVspoof2021.LA.cm.eval.trl.txt.txt"
    output_dir = "F:\\Awais_data\\Datasets\\ASV21\\Features\\SSL\\"

    # Make sure the destination exists; this also lets the directory scan below
    # work on a first run, where it simply finds no earlier chunks.
    os.makedirs(output_dir, exist_ok=True)

    def _save_chunk(chunk_number, hidden_states_items, cnn_items, label_items):
        """Stack the buffered per-file results and write them as one numbered set of .npy files."""
        np.save(os.path.join(output_dir, f"XLSR_LA_hidden_states_features{chunk_number:02d}.npy"), np.array(hidden_states_items))
        np.save(os.path.join(output_dir, f"XLSR_LA_features_last_cnn_layer{chunk_number:02d}.npy"), np.array(cnn_items))
        np.save(os.path.join(output_dir, f"XLSR_LA_labels{chunk_number:02d}.npy"), np.array(label_items))

    # Read labels
    labels_dict = read_labels(labels_file)

    # Determine which chunk numbers already exist on disk by parsing the
    # numeric suffix of the hidden-state files, e.g. "...features03.npy" -> 3.
    processed_files = set()
    for file in os.listdir(output_dir):
        if file.startswith("XLSR_LA_hidden_states_features"):
            file_number = int(file.split("features")[1].split(".npy")[0])
            processed_files.add(file_number)

    # Parameters for saving
    chunk_size = 40000
    hidden_states_list = []
    features_last_cnn_layer_list = []
    labels_list = []
    file_counter = max(processed_files) + 1 if processed_files else 1

    # Every saved chunk holds `chunk_size` files that actually existed on disk
    # (missing files never enter a chunk). The resume point is therefore counted
    # in existing files, not in label entries; counting label entries would
    # re-process samples whenever an earlier chunk had skipped a missing file.
    # This assumes the same audio files are present as in the earlier run.
    already_saved = (file_counter - 1) * chunk_size
    existing_seen = 0

    # Iterate over the labeled audio files
    for audio_name, label in tqdm(labels_dict.items(), desc="Extracting features"):
        audio_file = os.path.join(audio_path, audio_name + ".flac")

        # Check if file exists
        if not os.path.exists(audio_file):
            print(f"File '{audio_file}' not found. Skipping...")
            continue

        existing_seen += 1
        if existing_seen <= already_saved:
            continue  # Skip already processed files

        # Extract features
        hidden_states, features_last_cnn_layer = extract_features(audio_file, device, model, feature_extractor)

        # Buffer the features together with their label so all three lists stay aligned.
        hidden_states_list.append(hidden_states)
        features_last_cnn_layer_list.append(features_last_cnn_layer)
        labels_list.append(label)

        # Flush a full chunk and start a fresh buffer.
        if len(hidden_states_list) >= chunk_size:
            _save_chunk(file_counter, hidden_states_list, features_last_cnn_layer_list, labels_list)
            hidden_states_list = []
            features_last_cnn_layer_list = []
            labels_list = []
            file_counter += 1

    # Save any remaining samples (the final, possibly shorter, chunk).
    if hidden_states_list:
        _save_chunk(file_counter, hidden_states_list, features_last_cnn_layer_list, labels_list)

    print("Feature extraction and saving complete!")

Extracting features: 100%|███████████████████████████████████████████████████| 181566/181566 [2:40:54<00:00, 18.81it/s]


Feature extraction and saving complete!


In [None]:
# if __name__ == "__main__":
#     audio_path = "F:\\Awais_data\\Datasets\\ASV21\\ASVspoof2021_LA_eval\\flac"
#     labels_file = "F:\\Awais_data\\Datasets\\ASV21\\ASVspoof2021_LA_eval\\ASVspoof2021.LA.cm.eval.trl.txt.txt"
#     output_hidden_states_file = "F:\\Awais_data\\Datasets\\ASV21\\Features\\SSL\\W2V_LA_hidden_states_features.npy"
#     output_features_last_cnn_layer_file = "F:\\Awais_data\\Datasets\\ASV21\\Features\\SSL\\W2V_LA_features_last_cnn_layer.npy"
#     output_labels_file = "F:\\Awais_data\\Datasets\\ASV21\\Features\\SSL\\W2V_LA_labels.npy"

#     # Read labels
#     labels_dict = read_labels(labels_file)

#     # Extract features and store labels
#     hidden_states_list = []
#     features_last_cnn_layer_list = []
#     labels_list = []

#     # Iterate over the labeled audio files
#     for audio_name, label in tqdm(labels_dict.items(), desc="Extracting features"):
#         audio_file = os.path.join(audio_path, audio_name + ".flac")

#         # Check if file exists
#         if not os.path.exists(audio_file):
#             print(f"File '{audio_file}' not found. Skipping...")
#             continue

#         # Extract features
#         hidden_states, features_last_cnn_layer = extract_features(audio_file, device, model, feature_extractor)

#         # Store features and label if feature extraction was successful
#         hidden_states_list.append(hidden_states)
#         features_last_cnn_layer_list.append(features_last_cnn_layer)
#         labels_list.append(label)

#     # Convert lists to numpy arrays
#     hidden_states_array = np.array(hidden_states_list)
#     features_last_cnn_layer_array = np.array(features_last_cnn_layer_list)
#     labels_array = np.array(labels_list)

#     # Save features and labels to numpy files
#     np.save(output_hidden_states_file, hidden_states_array)
#     np.save(output_features_last_cnn_layer_file, features_last_cnn_layer_array)
#     np.save(output_labels_file, labels_array)

In [None]:
if __name__ == "__main__":
    # Inputs: ASVspoof 2021 DF evaluation audio and its trial metadata.
    audio_path = "F:\\Awais_data\\Datasets\\ASV21\\ASVspoof2021_DF_eval\\flac\\"
    labels_file = "F:\\Awais_data\\Datasets\\ASV21\\ASVspoof2021_DF_eval\\ASVspoof2021.DF.cm.eval.trl.txt.txt"
    # Outputs: one .npy file per feature type plus one for the labels.
    output_hidden_states_file = "F:\\Awais_data\\Datasets\\ASV21\\Features\\SSL\\W2V_DF_hidden_states_features.npy"
    output_features_last_cnn_layer_file = "F:\\Awais_data\\Datasets\\ASV21\\Features\\SSL\\W2V_DF_features_last_cnn_layer.npy"
    output_labels_file = "F:\\Awais_data\\Datasets\\ASV21\\Features\\SSL\\W2V_DF_labels.npy"

    labels_dict = read_labels(labels_file)

    # Parallel buffers: index k of each list belongs to the same audio file.
    # NOTE(review): everything is held in memory until the end of the loop; the
    # LA cell above saves in chunks instead - confirm this fits in RAM for this set.
    hidden_states_list, features_last_cnn_layer_list, labels_list = [], [], []

    for audio_name, label in tqdm(labels_dict.items(), desc="Extracting features"):
        audio_file = os.path.join(audio_path, f"{audio_name}.flac")

        if os.path.exists(audio_file):
            hidden_states, features_last_cnn_layer = extract_features(audio_file, device, model, feature_extractor)
            hidden_states_list.append(hidden_states)
            features_last_cnn_layer_list.append(features_last_cnn_layer)
            labels_list.append(label)
        else:
            # Listed in the metadata but absent on disk: report it and move on.
            print(f"File '{audio_file}' not found. Skipping...")

    # Stack the per-file results into single arrays.
    hidden_states_array = np.array(hidden_states_list)
    features_last_cnn_layer_array = np.array(features_last_cnn_layer_list)
    labels_array = np.array(labels_list)

    # Write each array to its destination file.
    for destination, stacked in (
        (output_hidden_states_file, hidden_states_array),
        (output_features_last_cnn_layer_file, features_last_cnn_layer_array),
        (output_labels_file, labels_array),
    ):
        np.save(destination, stacked)

In [None]:
# if __name__ == "__main__":
#     audio_path = "F:\\Awais_data\\Datasets\\PartialSpoof\\Train\\con_wav"
#     labels_file = "F:\\Awais_data\\Datasets\\PartialSpoof\\protocols\\PartialSpoof_LA_cm_protocols\\PartialSpoof.LA.cm.train.trl.txt"
#     output_hidden_states_file = "F:\\Awais_data\\Datasets\\PartialSpoof\\Features\\training\\SSL\\XLSR_Train_hidden_states_features.npy"
#     output_features_last_cnn_layer_file = "F:\\Awais_data\\Datasets\\PartialSpoof\\Features\\training\\SSL\\XLSR_Train_features_last_cnn_layer.npy"
#     output_labels_file = "F:\\Awais_data\\Datasets\\PartialSpoof\\Features\\training\\SSL\\XLSR_Train_labels.npy"

#     # Read labels
#     labels_dict = read_labels(labels_file)

#     # Extract features and store labels
#     hidden_states_list = []
#     features_last_cnn_layer_list = []
#     labels_list = []

#     # Iterate over the labeled audio files
#     for audio_name, label in tqdm(labels_dict.items(), desc="Extracting features"):
#         audio_file = os.path.join(audio_path, audio_name + ".wav")

#         # Check if file exists
#         if not os.path.exists(audio_file):
#             print(f"File '{audio_file}' not found. Skipping...")
#             continue

#         # Extract features
#         hidden_states, features_last_cnn_layer = extract_features(audio_file, device, model, feature_extractor)

#         # Store features and label if feature extraction was successful
#         hidden_states_list.append(hidden_states)
#         features_last_cnn_layer_list.append(features_last_cnn_layer)
#         labels_list.append(label)

#     # Convert lists to numpy arrays
#     hidden_states_array = np.array(hidden_states_list)
#     features_last_cnn_layer_array = np.array(features_last_cnn_layer_list)
#     labels_array = np.array(labels_list)

#     # Save features and labels to numpy files
#     np.save(output_hidden_states_file, hidden_states_array)
#     np.save(output_features_last_cnn_layer_file, features_last_cnn_layer_array)
#     np.save(output_labels_file, labels_array)

In [None]:
if __name__ == "__main__":
    # ASVspoof 2019 LA development set: audio folder and protocol file.
    # Every backslash is written as "\\": the previous "\A" was an invalid
    # escape sequence that only worked because Python currently leaves unknown
    # escapes untouched (it warns about them). The resulting paths are unchanged.
    audio_path = "F:\\Awais_data\\Datasets\\asvspoof2019\\LA\\ASVspoof2019_LA_dev\\flac\\"
    labels_file = "F:\\Awais_data\\Datasets\\asvspoof2019\\LA\\ASVspoof2019_LA_cm_protocols\\ASVspoof2019.LA.cm.dev.trl.txt"
    # Destination files for the two feature types and the labels.
    output_hidden_states_file = "F:\\Awais_data\\Datasets\\asvspoof2019\\LA\\Features\\SSL\\W2V_dev_hidden_states_features.npy"
    output_features_last_cnn_layer_file = "F:\\Awais_data\\Datasets\\asvspoof2019\\LA\\Features\\SSL\\W2V_dev_features_last_cnn_layer.npy"
    output_labels_file = "F:\\Awais_data\\Datasets\\asvspoof2019\\LA\\Features\\SSL\\W2V_dev_labels.npy"

    # Read labels
    labels_dict = read_labels(labels_file)

    # Parallel buffers: index k of each list belongs to the same audio file.
    hidden_states_list = []
    features_last_cnn_layer_list = []
    labels_list = []

    # Iterate over the labeled audio files
    for audio_name, label in tqdm(labels_dict.items(), desc="Extracting features"):
        audio_file = os.path.join(audio_path, audio_name + ".flac")

        # Entries whose audio file is missing are reported and left out of the output.
        if not os.path.exists(audio_file):
            print(f"File '{audio_file}' not found. Skipping...")
            continue

        # Extract features
        hidden_states, features_last_cnn_layer = extract_features(audio_file, device, model, feature_extractor)

        # Buffer the features together with their label so the lists stay aligned.
        hidden_states_list.append(hidden_states)
        features_last_cnn_layer_list.append(features_last_cnn_layer)
        labels_list.append(label)

    # Convert lists to numpy arrays
    hidden_states_array = np.array(hidden_states_list)
    features_last_cnn_layer_array = np.array(features_last_cnn_layer_list)
    labels_array = np.array(labels_list)

    # Save features and labels to numpy files
    np.save(output_hidden_states_file, hidden_states_array)
    np.save(output_features_last_cnn_layer_file, features_last_cnn_layer_array)
    np.save(output_labels_file, labels_array)

In [None]:
# if __name__ == "__main__":
#     audio_path = "F:\\Awais_data\\Datasets\\PartialSpoof\\dev\\con_wav"
#     labels_file = "F:\\Awais_data\\Datasets\\PartialSpoof\\protocols\\PartialSpoof_LA_cm_protocols\\PartialSpoof.LA.cm.dev.trl.txt"
#     output_hidden_states_file = "F:\\Awais_data\\Datasets\\PartialSpoof\\Features\\training\\SSL\\XLSR_dev_hidden_states_features.npy"
#     output_features_last_cnn_layer_file = "F:\\Awais_data\\Datasets\\PartialSpoof\\Features\\training\\SSL\\XLSR_dev_features_last_cnn_layer.npy"
#     output_labels_file = "F:\\Awais_data\\Datasets\\PartialSpoof\\Features\\training\\SSL\\XLSR_dev_labels.npy"

#     # Read labels
#     labels_dict = read_labels(labels_file)

#     # Extract features and store labels
#     hidden_states_list = []
#     features_last_cnn_layer_list = []
#     labels_list = []

#     # Iterate over the labeled audio files
#     for audio_name, label in tqdm(labels_dict.items(), desc="Extracting features"):
#         audio_file = os.path.join(audio_path, audio_name + ".wav")

#         # Check if file exists
#         if not os.path.exists(audio_file):
#             print(f"File '{audio_file}' not found. Skipping...")
#             continue

#         # Extract features
#         hidden_states, features_last_cnn_layer = extract_features(audio_file, device, model, feature_extractor)

#         # Store features and label if feature extraction was successful
#         hidden_states_list.append(hidden_states)
#         features_last_cnn_layer_list.append(features_last_cnn_layer)
#         labels_list.append(label)

#     # Convert lists to numpy arrays
#     hidden_states_array = np.array(hidden_states_list)
#     features_last_cnn_layer_array = np.array(features_last_cnn_layer_list)
#     labels_array = np.array(labels_list)

#     # Save features and labels to numpy files
#     np.save(output_hidden_states_file, hidden_states_array)
#     np.save(output_features_last_cnn_layer_file, features_last_cnn_layer_array)
#     np.save(output_labels_file, labels_array)

In [None]:
if __name__ == "__main__":
    # ASVspoof 2019 LA evaluation set: audio folder and protocol file.
    # Every backslash is written as "\\": the previous "\A" in `labels_file`
    # was an invalid escape sequence that only worked because Python currently
    # leaves unknown escapes untouched (it warns about them). The resulting
    # path is unchanged.
    audio_path = "F:\\Awais_data\\Datasets\\asvspoof2019\\LA\\ASVspoof2019_LA_eval\\flac\\"
    labels_file = "F:\\Awais_data\\Datasets\\asvspoof2019\\LA\\ASVspoof2019_LA_cm_protocols\\ASVspoof2019.LA.cm.eval.trl.txt"
    # Destination files for the two feature types and the labels.
    output_hidden_states_file = "F:\\Awais_data\\Datasets\\asvspoof2019\\LA\\Features\\SSL\\W2V_eval_hidden_states_features.npy"
    output_features_last_cnn_layer_file = "F:\\Awais_data\\Datasets\\asvspoof2019\\LA\\Features\\SSL\\W2V_eval_features_last_cnn_layer.npy"
    output_labels_file = "F:\\Awais_data\\Datasets\\asvspoof2019\\LA\\Features\\SSL\\W2V_eval_labels.npy"

    # Read labels
    labels_dict = read_labels(labels_file)

    # Parallel buffers: index k of each list belongs to the same audio file.
    hidden_states_list = []
    features_last_cnn_layer_list = []
    labels_list = []

    # Iterate over the labeled audio files
    for audio_name, label in tqdm(labels_dict.items(), desc="Extracting features"):
        audio_file = os.path.join(audio_path, audio_name + ".flac")

        # Entries whose audio file is missing are reported and left out of the output.
        if not os.path.exists(audio_file):
            print(f"File '{audio_file}' not found. Skipping...")
            continue

        # Extract features
        hidden_states, features_last_cnn_layer = extract_features(audio_file, device, model, feature_extractor)

        # Buffer the features together with their label so the lists stay aligned.
        hidden_states_list.append(hidden_states)
        features_last_cnn_layer_list.append(features_last_cnn_layer)
        labels_list.append(label)

    # Convert lists to numpy arrays
    hidden_states_array = np.array(hidden_states_list)
    features_last_cnn_layer_array = np.array(features_last_cnn_layer_list)
    labels_array = np.array(labels_list)

    # Save features and labels to numpy files
    np.save(output_hidden_states_file, hidden_states_array)
    np.save(output_features_last_cnn_layer_file, features_last_cnn_layer_array)
    np.save(output_labels_file, labels_array)

In [None]:
if __name__ == "__main__":
    audio_path = "F:\\Awais_data\\Datasets\\PartialSpoof\\eval\\con_wav"
    labels_file = "F:\\Awais_data\\Datasets\\PartialSpoof\\protocols\\PartialSpoof_LA_cm_protocols\\PartialSpoof.LA.cm.eval.trl.txt"
    output_hidden_states_file = "F:\\Awais_data\\Datasets\\PartialSpoof\\Features\\training\\SSL\\XLSR_eval_hidden_states_features.npy"
    output_features_last_cnn_layer_file = "F:\\Awais_data\\Datasets\\PartialSpoof\\Features\\training\\SSL\\XLSR_eval_features_last_cnn_layer.npy"
    output_labels_file = "F:\\Awais_data\\Datasets\\PartialSpoof\\Features\\training\\SSL\\XLSR_eval_labels.npy"

    # Read labels
    labels_dict = read_labels(labels_file)

    # Extract features and store labels
    hidden_states_list = []
    features_last_cnn_layer_list = []
    labels_list = []

    # Iterate over the labeled audio files
    for audio_name, label in tqdm(labels_dict.items(), desc="Extracting features"):
        audio_file = os.path.join(audio_path, audio_name + ".wav")

        # Check if file exists
        if not os.path.exists(audio_file):
            print(f"File '{audio_file}' not found. Skipping...")
            continue

        # Extract features
        hidden_states, features_last_cnn_layer = extract_features(audio_file, device, model, feature_extractor)

        # Store features and label if feature extraction was successful
        hidden_states_list.append(hidden_states)
        features_last_cnn_layer_list.append(features_last_cnn_layer)
        labels_list.append(label)

    # Convert lists to numpy arrays
    hidden_states_array = np.array(hidden_states_list)
    features_last_cnn_layer_array = np.array(features_last_cnn_layer_list)
    labels_array = np.array(labels_list)

    # Save features and labels to numpy files
    np.save(output_hidden_states_file, hidden_states_array)
    np.save(output_features_last_cnn_layer_file, features_last_cnn_layer_array)
    np.save(output_labels_file, labels_array)

Classification after feature extraction

In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, accuracy_score, roc_curve
from scipy.optimize import brentq
from scipy.interpolate import interp1d
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

In [None]:
# Define helper functions to compute metrics
def compute_eer(fpr, tpr):
    """Return the equal error rate of a ROC curve.

    The ROC curve is linearly interpolated through the ``(fpr, tpr)`` points and
    the root of ``1 - x - tpr(x)`` is searched on ``[0, 1]``, i.e. the false
    positive rate ``x`` at which it equals the false negative rate ``1 - tpr``.
    """
    roc = interp1d(fpr, tpr)

    def rate_gap(x):
        # Zero exactly where false-negative rate (1 - tpr) equals false-positive rate (x).
        return 1. - x - roc(x)

    return brentq(rate_gap, 0., 1.)

# Load features and labels
def load_features(file_path):
    """Read one saved NumPy array (features or labels) from ``file_path``."""
    loaded = np.load(file_path)
    return loaded

In [None]:
def evaluate_model(model, X_val, y_val, X_test, y_test):
    """Score a fitted classifier on the validation and test splits.

    ``model`` must provide ``predict`` and ``predict_proba``; column 1 of
    ``predict_proba`` is used as the score for AUC and for the ROC curve.

    Returns:
        Tuple ``(val_auc, test_auc, val_accuracy, test_accuracy, test_eer)``.
        The equal error rate is computed on the test split only.
    """
    # Hard class decisions feed the accuracies.
    val_decisions = model.predict(X_val)
    test_decisions = model.predict(X_test)
    # Scores taken from the second probability column feed AUC and the ROC curve.
    val_scores = model.predict_proba(X_val)[:, 1]
    test_scores = model.predict_proba(X_test)[:, 1]

    return (
        roc_auc_score(y_val, val_scores),
        roc_auc_score(y_test, test_scores),
        accuracy_score(y_val, val_decisions),
        accuracy_score(y_test, test_decisions),
        # roc_curve returns (fpr, tpr, thresholds); only the first two are needed.
        compute_eer(*roc_curve(y_test, test_scores)[:2]),
    )


In [None]:
#GPU execution
# def evaluate_model(model, dataloader, device):
#     model.eval()
#     targets, outputs = [], []
#     with torch.no_grad():
#         for data, target in dataloader:
#             data, target = data.to(device), target.to(device)
#             output = model(data)
#             targets.extend(target.cpu().numpy())
#             outputs.extend(output.cpu().numpy())

#     auc = roc_auc_score(targets, outputs)
#     accuracy = accuracy_score(targets, np.round(outputs))

#     fpr, tpr, _ = roc_curve(targets, outputs)
#     eer = compute_eer(fpr, tpr)

#     return auc, accuracy, eer

In [None]:
# # Set paths to feature files
# data_dir = "F:\\Awais_data\\Datasets\\PartialSpoof\\Features\\training\\SSL"
# X_train_file = os.path.join(data_dir, "Train_combined_features.npy")
# X_val_file = os.path.join(data_dir, "Val_combined_features.npy")
# X_test_file = os.path.join(data_dir, "Test_combined_features.npy")
# y_train_file = os.path.join(data_dir, "Train_labels.npy")
# y_val_file = os.path.join(data_dir, "Val_labels.npy")
# y_test_file = os.path.join(data_dir, "Test_labels.npy")

In [None]:
# Set paths to feature files
# NOTE(review): the PartialSpoof extraction cells earlier in this notebook write
# files with an "XLSR_" prefix (e.g. XLSR_eval_hidden_states_features.npy), while
# the names below have no prefix - confirm the files were renamed or produced elsewhere.
data_dir = "F:\\Awais_data\\Datasets\\PartialSpoof\\Features\\training\\SSL"
X_train_hidden_file = os.path.join(data_dir, "Train_hidden_states_features.npy")
X_val_hidden_file = os.path.join(data_dir, "dev_hidden_states_features.npy")
X_test_hidden_file = os.path.join(data_dir, "eval_hidden_states_features.npy")
X_train_cnn_file = os.path.join(data_dir, "Train_features_last_cnn_layer.npy")
X_val_cnn_file = os.path.join(data_dir, "dev_features_last_cnn_layer.npy")
X_test_cnn_file = os.path.join(data_dir, "eval_features_last_cnn_layer.npy")
y_train_file = os.path.join(data_dir, "Train_labels.npy")
y_val_file = os.path.join(data_dir, "dev_labels.npy")
y_test_file = os.path.join(data_dir, "eval_labels.npy")

# Load features and labels (train / dev-as-validation / eval-as-test)
X_train_hidden = load_features(X_train_hidden_file)
X_val_hidden = load_features(X_val_hidden_file)
X_test_hidden = load_features(X_test_hidden_file)
X_train_cnn = load_features(X_train_cnn_file)
X_val_cnn = load_features(X_val_cnn_file)
X_test_cnn = load_features(X_test_cnn_file)
y_train = load_features(y_train_file)
y_val = load_features(y_val_file)
y_test = load_features(y_test_file)

# Flatten every sample to one feature vector: axis 0 (samples) is kept and all
# remaining axes are merged into a single axis.
X_train_hidden = X_train_hidden.reshape(X_train_hidden.shape[0], -1)
X_val_hidden = X_val_hidden.reshape(X_val_hidden.shape[0], -1)
X_test_hidden = X_test_hidden.reshape(X_test_hidden.shape[0], -1)
X_train_cnn = X_train_cnn.reshape(X_train_cnn.shape[0], -1)
X_val_cnn = X_val_cnn.reshape(X_val_cnn.shape[0], -1)
X_test_cnn = X_test_cnn.reshape(X_test_cnn.shape[0], -1)

# Use only the hidden-state features as classifier input. The CNN features
# loaded above stay unused unless the concatenation below is enabled instead.
X_train = X_train_hidden
X_val = X_val_hidden
X_test = X_test_hidden

# Alternative (disabled): concatenate hidden-state and CNN features per sample
# X_train = np.concatenate((X_train_hidden, X_train_cnn), axis=1)
# X_val = np.concatenate((X_val_hidden, X_val_cnn), axis=1)
# X_test = np.concatenate((X_test_hidden, X_test_cnn), axis=1)

# Standardize features: the scaler is fitted on the training split only and the
# same transformation is then applied to the validation and test splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [None]:
# Convert data to PyTorch tensors
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
# y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1).to(device)
# X_val_tensor = torch.tensor(X_val, dtype=torch.float32).to(device)
# y_val_tensor = torch.tensor(y_val, dtype=torch.float32).unsqueeze(1).to(device)
# X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
# y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1).to(device)


In [None]:
# Define and train SVM
# probability=True is needed because evaluate_model() calls predict_proba();
# random_state is fixed so repeated runs give the same result.
svm_model = SVC(probability=True, random_state=42)
svm_model.fit(X_train, y_train)
svm_val_auc, svm_test_auc, svm_val_accuracy, svm_test_accuracy, svm_test_eer = evaluate_model(svm_model, X_val, y_val, X_test, y_test)

# Only the test-split metrics are printed; the validation metrics stay available in the variables above.
print(f"SVM - AUC: {svm_test_auc}, Accuracy: {svm_test_accuracy}, EER: {svm_test_eer}")

In [None]:
# Define the MLP model for GPU
# class MLP(nn.Module):
#     def __init__(self, input_size):
#         super(MLP, self).__init__()
#         self.layers = nn.Sequential(
#             nn.Linear(input_size, 256),
#             nn.ReLU(),
#             nn.Linear(256, 128),
#             nn.ReLU(),
#             nn.Linear(128, 64),
#             nn.ReLU(),
#             nn.Linear(64, 1),
#             nn.Sigmoid()
#         )

#     def forward(self, x):
#         return self.layers(x)

In [None]:
# # Define and train MLP
# mlp_model = MLPClassifier(hidden_layer_sizes=(256, 128, 64), max_iter=1000, random_state=42)
# mlp_model.fit(X_train, y_train)
# mlp_val_auc, mlp_test_auc, mlp_val_accuracy, mlp_test_accuracy, mlp_test_eer = evaluate_model(mlp_model, X_val, y_val, X_test, y_test)

# print(f"MLP - AUC: {mlp_test_auc}, Accuracy: {mlp_test_accuracy}, EER: {mlp_test_eer}")


In [None]:
# Light CNN Model
class LightCNN(nn.Module):
    """Small 1-D CNN producing one sigmoid score per sample.

    Two length-preserving convolutions (kernel 3, stride 1, padding 1) are
    followed by two fully connected layers. Inputs must be shaped
    ``(batch, 1, input_length)``; the output is ``(batch, 1)`` with values in
    ``[0, 1]``.

    Args:
        input_length: Length of each input sequence, used to size the first
            fully connected layer. When omitted, the value is read from the
            module-level ``X_train`` (``X_train.shape[1]``), as the original
            code did; passing it explicitly removes that hidden dependency.
    """

    def __init__(self, input_length=None):
        super().__init__()
        if input_length is None:
            # Backward-compatible default: size the network from the global training matrix.
            input_length = X_train.shape[1]
        self.conv1 = nn.Conv1d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3, stride=1, padding=1)
        # Both convolutions keep the sequence length, so the flattened size is
        # 32 channels times the input length.
        self.fc1 = nn.Linear(32 * input_length, 128)
        self.fc2 = nn.Linear(128, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map ``(batch, 1, input_length)`` inputs to ``(batch, 1)`` sigmoid scores."""
        x = self.conv1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.relu(x)
        # Merge the channel and length axes into one feature vector per sample.
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        return x

In [None]:
def train_lightcnn(model, train_loader, criterion, optimizer, device):
    """Run one pass over ``train_loader``, taking one optimizer step per batch.

    The model is switched to training mode first. Each batch is moved to
    ``device``, the gradients are reset, and the loss from ``criterion`` is
    back-propagated before the optimizer updates the parameters. Nothing is
    returned.
    """
    model.train()
    for batch_inputs, batch_targets in train_loader:
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # Reset gradients left over from the previous batch before the new backward pass.
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_inputs), batch_targets)
        batch_loss.backward()
        optimizer.step()

def _collect_predictions(model, loader, device):
    """Run `model` over `loader` and return ``(targets, outputs)`` as flat 1-D arrays.

    Assumes the model emits one score per sample (e.g. shape (batch, 1)), so
    flattening keeps targets and outputs aligned.
    """
    targets, outputs = [], []
    with torch.no_grad():
        for data, target in loader:
            output = model(data.to(device))
            # Targets are only needed on the host; no device round-trip.
            targets.append(target.detach().cpu().numpy().reshape(-1))
            outputs.append(output.detach().cpu().numpy().reshape(-1))
    if not targets:
        # Empty loader: hand back empty arrays and let the metric functions
        # report the problem.
        return np.array([]), np.array([])
    return np.concatenate(targets), np.concatenate(outputs)


def evaluate_lightcnn(model, val_loader, test_loader, device):
    """Score `model` on the validation and test loaders.

    Args:
        model: Trained module; switched to eval mode.
        val_loader: Iterable of ``(data, target)`` validation batches.
        test_loader: Iterable of ``(data, target)`` test batches.
        device: Device the input batches are moved to.

    Returns:
        tuple: ``(val_auc, test_auc, val_accuracy, test_accuracy, test_eer)``.
        The EER is computed on the test split only, via the notebook's
        ``compute_eer(fpr, tpr)`` helper.
    """
    model.eval()
    val_targets, val_outputs = _collect_predictions(model, val_loader, device)
    test_targets, test_outputs = _collect_predictions(model, test_loader, device)

    val_auc = roc_auc_score(val_targets, val_outputs)
    test_auc = roc_auc_score(test_targets, test_outputs)

    # Hard labels by rounding the scores; np.round rounds halves to even,
    # so a score of exactly 0.5 is labelled 0.
    val_accuracy = accuracy_score(val_targets, np.round(val_outputs))
    test_accuracy = accuracy_score(test_targets, np.round(test_outputs))

    fpr, tpr, _ = roc_curve(test_targets, test_outputs)
    test_eer = compute_eer(fpr, tpr)

    return val_auc, test_auc, val_accuracy, test_accuracy, test_eer


In [None]:
# # Prepare DataLoader
# def prepare_dataloader(X, y, batch_size=32):
#     dataset = TensorDataset(X, y)
#     return DataLoader(dataset, batch_size=batch_size, shuffle=True)

# batch_size = 32
# train_loader = prepare_dataloader(X_train_tensor, y_train_tensor, batch_size)
# val_loader = prepare_dataloader(X_val_tensor, y_val_tensor, batch_size)
# test_loader = prepare_dataloader(X_test_tensor, y_test_tensor, batch_size)

# # Initialize and train MLP model
# input_size = X_train.shape[1]
# mlp_model = MLP(input_size).to(device)
# criterion = nn.BCELoss()
# optimizer = optim.Adam(mlp_model.parameters(), lr=0.001)

# epochs = 10
# for epoch in range(epochs):
#     mlp_model.train()
#     for data, target in train_loader:
#         optimizer.zero_grad()
#         output = mlp_model(data)
#         loss = criterion(output, target)
#         loss.backward()
#         optimizer.step()

# # Evaluate the model
# mlp_val_auc, mlp_val_accuracy, mlp_val_eer = evaluate_model(mlp_model, val_loader, device)
# mlp_test_auc, mlp_test_accuracy, mlp_test_eer = evaluate_model(mlp_model, test_loader, device)

# print(f"MLP - Validation AUC: {mlp_val_auc}, Validation Accuracy: {mlp_val_accuracy}, Validation EER: {mlp_val_eer}")
# print(f"MLP - Test AUC: {mlp_test_auc}, Test Accuracy: {mlp_test_accuracy}, Test EER: {mlp_test_eer}")

In [None]:
# Prepare DataLoader
def prepare_dataloader(X, y, batch_size=32, shuffle=True):
    """Wrap feature/label arrays in a DataLoader shaped for the 1-D CNN.

    Args:
        X: Array-like of shape (n_samples, n_features).
        y: Array-like of shape (n_samples,) holding the labels.
        batch_size: Number of samples per batch.
        shuffle: Whether to reshuffle the samples every epoch. Defaults to
            True (the previous hard-coded behavior); pass False for
            validation/test loaders when a stable order is wanted.

    Returns:
        DataLoader yielding ``(data, target)`` float32 batches of shape
        (batch, 1, n_features) and (batch, 1).
    """
    tensor_X = torch.tensor(X, dtype=torch.float32).unsqueeze(1)  # Add channel dimension
    tensor_y = torch.tensor(y, dtype=torch.float32).unsqueeze(1)  # Column vector to match the model output
    dataset = TensorDataset(tensor_X, tensor_y)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

# Seed torch's RNG before anything stochastic in this cell (weight
# initialisation, dropout, batch shuffling) so the reported metrics can be
# reproduced on a fresh run.
SEED = 42
torch.manual_seed(SEED)

batch_size = 32
train_loader = prepare_dataloader(X_train, y_train, batch_size)
val_loader = prepare_dataloader(X_val, y_val, batch_size)
test_loader = prepare_dataloader(X_test, y_test, batch_size)

# Train and evaluate LightCNN
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
lightcnn_model = LightCNN().to(device)
criterion = nn.BCELoss()  # the model already ends in a sigmoid
optimizer = optim.Adam(lightcnn_model.parameters(), lr=0.001)

epochs = 10
for epoch in range(epochs):
    train_lightcnn(lightcnn_model, train_loader, criterion, optimizer, device)

lightcnn_val_auc, lightcnn_test_auc, lightcnn_val_accuracy, lightcnn_test_accuracy, lightcnn_test_eer = evaluate_lightcnn(lightcnn_model, val_loader, test_loader, device)

# Only the test-split metrics are reported here.
print(f"LightCNN - AUC: {lightcnn_test_auc}, Accuracy: {lightcnn_test_accuracy}, EER: {lightcnn_test_eer}")


In [None]:
# One-Class SVM Classifier
from sklearn.svm import OneClassSVM

# Assuming that the positive class is labeled as 1.
# Re-encode the labels as +1 (positive class) / -1 (everything else) so they
# are comparable with the model's inlier/outlier predictions.
# y_train_one_class is not used by fit() below (the fit is unsupervised).
y_train_one_class = np.where(y_train == 1, 1, -1)
y_val_one_class = np.where(y_val == 1, 1, -1)
y_test_one_class = np.where(y_test == 1, 1, -1)

# NOTE(review): per the scikit-learn docs, nu upper-bounds the fraction of
# training errors, so nu=0.5 lets up to half of the training positives fall
# outside the boundary. Confirm this is intended.
one_class_model = OneClassSVM(kernel="rbf", gamma='scale', nu=0.5)
one_class_model.fit(X_train[y_train == 1])  # Train only on the positive class

# Evaluate the model: continuous scores for the ranking metrics (AUC / EER).
val_scores = one_class_model.decision_function(X_val)
test_scores = one_class_model.decision_function(X_test)

val_auc = roc_auc_score(y_val_one_class, val_scores)
test_auc = roc_auc_score(y_test_one_class, test_scores)

# Hard labels come from predict(), which always returns +1 or -1.
# np.sign(score) would return 0 for a score of exactly zero, a label that
# matches neither class and is always counted as an error.
val_accuracy = accuracy_score(y_val_one_class, one_class_model.predict(X_val))
test_accuracy = accuracy_score(y_test_one_class, one_class_model.predict(X_test))

fpr, tpr, _ = roc_curve(y_test_one_class, test_scores)
test_eer = compute_eer(fpr, tpr)

print(f"One-Class SVM - AUC: {test_auc}, Accuracy: {test_accuracy}, EER: {test_eer}")
