In [None]:
#SpeechBrain file
import os
import numpy as np
import torchaudio
from speechbrain.pretrained.interfaces import foreign_class
from speechbrain.pretrained import EncoderClassifier
import torch
from tqdm import tqdm

# Paths
input_folder = '/home/sarthak/CAPSTONE_PROJECT/DRISHTI_SINGH/Dataset/bihar_files/'  # Replace with your folder path
output_folder = '/home/sarthak/CAPSTONE_PROJECT/DRISHTI_SINGH/Dataset/SpeechBrain_bihar_files'  # Replace with your output folder path
os.makedirs(output_folder, exist_ok=True)

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Initialize the classifier.
# run_opts places the model on the device selected above; without it the
# SpeechBrain interface stays on its default device (CPU) even when CUDA is
# available, so the "Using device" message would not reflect where inference
# actually runs.
classifier = foreign_class(
    source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
    pymodule_file="custom_interface.py",
    classname="CustomEncoderWav2vec2Classifier",
    run_opts={"device": str(device)},
)

def extract_features(path, target_sr=16000):
    """Compute one utterance-level embedding for an audio file.

    The waveform is loaded with torchaudio, resampled to ``target_sr`` when the
    file uses a different rate, and passed to the module-level ``classifier``
    via ``encode_batch``. The returned embeddings are averaged over their first
    axis and squeezed.

    Args:
        path: Path to an audio file readable by ``torchaudio.load``.
        target_sr: Sampling rate (Hz) fed to the encoder. Defaults to 16000,
            the rate the pretrained wav2vec2 emotion model expects.

    Returns:
        numpy.ndarray: The averaged embedding with singleton axes removed.

    Note:
        The tensor is passed to ``encode_batch`` exactly as loaded, so each row
        of the loaded signal (presumably one row per channel) is encoded as a
        separate batch item before averaging.
    """
    # Load the audio file
    signal, fs = torchaudio.load(path)

    # encode_batch does not resample on its own; feeding audio at the wrong
    # rate yields embeddings for a time-stretched signal.
    if fs != target_sr:
        signal = torchaudio.functional.resample(signal, fs, target_sr)

    # Move the signal to the selected device
    signal = signal.to(device)

    # Inference only: skip autograd bookkeeping
    with torch.no_grad():
        embeddings = classifier.encode_batch(signal)

    # Move embeddings back to CPU before converting to NumPy
    embeddings = embeddings.detach().cpu()

    # Average over the batch axis and return
    return embeddings.mean(dim=0).squeeze().numpy()
    
# Get list of .flac files (sorted so every run processes them in the same order;
# os.listdir makes no ordering guarantee)
flac_files = sorted(file for file in os.listdir(input_folder) if file.endswith('.flac'))

# Process each file with a progress bar
for file_name in tqdm(flac_files, desc="Processing files", unit="file"):
    file_path = os.path.join(input_folder, file_name)

    # Extract embeddings
    embeddings = extract_features(file_path)

    # Save embeddings as .npy.
    # Only the trailing extension is swapped: str.replace would also rewrite
    # any earlier ".flac" occurrence inside the file name.
    output_name = os.path.splitext(file_name)[0] + '.npy'
    output_path = os.path.join(output_folder, output_name)
    np.save(output_path, embeddings)
    # tqdm.write logs the message without breaking the progress bar
    tqdm.write(f"Saved embeddings for {file_name} to {output_path}")

print("Done!")

In [1]:
#Wav2Vec2.0 embeddings

In [None]:
import os
import librosa
import numpy as np
import torch
from transformers import Wav2Vec2Model, Wav2Vec2Processor
from tqdm import tqdm  # For progress bars

# Input and output locations
input_folder = '/home/sarthak/CAPSTONE_PROJECT/DRISHTI_SINGH/Dataset/bihar_files/'  # Replace with your folder path
output_folder = '/home/sarthak/CAPSTONE_PROJECT/DRISHTI_SINGH/Dataset/Embeddings_wav2vec2_bihar'  # Replace with your output folder path
os.makedirs(output_folder, exist_ok=True)

# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Load the Wav2Vec 2.0 processor and model, then place the model on the device
model_name = "facebook/wav2vec2-base"
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2Model.from_pretrained(model_name)
model = model.to(device)

# Function to extract embeddings
def extract_embeddings(file_path):
    """Return frame-level Wav2Vec 2.0 hidden states for one audio file.

    The file is decoded by librosa at 16 kHz, prepared by the module-level
    ``processor``, and run through the module-level ``model`` on ``device``.
    The final hidden layer is returned without any pooling over time.

    Args:
        file_path: Path to an audio file readable by ``librosa.load``.

    Returns:
        numpy.ndarray: ``last_hidden_state`` with only the batch axis removed,
        so the result keeps its (frames, hidden) layout even when the clip
        produces a single frame.
    """
    # Load audio file
    audio, sr = librosa.load(file_path, sr=16000)  # Ensure 16kHz sampling rate

    # Process audio with the processor
    inputs = processor(audio, sampling_rate=16000, return_tensors="pt", padding=True)

    # Move tensors to the selected device
    inputs = {key: value.to(device) for key, value in inputs.items()}

    # Get embeddings (inference only, no autograd bookkeeping)
    with torch.no_grad():
        outputs = model(**inputs)

    # Drop only the batch axis: a bare squeeze() would also collapse the frame
    # axis for very short clips and return a 1-D array.
    embeddings = outputs.last_hidden_state.squeeze(0).cpu().numpy()

    return embeddings

# Get list of .flac files (sorted so every run processes them in the same order;
# os.listdir makes no ordering guarantee)
flac_files = sorted(file for file in os.listdir(input_folder) if file.endswith('.flac'))

# Process each file with a progress bar
for file_name in tqdm(flac_files, desc="Processing files", unit="file"):
    file_path = os.path.join(input_folder, file_name)

    # Extract embeddings
    embeddings = extract_embeddings(file_path)

    # Save embeddings as .npy.
    # Only the trailing extension is swapped: str.replace would also rewrite
    # any earlier ".flac" occurrence inside the file name.
    output_name = os.path.splitext(file_name)[0] + '.npy'
    output_path = os.path.join(output_folder, output_name)
    np.save(output_path, embeddings)
    # tqdm.write logs the message without breaking the progress bar
    tqdm.write(f"Saved embeddings for {file_name} to {output_path}")

print("Done!")
