 Data Preprocessing

In [1]:
import tensorflow as tf
# Print the installed TensorFlow version so the outputs below can be tied to it.
print(tf.__version__)


2.5.0


In [4]:
import tensorflow as tf

# Check if TensorFlow detects GPU
# (tf.config.list_physical_devices is the stable spelling of the experimental alias.)
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print("GPUs detected:", gpus)
else:
    print("No GPU detected")

# Get and print TensorFlow's CUDA version.
# Builds without CUDA support can lack the 'cuda_version' entry, so look it up
# with .get() instead of indexing to avoid a KeyError on those installs.
cuda_version = tf.sysconfig.get_build_info().get('cuda_version')
if cuda_version is not None:
    print(f"TensorFlow is using CUDA version: {cuda_version}")
else:
    print("This TensorFlow build does not report a CUDA version")


No GPU detected
TensorFlow is using CUDA version: 64_112


In [6]:
import os
import librosa
import numpy as np
import soundfile as sf
from sklearn.model_selection import train_test_split

# Function to load and preprocess audio files
def preprocess_audio(file_path, target_sr=16000, duration=5):
    """Load an audio file and force it to a fixed number of samples.

    Args:
        file_path: Path passed to ``librosa.load``.
        target_sr: Sample rate requested from ``librosa.load``.
        duration: Target clip length in seconds. Fractional values are
            accepted; the sample count is rounded to the nearest integer.

    Returns:
        The loaded samples, zero-padded at the end or truncated so that the
        result holds ``round(duration * sr)`` samples.

    Raises:
        ValueError: If ``duration`` is negative.
    """
    if duration < 0:
        raise ValueError(f"duration must be non-negative, got {duration!r}")

    # Load the audio file (sr is the rate librosa actually used)
    audio, sr = librosa.load(file_path, sr=target_sr)

    # Sample counts must be integers: a fractional duration would otherwise
    # yield a float that neither slicing nor np.pad accepts.
    target_len = int(round(duration * sr))

    missing = target_len - len(audio)
    if missing > 0:
        # Too short: append zeros at the end only.
        audio = np.pad(audio, (0, missing))
    else:
        # Long enough: keep the leading target_len samples.
        audio = audio[:target_len]

    return audio

# Function to save preprocessed audio data
def save_audio(file_path, audio_data, sample_rate=16000):
    """Write audio samples to ``file_path`` with ``soundfile.write``.

    Args:
        file_path: Destination path handed to ``sf.write``.
        audio_data: Samples to write.
        sample_rate: Sample rate recorded in the file. Defaults to 16000,
            the value that was previously hard-coded, so existing
            two-argument calls behave as before.
    """
    sf.write(file_path, audio_data, sample_rate)

# Load and preprocess the dataset
def load_dataset(data_dir, labels):
    """Load every WAV file under ``data_dir/<label>`` for each given label.

    Each file is run through ``preprocess_audio`` with its default settings.

    Args:
        data_dir: Root directory containing one sub-directory per label.
        labels: Iterable of label names; each is used both as the
            sub-directory name and as the label recorded for its files.

    Returns:
        A tuple ``(audio, labels)`` of NumPy arrays with one entry per
        loaded file, in label order and then sorted file-name order.

    NOTE(review): a later cell runs ``from datasets import load_dataset``,
    which rebinds this name; re-running this cell's call after that import
    would hit the other function.
    """
    audio_files = []
    labels_list = []

    for label in labels:
        label_dir = os.path.join(data_dir, label)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # resulting arrays (and any seeded split of them) reproducible.
        for file in sorted(os.listdir(label_dir)):
            # Compare the extension case-insensitively so '.WAV' is not skipped.
            if not file.lower().endswith('.wav'):
                continue
            audio_file_path = os.path.join(label_dir, file)
            audio_files.append(preprocess_audio(audio_file_path))
            labels_list.append(label)

    return np.array(audio_files), np.array(labels_list)

# Example usage: point data_dir at the dataset root (one sub-folder per label)
data_dir = 'path_to_audio_data'  # Replace with actual path to dataset
labels = ['cry', 'scream', 'normal']  # Categories
audio_data, audio_labels = load_dataset(data_dir, labels)

# First hold out 30% of all clips as the test set ...
X_train_full, X_test, y_train_full, y_test = train_test_split(
    audio_data,
    audio_labels,
    test_size=0.3,
    random_state=42,
)
# ... then carve 10% of the remaining clips off as the validation set.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full,
    y_train_full,
    test_size=0.1,
    random_state=42,
)


ModuleNotFoundError: No module named 'librosa'

Model Fine-Tuning (YAMNet & Wav2Vec2)

In [4]:
import tensorflow as tf
import tensorflow_hub as hub

# TensorFlow Hub handle of the pre-trained YAMNet model, loaded once for the notebook
YAMNET_HANDLE = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(YAMNET_HANDLE)

# Convert audio into features for YAMNet
def extract_yamnet_features(audio_data, output='scores'):
    """Run the module-level ``yamnet_model`` on audio and return one output.

    The model call yields three values, unpacked here in the order
    ``scores, embeddings, spectrogram``. Previously only ``scores`` was
    reachable; ``output`` now selects which one is returned.

    Args:
        audio_data: Audio samples; converted to a ``tf.float32`` tensor.
            NOTE(review): presumably a 1-D mono waveform at the sample rate
            YAMNet expects -- confirm against the model's documentation.
        output: One of ``'scores'`` (default, unchanged behavior),
            ``'embeddings'`` or ``'spectrogram'``.

    Returns:
        The selected model output converted with ``.numpy()``.

    Raises:
        ValueError: If ``output`` is not one of the three names above.
    """
    output_names = ('scores', 'embeddings', 'spectrogram')
    if output not in output_names:
        raise ValueError(
            f"output must be one of {output_names}, got {output!r}"
        )

    # Convert audio data to a tensor
    audio_data_tensor = tf.convert_to_tensor(audio_data, dtype=tf.float32)

    # Pair each returned value with its name so the caller can pick one.
    results = dict(zip(output_names, yamnet_model(audio_data_tensor)))

    return results[output].numpy()

# Example: Extract features from one audio sample
# (X_train is produced by the train/test split in the preprocessing cell.)
features = extract_yamnet_features(X_train[0])
print(features.shape)  # Check shape of the features


ModuleNotFoundError: No module named 'tensorflow'

In [None]:
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
from datasets import load_dataset
import torch
from torch.utils.data import DataLoader

# Load pre-trained Wav2Vec2 model and its input pre-processor.
# This checkpoint was pre-trained on audio only and ships without
# tokenizer/vocabulary files, so Wav2Vec2Processor (feature extractor + CTC
# tokenizer) cannot be built from it. Sequence classification only needs the
# feature extractor, which accepts the same call arguments used by
# preprocess_for_wav2vec2 below, so the `processor` name is kept.
from transformers import Wav2Vec2FeatureExtractor

model_name = "facebook/wav2vec2-large-xlsr-53"
processor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name, num_labels=3)

# Example of preprocessing audio for Wav2Vec2
def preprocess_for_wav2vec2(audio_data):
    """Run the module-level ``processor`` on ``audio_data``.

    The processor is asked for PyTorch tensors, told the audio is sampled
    at 16000 Hz, and has padding enabled. Its result is returned untouched.
    """
    processor_kwargs = {
        "return_tensors": "pt",
        "sampling_rate": 16000,
        "padding": True,
    }
    return processor(audio_data, **processor_kwargs)

# Prepare dataset for training
def prepare_data_for_training(audio_data, labels, label_to_id=None):
    """Build per-clip training examples that include their labels.

    The previous version ignored ``labels`` entirely, so the examples fed to
    the model carried no targets. Each clip is encoded with
    ``preprocess_for_wav2vec2`` and paired with an integer class id.

    Args:
        audio_data: Sequence of audio clips.
        labels: Sequence of labels aligned with ``audio_data``, or ``None``
            to produce unlabeled examples.
        label_to_id: Optional mapping from label value to integer class id.
            When omitted it is derived from the sorted unique values of
            ``labels``. Pass the same mapping for every split; a mapping
            derived separately per split can differ if a split lacks a class.

    Returns:
        A list of plain dicts, one per clip. Each holds the encoded entries
        and, when labels are given, an integer under ``"labels"``.

    Raises:
        ValueError: If ``audio_data`` and ``labels`` differ in length.
        KeyError: If a label is missing from ``label_to_id``.
    """
    if labels is None:
        label_ids = [None] * len(audio_data)
    else:
        if len(audio_data) != len(labels):
            raise ValueError(
                f"audio_data has {len(audio_data)} items but labels has {len(labels)}"
            )
        if label_to_id is None:
            # Sorting keeps the id assignment deterministic across runs.
            label_to_id = {name: idx for idx, name in enumerate(sorted(set(labels)))}
        label_ids = [label_to_id[label] for label in labels]

    inputs = []
    for audio, label_id in zip(audio_data, label_ids):
        encoded = preprocess_for_wav2vec2(audio)
        # Each clip is encoded on its own, so every entry is assumed to carry
        # a leading batch axis of size 1 (TODO confirm); drop it so that a
        # DataLoader can stack examples into (batch, ...) itself.
        example = {key: value[0] for key, value in encoded.items()}
        if label_id is not None:
            example["labels"] = int(label_id)
        inputs.append(example)
    return inputs

# Example: Prepare dataset
train_inputs = prepare_data_for_training(X_train, y_train)

# Create DataLoader for training
# NOTE(review): no shuffle argument is passed -- confirm that DataLoader's
# default batch ordering is what is wanted here.
train_dataloader = DataLoader(train_inputs, batch_size=16)

# Example of training loop (simplified): a single pass over the data with
# one optimizer step per batch.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
model.train()

for batch in train_dataloader:
    optimizer.zero_grad()
    # Each batch is unpacked into keyword arguments for the model call.
    outputs = model(**batch)
    # NOTE(review): presumably outputs.loss is only populated when the batch
    # includes labels -- confirm that prepare_data_for_training supplies them,
    # otherwise loss.backward() below has nothing to differentiate.
    loss = outputs.loss
    loss.backward()
    optimizer.step()


 Ensemble Model

In [None]:


# Function for Majority Voting or Averaging (Ensemble)
def ensemble_predict(model_1_preds, model_2_preds, method='majority'):
    """Combine the predictions of two models into one label per sample.

    Args:
        model_1_preds: Predictions of the first model. For ``'majority'``
            these are non-negative integer class labels, one per sample.
            For ``'average'`` they are per-class scores of shape
            ``(n_samples, n_classes)``.
        model_2_preds: Predictions of the second model, same layout.
        method: ``'majority'`` (default) or ``'average'``.

    Returns:
        For ``'majority'``, a list with one label per sample. With only two
        voters a disagreement is a tie, and ``argmax`` over the vote counts
        resolves it to the smaller label. For ``'average'``, a NumPy array
        holding the argmax of the mean scores along axis 1.

    Raises:
        ValueError: If ``method`` is unknown, or if the two label sequences
            differ in length for ``'majority'``.
    """
    if method == 'majority':
        votes_1 = list(model_1_preds)
        votes_2 = list(model_2_preds)
        # zip() would silently drop the surplus of the longer sequence.
        if len(votes_1) != len(votes_2):
            raise ValueError(
                f"prediction lengths differ: {len(votes_1)} vs {len(votes_2)}"
            )
        # Majority voting
        return [
            np.bincount([pred1, pred2]).argmax()
            for pred1, pred2 in zip(votes_1, votes_2)
        ]

    if method == 'average':
        # Convert first: adding two plain lists would concatenate them
        # instead of summing element-wise.
        probs_1 = np.asarray(model_1_preds)
        probs_2 = np.asarray(model_2_preds)
        # Averaging the probabilities
        return np.argmax((probs_1 + probs_2) / 2, axis=1)

    # Previously an unknown method fell through to an unbound local variable.
    raise ValueError(
        f"Unknown ensemble method {method!r}; expected 'majority' or 'average'"
    )

# Example usage:
# NOTE(review): yamnet_predictions and wav2vec2_predictions are not assigned in
# any cell shown in this notebook; they must be defined before this cell can run.
model_1_preds = yamnet_predictions  # Replace with actual YAMNet predictions
model_2_preds = wav2vec2_predictions  # Replace with actual Wav2Vec2 predictions

# Combine the two prediction sets with the two-voter majority rule.
ensemble_preds = ensemble_predict(model_1_preds, model_2_preds, method='majority')


Evaluation

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Example: Evaluate the ensemble model against the held-out test labels.
# Each entry pairs a printed heading with the metric function shown under it.
# NOTE(review): y_test holds the label strings from the dataset loader while
# majority voting yields integer labels -- confirm both use the same encoding.
report_sections = (
    ("Confusion Matrix:", confusion_matrix),
    ("Classification Report:", classification_report),
)
for heading, metric in report_sections:
    print(heading)
    print(metric(y_test, ensemble_preds))


Deployment with Temporal

In [None]:
from datetime import timedelta

from temporalio import workflow, activity
from temporalio.client import Client

@activity.defn
async def preprocess_audio_activity(audio_file_path: str):
    """Temporal activity placeholder for the audio preprocessing step.

    Args:
        audio_file_path: Presumably the path of the audio file to preprocess.

    NOTE(review): the body is still a stub. ``preprocessed_audio`` is never
    assigned inside this function, so the return statement looks it up as a
    global when the activity runs.
    """
    # Your audio preprocessing logic here
    return preprocessed_audio

@activity.defn
async def classify_audio_activity(audio_data):
    """Temporal activity placeholder for the classification step.

    Args:
        audio_data: Presumably the output of the preprocessing activity.

    NOTE(review): the body is still a stub. ``ensemble_predictions`` is never
    assigned inside this function, so the return statement looks it up as a
    global when the activity runs.
    """
    # Your classification logic using ensemble model here
    return ensemble_predictions

@workflow.defn
class AudioClassificationWorkflow:
    """Temporal workflow that preprocesses one audio file and classifies it.

    Runs ``preprocess_audio_activity`` followed by ``classify_audio_activity``
    and returns the second activity's result.
    """

    @workflow.run
    async def run(self, audio_file_path: str):
        """Execute both activities in sequence.

        Args:
            audio_file_path: Passed unchanged to ``preprocess_audio_activity``.

        Returns:
            Whatever ``classify_audio_activity`` returns.
        """
        # Temporal requires every activity call to carry a start-to-close or
        # schedule-to-close timeout. Five minutes is a placeholder budget;
        # tune it to the real preprocessing and inference times.
        activity_timeout = timedelta(minutes=5)

        # Define the workflow tasks
        preprocessed_audio = await workflow.execute_activity(
            preprocess_audio_activity,
            audio_file_path,
            start_to_close_timeout=activity_timeout,
        )
        predictions = await workflow.execute_activity(
            classify_audio_activity,
            preprocessed_audio,
            start_to_close_timeout=activity_timeout,
        )
        return predictions

# Start the Temporal client
client = await Client.connect("localhost:7233")
# Create and run the workflow
workflow_id = "audio-classification-workflow"
result = await client.execute_workflow(AudioClassificationWorkflow.run, audio_file_path="path_to_audio_file")
