In [10]:
import os
import numpy as np
import librosa
import xml.etree.ElementTree as ET
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
import re

# Function to parse annotations from RML files
def parse_annotations(rml_file_path):
    annotations = []
    try:
        tree = ET.parse(rml_file_path)
        root = tree.getroot()
        for event in root.findall(".//ns0:Event", namespaces={"ns0": "http://www.respironics.com/PatientStudy.xsd"}):
            # Validate attributes
            family = event.attrib.get("Family", "").strip()
            type_ = event.attrib.get("Type", "").strip()
            start = event.attrib.get("Start", "0").strip()
            duration = event.attrib.get("Duration", "0").strip()

            # Convert attributes safely to float where applicable
            try:
                start = float(start)
                duration = float(duration)
            except ValueError:
                print(f"Invalid Start or Duration in annotation: Start={start}, Duration={duration}")
                continue

            if family == "Respiratory":
                annotations.append({"family": family, "type": type_, "start": start, "duration": duration})
    except Exception as e:
        print(f"Error parsing {rml_file_path}: {e}")
    return annotations

# Function to extract statistical features from audio
def extract_statistical_features(wav_file_path, annotations, sr=22050):
    features = []
    labels = []
    try:
        audio, _ = librosa.load(wav_file_path, sr=sr)
        audio_length = len(audio)
        print(f"Audio length (samples): {audio_length}")

        for annotation in annotations:
            start = annotation.get("start", 0)
            duration = annotation.get("duration", 0)

            start_sample = max(0, int(start * sr))
            end_sample = min(audio_length, int(start_sample + duration * sr))

            if end_sample > start_sample:
                segment = audio[start_sample:end_sample]

                # Extract statistical features
                zcr = np.mean(librosa.feature.zero_crossing_rate(y=segment))
                rmse = np.mean(librosa.feature.rms(y=segment))
                spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=segment, sr=sr))
                spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=segment, sr=sr))
                spectral_flatness = np.mean(librosa.feature.spectral_flatness(y=segment))
                spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=segment, sr=sr))

                # Combine features
                feature_vector = [zcr, rmse, spectral_centroid, spectral_bandwidth, spectral_flatness, spectral_rolloff]
                features.append(feature_vector)
                labels.append(annotation.get("type", "Unknown"))
            else:
                print(f"Skipping annotation with invalid segment: Start={start_sample}, End={end_sample}")
    except Exception as e:
        print(f"Error processing file {wav_file_path}: {e}")
    return np.array(features), labels

# Main function to process folders and train Random Forest Classifier
def process_and_train_random_forest(wav_folder_path, rml_folder_path, sr=22050):
    all_features = []
    all_labels = []

    # Match .wav and .rml files by identifier
    wav_files = {re.match(r"(.*)_mic_cleaned\.wav", f).group(1): f
                 for f in os.listdir(wav_folder_path) if f.endswith("_mic_cleaned.wav")}
    rml_files = {re.match(r"filtered_(.*)\.rml", f).group(1): f
                 for f in os.listdir(rml_folder_path) if f.endswith(".rml")}

    # Process matched files
    common_ids = set(wav_files.keys()).intersection(rml_files.keys())
    for file_id in common_ids:
        wav_file_path = os.path.join(wav_folder_path, wav_files[file_id])
        rml_file_path = os.path.join(rml_folder_path, rml_files[file_id])

        print(f"Processing: ID={file_id}, WAV={wav_files[file_id]}, RML={rml_files[file_id]}")
        annotations = parse_annotations(rml_file_path)
        print(f"Annotations found: {len(annotations)}")

        features, labels = extract_statistical_features(wav_file_path, annotations, sr)
        print(f"Features extracted: {len(features)}, Labels: {len(labels)}")

        if features.size > 0:
            all_features.extend(features)
            all_labels.extend(labels)

    if not all_features or not all_labels:
        print("No data available for training. Check file paths and annotations.")
        return

    # Train-test split and train classifier
    label_encoder = LabelEncoder()
    all_labels_encoded = label_encoder.fit_transform(all_labels)
    X_train, X_test, y_train, y_test = train_test_split(all_features, all_labels_encoded, test_size=0.2, random_state=42)

    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)

    # Evaluate the model
    y_pred = clf.predict(X_test)
    labels = range(len(label_encoder.classes_))  # Explicitly specify all class labels

    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, labels=labels, target_names=label_encoder.classes_))

    print("\nConfusion Matrix:")
    print(confusion_matrix(y_test, y_pred, labels=labels))


# Specify folder paths and execute
wav_folder_path = "/Users/terlan/Library/CloudStorage/OneDrive-uni-mannheim.de/cleaned_wav/Mic"
rml_folder_path = "/Users/terlan/Library/CloudStorage/OneDrive-uni-mannheim.de/cleaned_label"
process_and_train_random_forest(wav_folder_path, rml_folder_path)


Processing: ID=00001284-100507, WAV=00001284-100507_mic_cleaned.wav, RML=filtered_00001284-100507.rml
Annotations found: 277
Audio length (samples): 299725650
Features extracted: 277, Labels: 277
Processing: ID=00001390-100507, WAV=00001390-100507_mic_cleaned.wav, RML=filtered_00001390-100507.rml
Annotations found: 301
Audio length (samples): 325656450
Features extracted: 301, Labels: 301
Processing: ID=00001463-100507, WAV=00001463-100507_mic_cleaned.wav, RML=filtered_00001463-100507.rml
Annotations found: 247
Audio length (samples): 297763200
Features extracted: 247, Labels: 247
Processing: ID=00001016-100507, WAV=00001016-100507_mic_cleaned.wav, RML=filtered_00001016-100507.rml
Annotations found: 108
Audio length (samples): 481660200
Features extracted: 108, Labels: 108
Processing: ID=00001419-100507, WAV=00001419-100507_mic_cleaned.wav, RML=filtered_00001419-100507.rml
Annotations found: 164
Audio length (samples): 417296250
Features extracted: 164, Labels: 164
Processing: ID=00001

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Evaluate the model
y_pred = clf.predict(X_test)
labels = range(len(label_encoder.classes_))  # Explicitly specify all class labels

print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=labels, target_names=label_encoder.classes_))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred, labels=labels))
