In [1]:
import os

os.environ["WEKA_JAR_PATH"] = "C:/Program Files/Weka-3-9-6/weka.jar" 
os.environ["JAVA_HOME"] = "C:/Program Files/Weka-3-9-6/jre/zulu17.32.13-ca-fx-jre17.0.2-win_x64/bin"

import librosa
import numpy as np
import parselmouth
from scipy.stats import iqr, skew, kurtosis
from scipy.signal import find_peaks
import pandas as pd
import math
import weka.core.jvm as jvm
from weka.core.converters import Loader
from weka.classifiers import Classifier
import weka
import weka.core.serialization as SerializationHelper
import sounddevice as sd
import numpy as np
from collections import deque
import joblib
from sklearn.preprocessing import StandardScaler
import keyboard
from time import sleep

In [2]:
def safe_formant_values(formant_obj, formant_index, duration):
    times = np.arange(0, duration, 0.01)
    values = [formant_obj.get_value_at_time(formant_index, t) for t in times]
    return [v for v in values if v is not None and not np.isnan(v) and v > 0]

def extract_features(audio, sr=16000):
    y = audio

    # Normalize audio
    y = y / np.max(np.abs(y)) if np.max(np.abs(y)) > 0 else y
    
    # Extract MFCCs (first 13 coefficients)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    delta_mfccs = librosa.feature.delta(mfccs)
    delta2_mfccs = librosa.feature.delta(mfccs, order=2)

    # Extract additional spectral features
    zcr = librosa.feature.zero_crossing_rate(y)[0]
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]
    rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)[0]
    flatness = librosa.feature.spectral_flatness(y=y)[0]
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)

    # Extract formants using parselmouth
    snd = parselmouth.Sound(y, sampling_frequency=sr)
    formant = snd.to_formant_burg()
    duration = snd.duration
    formants = [safe_formant_values(formant, i + 1, duration) for i in range(3)]

    # Extract pitch
    pitch = snd.to_pitch()
    pitches = pitch.selected_array['frequency'][pitch.selected_array['frequency'] > 0]

    # Extract harmonicity (HNR)
    harmonicity = snd.to_harmonicity_cc()
    hnr_values = harmonicity.values
    hnr_values = hnr_values[~np.isnan(hnr_values)]

    # Extract energy (RMS)
    energy = librosa.feature.rms(y=y)[0]

    # Statistical descriptors
    def compute_stats(data):
        skew_val = skew(data) if len(data) > 2 else 0
        kurt_val = kurtosis(data) if len(data) > 2 else 0
        skew_val = 0 if math.isnan(skew_val) else skew_val
        kurt_val = 0 if math.isnan(kurt_val) else kurt_val
        peak_count = len(find_peaks(data)[0]) if len(data) > 2 else 0
        range_val = (np.max(data) - np.min(data)) if len(data) > 0 else 0
        return {
            'mean': np.mean(data) if len(data) > 0 else 0,
            'median': np.median(data) if len(data) > 0 else 0,
            'std_dev': np.std(data) if len(data) > 0 else 0,
            'min': np.min(data) if len(data) > 0 else 0,
            'max': np.max(data) if len(data) > 0 else 0,
            'q1': np.percentile(data, 25) if len(data) > 0 else 0,
            'q3': np.percentile(data, 75) if len(data) > 0 else 0,
            'iqr': iqr(data) if len(data) > 0 else 0,
            'jitter': np.mean(np.abs(np.diff(data))) if len(data) > 1 else 0,
            'shimmer': np.std(np.abs(np.diff(data))) if len(data) > 1 else 0,
            'skewness': skew_val,
            'kurtosis': kurt_val,
            'peak_count': peak_count,
            'range': range_val
        }

    features = {}

    # Compute stats for MFCCs and their deltas
    for i in range(13):
        mfcc_stats = compute_stats(mfccs[i])
        delta_stats = compute_stats(delta_mfccs[i])
        delta2_stats = compute_stats(delta2_mfccs[i])
        for stat, value in mfcc_stats.items():
            features[f'mfcc_{i+1}_{stat}'] = value
        for stat, value in delta_stats.items():
            features[f'mfcc_{i+1}_delta_{stat}'] = value
        for stat, value in delta2_stats.items():
            features[f'mfcc_{i+1}_delta2_{stat}'] = value

    # Compute stats for spectral features
    for name, array in zip(
        ['zcr', 'spectral_centroid', 'spectral_bandwidth', 'spectral_rolloff', 'spectral_flatness'],
        [zcr, centroid, bandwidth, rolloff, flatness]
    ):
        stats = compute_stats(array)
        for stat, value in stats.items():
            features[f'{name}_{stat}'] = value

    for i in range(contrast.shape[0]):
        stats = compute_stats(contrast[i])
        for stat, value in stats.items():
            features[f'spectral_contrast_{i+1}_{stat}'] = value

    # Compute stats for Formants
    for i in range(3):
        formant_stats = compute_stats(formants[i])
        for stat, value in formant_stats.items():
            features[f'formant_{i+1}_{stat}'] = value

    # Compute stats for Pitch
    pitch_stats = compute_stats(pitches)
    for stat, value in pitch_stats.items():
        features[f'pitch_{stat}'] = value

    # Compute stats for Energy
    energy_stats = compute_stats(energy)
    for stat, value in energy_stats.items():
        features[f'energy_{stat}'] = value

    # Compute stats for Harmonicity (HNR)
    hnr_stats = compute_stats(hnr_values)
    for stat, value in hnr_stats.items():
        features[f'harmonicity_{stat}'] = value

    return features

In [None]:
jvm.start(packages=True)


DEBUG:weka.core.jvm:Adding bundled jars
DEBUG:weka.core.jvm:Classpath=['c:\\Users\\jbkee\\Desktop\\Jupyter-Projects\\.venv\\Lib\\site-packages\\weka\\lib\\arpack_combined.jar', 'c:\\Users\\jbkee\\Desktop\\Jupyter-Projects\\.venv\\Lib\\site-packages\\weka\\lib\\core.jar', 'c:\\Users\\jbkee\\Desktop\\Jupyter-Projects\\.venv\\Lib\\site-packages\\weka\\lib\\mtj.jar', 'c:\\Users\\jbkee\\Desktop\\Jupyter-Projects\\.venv\\Lib\\site-packages\\weka\\lib\\python-weka-wrapper.jar', 'c:\\Users\\jbkee\\Desktop\\Jupyter-Projects\\.venv\\Lib\\site-packages\\weka\\lib\\weka.jar']
DEBUG:weka.core.jvm:MaxHeapSize=default
DEBUG:weka.core.jvm:Package support enabled


In [4]:
# jvm.stop()

In [9]:

# 2) Load your ARFF header (so we know the attribute layout)
loader = Loader(classname="weka.core.converters.ArffLoader")
header = loader.load_file("ThreeClassStackReducedSet.arff")
header.class_is_last()

attributes = []
for i in range(header.num_attributes):
    name = header.attribute(i).name
    if name != "emotion":
        attributes.append(header.attribute(i).name)
additional_attributes = ["energy_iqr", "energy_jitter", "energy_kurtosis", "energy_max", "energy_mean", "energy_median", "energy_peak_count", "energy_q3", "energy_range", "energy_shimmer"]
attributes.extend(additional_attributes)
print("Attributes in the header: ", attributes)

# 3) Load the serialized model
jobject=SerializationHelper.read("ThreeClassStacking.model")
print(jobject)
cls = Classifier(jobject)
print(cls)

# 4) Live audio capture (example with sounddevice & a 5 s rolling buffer)

sr = 16000

# Create a rolling buffer to store the last 5 seconds of audio
buf = deque(maxlen=sr*10)

# Define the audio callback function to capture audio
def audio_callback(indata, frames, time, status):
    buf.extend(indata[:,0])

stream = sd.InputStream(samplerate=sr, channels=1, callback=audio_callback)

scaler = joblib.load("librossa_scaler.save")
assert isinstance(scaler, StandardScaler), "Scaler is not a StandardScaler instance."

Attributes in the header:  ['pitch_q3', 'pitch_median', 'pitch_mean', 'pitch_q1', 'spectral_centroid_q1', 'mfcc_3_mean', 'pitch_shimmer', 'mfcc_3_q1', 'spectral_flatness_q1', 'pitch_jitter', 'pitch_range', 'mfcc_1_delta_min', 'pitch_iqr', 'mfcc_3_q3', 'mfcc_2_mean', 'spectral_flatness_median', 'spectral_rolloff_q1', 'mfcc_2_q3', 'mfcc_3_median', 'pitch_max', 'spectral_centroid_mean', 'mfcc_3_min', 'mfcc_5_min', 'spectral_centroid_median', 'energy_q1', 'mfcc_4_min', 'mfcc_2_iqr', 'zcr_q1', 'pitch_std_dev', 'mfcc_4_mean', 'mfcc_2_max', 'mfcc_4_q1', 'zcr_std_dev', 'mfcc_2_median', 'mfcc_1_delta_skewness', 'spectral_centroid_min', 'spectral_contrast_1_mean', 'mfcc_2_q1', 'spectral_bandwidth_median', 'mfcc_6_std_dev', 'mfcc_3_max', 'energy_min', 'zcr_q3', 'mfcc_13_std_dev', 'mfcc_3_jitter', 'mfcc_2_jitter', 'mfcc_6_delta_std_dev', 'zcr_iqr', 'mfcc_1_jitter', 'zcr_range', 'mfcc_2_delta_jitter', 'mfcc_2_std_dev', 'mfcc_2_delta2_std_dev', 'mfcc_6_delta_shimmer', 'mfcc_13_range', 'mfcc_6_min', 

In [10]:
# Start the audio stream
import weka.core
from weka.core.dataset import Instance
import weka.core.dataset


stream.start()

print("Recording... Press 'q' to stop.")
# Wait for the user to press 'q' to stop recording
try:
    while True:
        if keyboard.is_pressed('q'):
            print("Stopping recording...")
            break
        if len(buf) > sr * 5:
            # Get the last 5 seconds of audio from the buffer
            segment = np.array(list(buf))[-sr*5:]
            feats = extract_features(segment, sr)
            print("Extracted features:", feats)
            # Convert the features to a DataFrame
            df = pd.DataFrame([feats])
            df = df[attributes]
            print("DataFrame:", df)
            # Scale the features using the loaded scaler
            scaled_features = scaler.transform(df)
            scaled_features = np.array(scaled_features, dtype=np.float32)[0]  # Convert to float32
            print("Scaled features:", scaled_features)
            # Create a new Weka instance with the same structure as the header
            # Create a new Weka instance with the same structure as the header
            weka_instance = Instance.create_instance(scaled_features)
            # Set the dataset for the instance
            
            # Set the class attribute to missing (since we want to predict it)
            weka_instance.set_missing(header.class_index)
            
            # Classify the instance using the loaded model
            predictions = list(cls.predict(weka_instance))  # Get the class distribution
            print(f"Predictions: {predictions}")
            # Get the predicted class label
            if predictions:  # Ensure the list is not empty
                pred_index = int(predictions[0])  # Get the first prediction (class index)
                predicted_class = header.class_attribute.value(pred_index)  # Map index to class label
                print(f"Predicted class index: {pred_index}")
                print(f"Predicted class: {predicted_class}")
            else:
                print("No predictions were made.")
        print("sleeping...")
        sleep(1.0)
finally:
    stream.stop()

Recording... Press 'q' to stop.
sleeping...
sleeping...
sleeping...
sleeping...
sleeping...
sleeping...
Extracted features: {'mfcc_1_mean': np.float32(-37.73904), 'mfcc_1_median': np.float32(-39.27087), 'mfcc_1_std_dev': np.float32(11.023379), 'mfcc_1_min': np.float32(-89.41008), 'mfcc_1_max': np.float32(21.420504), 'mfcc_1_q1': np.float32(-42.265976), 'mfcc_1_q3': np.float32(-35.777973), 'mfcc_1_iqr': np.float64(6.488002777099609), 'mfcc_1_jitter': np.float32(4.1202164), 'mfcc_1_shimmer': np.float32(5.8618894), 'mfcc_1_skewness': np.float64(1.7403428554534912), 'mfcc_1_kurtosis': np.float32(12.14225), 'mfcc_1_peak_count': 36, 'mfcc_1_range': np.float32(110.83058), 'mfcc_1_delta_mean': np.float32(0.07915878), 'mfcc_1_delta_median': np.float32(0.051525846), 'mfcc_1_delta_std_dev': np.float32(1.6430992), 'mfcc_1_delta_min': np.float32(-7.3605328), 'mfcc_1_delta_max': np.float32(5.0753207), 'mfcc_1_delta_q1': np.float32(-0.71832746), 'mfcc_1_delta_q3': np.float32(0.64703286), 'mfcc_1_delt

ValueError: The feature names should match those that were passed during fit.
Feature names seen at fit time, yet now missing:
- energy_skewness
- energy_std_dev
- formant_1_iqr
- formant_1_jitter
- formant_1_kurtosis
- ...


In [None]:
jvm.stop()

In [None]:

# Start the audio stream
import weka.core
import weka.core.dataset


# with sd.InputStream(samplerate=sr, channels=1, callback=audio_callback):
#     print("Recording... Press 'q' to stop.")
#     while True:
#         if keyboard.is_pressed('q'):
#             break

#         # Check if we have enough data in the buffer
#         if len(buf) == buf.maxlen:
#             audio_data = np.array(buf).astype(np.float32)

#             # Extract features from the audio data
#             features = extract_features(audio_data, sr)

#             # Convert features to DataFrame for Weka
#             df = pd.DataFrame([features])

#             # Scale the features using the loaded scaler
#             scaled_features = scaler.transform(df)

#             # Extract attribute names from the ARFF header
#             attribute_names = [header.attribute(i).name for i in range(header.num_attributes - 1)]  # Exclude the class attribute

#             # Create an empty DataFrame with the attribute names as columns
#             filtered_features = pd.DataFrame(columns=attribute_names, index=[0])

#             # Map scaled features to the DataFrame
#             for i in range(header.num_attributes - 1):  # Exclude the class attribute
#                 attr_name = header.attribute(i).name
#                 if attr_name in df.columns:
#                     filtered_features[attr_name] = scaled_features[0][df.columns.get_loc(attr_name)]
#             # Convert the filtered features DataFrame to a list of values
#             feature_values = filtered_features.iloc[0].tolist()
#             print(type(feature_values), type(header))
#             weka_instance = weka.core.dataset.Instance.create_instance(header, feature_values)
#             weka_instance.set_dataset(header)
#             weka_instance.set_class_value(0)
            
#             # Classify the instance
#             prediction = list(cls.predict(weka_instance, verbose=True, distribution=True))
#             print(f"Predicted class index: {prediction}")
#             class_label = header.class_attribute.value(prediction)

#             print(f"Predicted class: {class_label}")

# # Stop the audio stream
# sd.stop()

In [None]:
# stream.stop()
jvm.stop()