In [None]:
import pandas as pd
import os
from google.colab import drive
# Mount Google Drive so the prediction CSVs below are reachable under /content/drive.
drive.mount('/content/drive')


# One predictions CSV per base model.
# NOTE(review): the text and classic files sit directly under MyDrive while the
# two audio files are under WuHaoAllenCentad/ -- confirm this is intentional.
opensmile_path = "/content/drive/MyDrive/WuHaoAllenCentad/audiosegmentationopensmilepredictions.csv"
wav2vec_path = "/content/drive/MyDrive/WuHaoAllenCentad/wav2vec2predictions.csv"
text_path = "/content/drive/MyDrive/speechtexttranscriptionpredicted.csv"
classic_path = "/content/drive/MyDrive/logistic_regression_predictions.csv"


def _load_predictions(path, label, fallback_columns):
    """Read one model's prediction CSV, degrading to an empty frame on failure.

    Args:
        path: Location of the CSV file to read.
        label: Human-readable model name used in the status messages.
        fallback_columns: Column names for the empty DataFrame returned when
            the file cannot be read.

    Returns:
        The loaded DataFrame, or an empty DataFrame with ``fallback_columns``
        if reading raised any exception.
    """
    try:
        frame = pd.read_csv(path)
    except Exception as e:
        # Deliberate best-effort: report the failure and keep the notebook
        # running with an empty frame instead of aborting the cell.
        print(f"Error loading {label} data: {e}")
        return pd.DataFrame(columns=fallback_columns)
    print(f"Loaded {label} data from {path}")
    return frame


df_opensmile = _load_predictions(
    opensmile_path, "OpenSmile", ['subject/task', 'stress_probability']
)
df_wav2vec = _load_predictions(
    wav2vec_path, "Wav2Vec2", ['subject/task', 'stress_probability']
)
df_text = _load_predictions(
    text_path, "Text/BERT", ['subject/task', 'predicted stressed probability']
)
# The classic fallback must be bound to df_classic (not df_text): otherwise a
# failed load would discard the text predictions and leave df_classic undefined.
# The fallback column uses the spelling that preprocess_df knows how to rename.
df_classic = _load_predictions(
    classic_path, "Classic", ['subject/task', 'predicted_stressed_probability']
)

display(df_classic.head())

def preprocess_df(df, source_name):
    """Collapse one model's predictions to a single mean probability per key.

    Column names are normalised first: ``subject_task`` becomes
    ``subject/task`` and, when ``stress_probability`` is absent, the first of
    ``predicted stressed probability`` / ``predicted_stressed_probability``
    that exists is renamed to ``stress_probability``.

    Args:
        df: Prediction frame for one model. It is not modified.
        source_name: Short model tag; the output probability column is named
            ``prob_<source_name>``.

    Returns:
        A DataFrame with columns ``subject/task`` and ``prob_<source_name>``
        holding the mean probability per ``subject/task``. If the required
        columns cannot be found, a warning is printed and an empty DataFrame
        with those two columns is returned.
    """
    key_col = 'subject/task'
    prob_col = 'stress_probability'
    out_col = f'prob_{source_name}'

    if 'subject_task' in df.columns and key_col not in df.columns:
        df = df.rename(columns={'subject_task': key_col})

    if prob_col not in df.columns:
        # Rename only the first alias present. Renaming both at once would
        # yield two columns called 'stress_probability' when a file carries
        # both spellings, which breaks the groupby and later merges.
        for alias in ('predicted stressed probability', 'predicted_stressed_probability'):
            if alias in df.columns:
                df = df.rename(columns={alias: prob_col})
                break

    if key_col not in df.columns or prob_col not in df.columns:
        print(f"Warning: Required columns missing in {source_name} dataframe. Columns: {df.columns}")
        return pd.DataFrame(columns=[key_col, out_col])

    # Several rows may share a key; average them into one row per key.
    return (
        df.groupby(key_col)[prob_col]
        .mean()
        .reset_index()
        .rename(columns={prob_col: out_col})
    )


# Reduce every model's raw predictions to one prob_<model> column per subject/task.
df_opensmile_processed = preprocess_df(df_opensmile, 'opensmile')
df_wav2vec_processed = preprocess_df(df_wav2vec, 'wav2vec')
df_text_processed = preprocess_df(df_text, 'text')
df_classic_processed = preprocess_df(df_classic, 'classic')


# Print a short preview of each processed frame under its own heading.
processed_previews = [
    ("\nOpenSmile Predictions (Processed):", df_opensmile_processed),
    ("\nWav2Vec2 Predictions (Processed):", df_wav2vec_processed),
    ("\nText/BERT Predictions (Processed):", df_text_processed),
    ("\nClassic Predictions (Processed):", df_classic_processed),
]
for preview_header, preview_frame in processed_previews:
    print(preview_header)
    print(preview_frame.head())

In [None]:
import pandas as pd


# Inner joins: only subject/task keys present in all four prediction frames survive.
merged_temp = df_opensmile_processed.merge(
    df_wav2vec_processed, on='subject/task', how='inner'
)
features_df = (
    merged_temp
    .merge(df_text_processed, on='subject/task', how='inner')
    .merge(df_classic_processed, on='subject/task', how='inner')
)


print(f"Merged Features DataFrame Shape: {features_df.shape}")
print("First 5 rows of features_df:")
print(features_df.head())

In [None]:
import pandas as pd


# Label files for the two splits.
train_path = "/content/drive/MyDrive/WuHaoAllenCentad/stressidtrainbalanced2.csv"
val_path = "/content/drive/MyDrive/WuHaoAllenCentad/stressidtest2.csv"


# Load the labels and normalise the key column name. DataFrame.rename ignores
# keys that are not present, so this is a no-op when 'subject_task' is absent.
key_rename = {'subject_task': 'subject/task'}
df_train_labels = pd.read_csv(train_path).rename(columns=key_rename)
df_val_labels = pd.read_csv(val_path).rename(columns=key_rename)


# Attach the per-model probabilities to each labelled row; the inner join
# drops labelled rows that have no entry in features_df.
train_merged = df_train_labels.merge(features_df, on='subject/task', how='inner')
val_merged = df_val_labels.merge(features_df, on='subject/task', how='inner')


feature_cols = ['prob_opensmile', 'prob_wav2vec', "prob_text", "prob_classic"]
target_col = 'binary-stress'


# Feature matrix / target vector for each split.
X_train, y_train = train_merged[feature_cols], train_merged[target_col]
X_val, y_val = val_merged[feature_cols], val_merged[target_col]

print(f"Training set shape: {X_train.shape}")
print(f"Validation set shape: {X_val.shape}")
print("First 5 rows of X_train:")
print(X_train.head())
print(y_train.head())

In [None]:
import pandas as pd


# Per-model weights for the weighted-average ensemble.
# NOTE(review): the origin of these numbers is not shown in this notebook --
# record where they come from.
weights = {
    'prob_classic': 0.6934,
    'prob_wav2vec': 0.6358,
    'prob_opensmile': 0.656,
    'prob_text': 0.4429,
}


# Normaliser so the ensemble output is a weighted mean of the model probabilities.
total_weight = sum(weights.values())
print(f"Total Weight: {total_weight}")


X_val_ensemble = X_val.copy()

# Accumulate weight * probability in the dict's insertion order
# (classic, wav2vec, opensmile, text), then divide by the total weight.
weighted_sum = None
for model_col, model_weight in weights.items():
    weighted_term = X_val[model_col] * model_weight
    weighted_sum = weighted_term if weighted_sum is None else weighted_sum + weighted_term

X_val_ensemble['prob_ensemble'] = weighted_sum / total_weight


print("First 5 rows of ensemble probabilities:")
print(X_val_ensemble[[*weights, 'prob_ensemble']].head())

In [None]:
from sklearn.metrics import balanced_accuracy_score, f1_score, roc_auc_score, precision_score, recall_score


# Hard labels: an ensemble probability of 0.5 or more is predicted as class 1.
ensemble_prob = X_val_ensemble['prob_ensemble']
y_pred_ensemble = (ensemble_prob >= 0.5).astype(int)


# Threshold-based metrics use the hard labels; ROC-AUC uses the raw probabilities.
bal_acc_ens = balanced_accuracy_score(y_val, y_pred_ensemble)
f1_macro_ens = f1_score(y_val, y_pred_ensemble, average='macro')
roc_auc_ens = roc_auc_score(y_val, ensemble_prob)
precision_macro_ens = precision_score(y_val, y_pred_ensemble, average='macro')
recall_macro_ens = recall_score(y_val, y_pred_ensemble, average='macro')


print(
    "Weighted Ensemble Performance:",
    f"Balanced Accuracy: {bal_acc_ens:.4f}",
    f"F1 (macro):        {f1_macro_ens:.4f}",
    f"AUC (ROC-AUC):     {roc_auc_ens:.4f}",
    f"Precision (macro): {precision_macro_ens:.4f}",
    f"Recall (macro):    {recall_macro_ens:.4f}",
    sep="\n",
)

In [None]:
from sklearn.metrics import precision_score, recall_score


# Precision and recall for label 1 only, computed from the ensemble's hard predictions.
precision_pos = precision_score(y_val, y_pred_ensemble, pos_label=1)
recall_pos = recall_score(y_val, y_pred_ensemble, pos_label=1)

print(
    "Weighted Ensemble Metrics for Positive Class (Stressed):",
    f"Precision (Class 1): {precision_pos:.4f}",
    f"Recall (Class 1):    {recall_pos:.4f}",
    sep="\n",
)

In [None]:
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score


# The three names this report reads are never assigned anywhere in the
# notebook, so on a fresh kernel the cell fails with NameError. Bind them
# explicitly to the weighted ensemble's validation outputs.
# NOTE(review): assumes this report is meant for the ensemble -- confirm.
true_labels = y_val
predicted_labels = y_pred_ensemble
predicted_probabilities = X_val_ensemble['prob_ensemble']


accuracy = accuracy_score(true_labels, predicted_labels)

# Dict form supplies the macro-average numbers; text form is the printable table.
report = classification_report(true_labels, predicted_labels, output_dict=True)
report_text = classification_report(true_labels, predicted_labels)

# ROC-AUC is computed from probabilities, not from the thresholded labels.
roc_auc = roc_auc_score(true_labels, predicted_probabilities)

macro_avg = report['macro avg']
print(f"Overall Accuracy: {accuracy:.4f}")
print(f"Macro F1-score: {macro_avg['f1-score']:.4f}")
print(f"ROC-AUC Score: {roc_auc:.4f}")
print(f"Precision (macro avg): {macro_avg['precision']:.4f}")
print(f"Recall (macro avg): {macro_avg['recall']:.4f}")

print("\nClassification Report:")
print(report_text)