# Speech Features and Neuropsychological Questionnaires


##### Import modules

In [10]:
import pandas as pd
#import plotly.graph_objects as go # plot heatmap of correlation
#import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import pearsonr
#import plotly
import matplotlib
import scipy
import statsmodels.formula.api as smf
from statsmodels.stats.multitest import multipletests


In [12]:
# Record the versions of the core packages used in this notebook.
# The keys are the packages' own module names (pandas, matplotlib, numpy, scipy).
versions = {
    module.__name__: module.__version__
    for module in (pd, matplotlib, np, scipy)
}
versions

{'pandas': '2.2.3', 'matplotlib': '3.9.3', 'numpy': '2.1.3', 'scipy': '1.14.1'}

In [29]:
# Load the table holding the speech features and questionnaire scores.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run elsewhere after pointing it at a local copy of the file.
speech_categories_csv = '/Users/julia/Desktop/Git/SubliminalVideoPriming/data/speech/speech_categories.csv'
analysis_df = pd.read_csv(speech_categories_csv)
analysis_df.head()

Unnamed: 0,bids_number,Gender,Age,Sex,Group,duration_pos,loudness_mean_pos,pause_durations_sum_pos,pause_durations_mean_pos,number_of_pauses_pos,...,CERQ_Self_Blame,CERQ_Acceptance,CERQ_Rumination,CERQ_Positive_Refocusing,CERQ_Refocusing_on_Planning,CERQ_Positive_Reappraisal,CERQ_Putting_into_Perspective,CERQ_Catastrophizing,CERQ_Other_Blame,Group_recode
0,Sub-058,female,24,1,2,11.935063,-56.788425,4.09,0.136333,30,...,18,9,10,4,7,5,4,13,5,0
1,Sub-035,female,30,1,1,70.472562,-50.704731,39.15,0.323554,121,...,12,16,17,4,8,8,12,8,5,1
2,Sub-034,female,25,1,2,43.444563,-52.90418,26.7,0.317857,84,...,9,16,14,12,16,16,18,4,6,0
3,Sub-108,male,26,2,1,61.62575,-50.562834,26.78,0.25028,107,...,11,11,10,7,14,10,14,5,7,1
4,Sub-046,female,30,1,2,7.082125,-65.337544,3.76,0.235,16,...,6,17,14,8,10,9,13,7,16,0


# Interaction between speech and questionnaire results with moderation by group

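For every pair of speech feature and questionnaire score we fit a moderated regression model, `speech_feature ~ questionnaire + Group_recode + questionnaire × Group_recode`. The coefficient of the interaction term tests whether the group moderates the association between the questionnaire score and the speech feature.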

In [31]:
# Speech features used as outcome variables: ten features with the `_pos`
# suffix followed by the same ten features with the `_neg` suffix.
category_cols = [
    "duration_pos", "loudness_mean_pos", "pause_durations_sum_pos",
    "pause_durations_mean_pos", "number_of_pauses_pos",
    "word_frequency_mean_pos", "word_count_pos",
    "negative_sentence_ratio_pos", "neutral_sentence_ratio_pos",
    "positive_sentence_ratio_pos",
    "duration_neg", "loudness_mean_neg", "pause_durations_sum_neg",
    "pause_durations_mean_neg", "number_of_pauses_neg",
    "word_frequency_mean_neg", "word_count_neg",
    "negative_sentence_ratio_neg", "neutral_sentence_ratio_neg",
    "positive_sentence_ratio_neg",
]

# Questionnaire scores used as predictors.
# The grouping variable is deliberately NOT listed here: it is the moderator of
# the regression models, not a questionnaire. The former 'group_recode' entry
# also did not match the actual column name ('Group_recode'), so it was only
# ever skipped silently by the column-existence check.
questionnaire_col = [
    'BVAQ', 'BDI', 'STAI2', 'STAI1',
    'DERS_Total_Score',
    'DERS_Non_Acceptance_of_Emotional_Reactions',
    'DERS_Problems_with_Goal_Oriented_Behavior',
    'DERS_Impulse_Control_Problems',
    'DERS_Lack_of_Emotional_Awareness',
    'DERS_Limited_Access_to_Emotion_Regulation_Strategies',
    'DERS_Lack_of_Emotional_Clarity',
    'CERQ_Self_Blame', 'CERQ_Acceptance', 'CERQ_Rumination',
    'CERQ_Positive_Refocusing', 'CERQ_Refocusing_on_Planning',
    'CERQ_Positive_Reappraisal', 'CERQ_Putting_into_Perspective',
    'CERQ_Catastrophizing', 'CERQ_Other_Blame',
]

In [42]:
# Fit one moderated regression per (questionnaire, speech feature) pair,
#     speech_feature ~ questionnaire + Group_recode + questionnaire * Group_recode,
# and collect the coefficient and p-value of the interaction (moderation) term.
moderator_col = 'Group_recode'

# Without the moderator no model can be fitted; fail with a clear message
# instead of an obscure KeyError when sorting an empty results table later on.
if moderator_col not in analysis_df.columns:
    raise KeyError(f"Moderator column '{moderator_col}' not found in analysis_df")

# Questionnaires that are absent from the data are skipped, but reported once
# instead of being ignored silently.
missing_questionnaires = [q for q in questionnaire_col if q not in analysis_df.columns]
if missing_questionnaires:
    print(f"Skipping questionnaires not found in DataFrame columns: {missing_questionnaires}")

# One dict per fitted model
interaction_results = []

for questionnaire in questionnaire_col:
    if questionnaire in missing_questionnaires:
        continue

    try:
        # Build the product term on a temporary copy so that the shared
        # `analysis_df` is not modified by this cell (re-running the cell or
        # inspecting the frame afterwards shows no leftover scratch column).
        model_data = analysis_df.assign(
            interaction_term=analysis_df[questionnaire] * analysis_df[moderator_col]
        )
    except Exception as e:
        print(f"Error processing {questionnaire}: {e}")
        continue

    for speech_feature in category_cols:
        try:
            # Moderation model: both main effects plus their product term
            formula = f"{speech_feature} ~ {questionnaire} + {moderator_col} + interaction_term"
            model = smf.ols(formula, data=model_data).fit()

            if 'interaction_term' in model.pvalues:
                interaction_results.append({
                    'Speech Feature': speech_feature,
                    'Questionnaire': questionnaire,
                    'Interaction Coeff': model.params['interaction_term'],
                    'Interaction p-Value': model.pvalues['interaction_term'],
                })
            else:
                print(f"Interaction term not found in model for {questionnaire} and {speech_feature}")

        except Exception as e:
            # Best effort: report the failing pair and continue with the rest
            print(f"Error processing {questionnaire} and {speech_feature}: {e}")

# Collect everything in one table, smallest p-values first. Passing the column
# names explicitly keeps the table well-formed even if no model could be fitted.
result_columns = ['Speech Feature', 'Questionnaire', 'Interaction Coeff', 'Interaction p-Value']
interaction_df = (
    pd.DataFrame(interaction_results, columns=result_columns)
    .sort_values(by='Interaction p-Value')
    .reset_index(drop=True)
)

# Interactions with p < 0.05.
# NOTE: these p-values are NOT corrected for the number of models fitted, so
# this table is exploratory only.
significant_interactions = interaction_df[interaction_df['Interaction p-Value'] < 0.05]
significant_interactions

Unnamed: 0,Speech Feature,Questionnaire,Interaction Coeff,Interaction p-Value
0,duration_pos,CERQ_Acceptance,-4.730577,0.004376
1,neutral_sentence_ratio_neg,DERS_Impulse_Control_Problems,-0.032274,0.005977
2,pause_durations_sum_pos,CERQ_Acceptance,-2.154429,0.006537
3,word_count_pos,CERQ_Acceptance,-12.436298,0.007728
4,negative_sentence_ratio_pos,BVAQ,0.010151,0.013305
5,number_of_pauses_pos,CERQ_Acceptance,-8.194921,0.013364
6,loudness_mean_pos,BVAQ,0.411642,0.016978
7,positive_sentence_ratio_pos,DERS_Non_Acceptance_of_Emotional_Reactions,-0.033256,0.017778
8,word_frequency_mean_pos,BVAQ,-0.016681,0.019302
9,loudness_mean_pos,DERS_Lack_of_Emotional_Clarity,1.165787,0.024009


In [43]:
# Optional export of the uncorrected hits (disabled):
# significant_interactions.to_csv('W:/Fmri_Forschung/Allerlei/JuliaS/GitHub/SubliminalVideoPriming/data/speech/significant_interactions_group.csv', index=False)

# Show the uncorrected hits rounded to three decimals
significant_interactions.round(decimals=3)

Unnamed: 0,Speech Feature,Questionnaire,Interaction Coeff,Interaction p-Value
0,duration_pos,CERQ_Acceptance,-4.731,0.004
1,neutral_sentence_ratio_neg,DERS_Impulse_Control_Problems,-0.032,0.006
2,pause_durations_sum_pos,CERQ_Acceptance,-2.154,0.007
3,word_count_pos,CERQ_Acceptance,-12.436,0.008
4,negative_sentence_ratio_pos,BVAQ,0.01,0.013
5,number_of_pauses_pos,CERQ_Acceptance,-8.195,0.013
6,loudness_mean_pos,BVAQ,0.412,0.017
7,positive_sentence_ratio_pos,DERS_Non_Acceptance_of_Emotional_Reactions,-0.033,0.018
8,word_frequency_mean_pos,BVAQ,-0.017,0.019
9,loudness_mean_pos,DERS_Lack_of_Emotional_Clarity,1.166,0.024


In [44]:
# Rebuild the full, unsorted results table from the collected model results
interaction_df = pd.DataFrame(interaction_results)

# Correct the interaction p-values for multiple comparisons across all fitted
# models. The correction applied is Bonferroni (family-wise error rate control),
# not Benjamini-Hochberg.
# NOTE(review): if false-discovery-rate control was intended, set the method to
# 'fdr_bh' instead; the adjusted p-values below would change accordingly.
correction_method = 'bonferroni'
adjusted_results = multipletests(
    interaction_df['Interaction p-Value'],
    alpha=0.05,
    method=correction_method,
)

# multipletests returns (reject flags, corrected p-values, ...)
reject_flags, corrected_pvalues = adjusted_results[0], adjusted_results[1]
interaction_df['p_adj'] = corrected_pvalues      # adjusted p-values
interaction_df['significant'] = reject_flags     # True where H0 is rejected after correction

# Keep only the interactions that survive the correction, smallest p_adj first
significant_interactions_corrected = (
    interaction_df[interaction_df['significant']].sort_values(by='p_adj')
)
significant_interactions_corrected

Unnamed: 0,Speech Feature,Questionnaire,Interaction Coeff,Interaction p-Value,p_adj,significant
