<a href="https://colab.research.google.com/github/Janina712/RhythmMetrics_Duration/blob/main/3_2_Match_Groups.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **0. Imports & Set-Up**

In [None]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import random as random
import os
import math
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Mount Google Drive at /content/gdrive; the google.colab module is only available on Colab.
from google.colab import drive
drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
%cd /content/gdrive/MyDrive/ATAS_Plus/Duration_Metrics/2.BreathGroups_Assigned/

/content/gdrive/MyDrive/ATAS_Plus/Duration_Metrics/2.BreathGroups_Assigned


In [None]:
# Load the reading and frog tables; both paths are relative to the current working directory.
reading = pd.read_excel("reading_TextGrid_comb_BG_loop.xlsx")
frog = pd.read_excel("frog_TextGrid_comb_BG_loop.xlsx")

In [None]:
# IDs by condition
# The functions below select one of these lists through their `condition` argument
# ("frog" picks IDs_frog, anything else picks IDs_reading).
IDs_reading = ['24fa']
IDs_frog = ['24fa']

In [None]:
# IDs by group
# assign_group labels IDs containing "_" as "Control" and all others as "PWS";
# these two lists are maintained by hand and are read by the matching functions.
IDs_pws = ['24fa']
control_IDs = []

# combine both groups
# One-column frame holding every participant ID (both groups).
IDs_col = pd.DataFrame(columns =['ID'])
IDs_col['ID'] =['24fa']

# **1. Define Functions**

**1.1 Assign Participant Group**

In [None]:
def assign_group(df):
  """Prepend a "Group" column and remove silence rows.

  Parameters
  ----------
  df : pandas.DataFrame
      Must contain the columns "ID" and "Type". A column named
      'Unnamed: 0' is dropped when present (presumably the index column
      written by an earlier Excel export -- confirm against the input files).

  Returns
  -------
  pandas.DataFrame
      New frame with "Group" as first column ("Control" when the ID contains
      an underscore, "PWS" otherwise), rows with Type == "silence" removed and
      a fresh 0..n-1 index. The input frame is not modified.
  """
  # Vectorised membership test instead of a per-row chained assignment, which
  # pandas does not guarantee to write through to the frame.
  is_control = df["ID"].astype(str).str.contains("_", regex=False)
  group = pd.DataFrame({"Group": np.where(is_control, "Control", "PWS")}, index=df.index)

  df_out = pd.concat([group, df], axis=1)
  df_out = df_out[df_out.Type != "silence"].reset_index(drop=True)
  # errors='ignore' keeps the function usable on frames that lack the column.
  df_out = df_out.drop(columns=['Unnamed: 0'], errors='ignore')
  return(df_out)

**1.2 Count Vowels**

In [None]:
def count_vowels(df, condition):
  """Attach to every vowel row the number of vowels in its breath group.

  Parameters
  ----------
  df : pandas.DataFrame
      Needs the columns "Type", "ID", "Breath.Group" and "Group".
  condition : str
      Kept for interface compatibility. The count is now derived from the
      rows themselves, so the per-condition ID list is no longer consulted.

  Returns
  -------
  (pandas.DataFrame, pandas.DataFrame)
      1. The vowel rows of `df` (fresh 0..n-1 index) with a new "Syllables"
         column: the number of vowel rows sharing the row's ID and
         Breath.Group. A "Syllables" column already present in the input is
         kept under the name 'Unmached_Vowels'.
      2. The mean of every numeric column per "Group".
  """
  df_vowels = df[df["Type"] == "vowel"].reset_index(drop=True)

  # Preserve a count from an earlier pass under the name later cells drop.
  df_vowels = df_vowels.rename(columns={'Syllables': 'Unmached_Vowels'})

  # transform("size") writes each group's size onto that group's own rows, so
  # the result is correct whatever order participants and breath groups come in
  # (the earlier version built the column separately and attached it by position).
  df_vowels['Syllables'] = df_vowels.groupby(["ID", "Breath.Group"])["Type"].transform("size")

  # numeric_only=True: string columns such as "ID" cannot be averaged.
  pre_df_vowel_avg = df_vowels.groupby("Group").mean(numeric_only=True)

  return(df_vowels, pre_df_vowel_avg)

**1.3 Count Consonants**

In [None]:
def count_consonants(df, condition):
  """Attach to every consonant row the number of consonants in its breath group.

  Parameters
  ----------
  df : pandas.DataFrame
      Needs the columns "Type", "ID", "Breath.Group" and "Group".
  condition : str
      Kept for interface compatibility. The count is now derived from the
      rows themselves, so the per-condition ID list is no longer consulted.

  Returns
  -------
  (pandas.DataFrame, pandas.DataFrame)
      1. The consonant rows of `df` (fresh 0..n-1 index) with a new
         "Consonants" column: the number of consonant rows sharing the row's
         ID and Breath.Group. A "Consonants" column already present in the
         input is kept under the name 'Unmatched_Cons'.
      2. The mean of every numeric column per "Group".
  """
  df_consonants = df[df["Type"] == "consonant"].reset_index(drop=True)

  # Preserve a count from an earlier pass under the name later cells drop.
  df_consonants = df_consonants.rename(columns={'Consonants': 'Unmatched_Cons'})

  # transform("size") writes each group's size onto that group's own rows, so
  # the result is correct whatever order participants and breath groups come in
  # (the earlier version built the column separately and attached it by position).
  df_consonants['Consonants'] = df_consonants.groupby(["ID", "Breath.Group"])["Type"].transform("size")

  # numeric_only=True: string columns such as "ID" cannot be averaged.
  pre_df_consonant_avg = df_consonants.groupby("Group").mean(numeric_only=True)

  return(df_consonants, pre_df_consonant_avg)

**1.4 Average Vowel Count Per Participant**

In [None]:
def participant_vowel_avg(df, condition):
  """Average number of vowels per breath group for each participant of a condition.

  Parameters
  ----------
  df : pandas.DataFrame
      Needs the columns "ID", "Breath.Group" and "Syllables".
  condition : str
      "frog" selects IDs_frog; any other value selects IDs_reading.

  Returns
  -------
  pandas.DataFrame
      One row per ID of the selected list, in list order, with the columns
      "Group" ("Control" when the ID contains "_", otherwise "PWS"), "ID" and
      "Syllables" (mean over breath groups of the per-breath-group mean; NaN
      when the participant has no rows in `df`).

  Notes
  -----
  The result has exactly one row per condition ID. It was previously sized by
  the global IDs_col, which padded it with all-NaN rows whenever that list was
  longer than the condition's list.
  """
  if condition == "frog":
    IDs_here = IDs_frog
  else:
    IDs_here = IDs_reading

  rows = []
  for ID in IDs_here:
    subset_BGs = df[df["ID"] == ID]
    # Select the column before averaging so text columns are never averaged.
    BG_avg = subset_BGs.groupby("Breath.Group")["Syllables"].mean()
    rows.append({
      "Group": "Control" if "_" in ID else "PWS",
      "ID": ID,
      "Syllables": BG_avg.mean(),
    })

  # Explicit columns keep the layout stable even for an empty ID list.
  df_participant_vowel_avg = pd.DataFrame(rows, columns=["Group", "ID", "Syllables"])

  return(df_participant_vowel_avg)

**1.5 Average Consonant Count Per Participant**

In [None]:
def participant_consonant_avg(df, condition):
  """Average number of consonants per breath group for each participant of a condition.

  Parameters
  ----------
  df : pandas.DataFrame
      Needs the columns "ID", "Breath.Group" and "Consonants".
  condition : str
      "frog" selects IDs_frog; any other value selects IDs_reading.

  Returns
  -------
  pandas.DataFrame
      One row per ID of the selected list, in list order, with the columns
      "Group" ("Control" when the ID contains "_", otherwise "PWS"), "ID" and
      "Consonants" (mean over breath groups of the per-breath-group mean; NaN
      when the participant has no rows in `df`).

  Notes
  -----
  The "ID" column now comes from the condition's own ID list. It was
  previously taken from the global IDs_col, which pairs averages with the
  wrong IDs whenever the two lists differ.
  """
  if condition == "frog":
    IDs_here = IDs_frog
  else:
    IDs_here = IDs_reading

  rows = []
  for ID in IDs_here:
    subset_BGs = df[df["ID"] == ID]
    # Select the column before averaging so text columns are never averaged.
    BG_avg = subset_BGs.groupby("Breath.Group")["Consonants"].mean()
    rows.append({
      "Group": "Control" if "_" in ID else "PWS",
      "ID": ID,
      "Consonants": BG_avg.mean(),
    })

  # Explicit columns keep the layout stable even for an empty ID list.
  df_participant_cons_avg = pd.DataFrame(rows, columns=["Group", "ID", "Consonants"])

  return(df_participant_cons_avg)

**1.6 Compare Consonant and Vowel Counts Across Groups**

In [None]:
def compare_groups(df_vowels, df_consonants, condition="frog"):
  """Compare per-participant vowel and consonant averages between PWS and Control.

  Parameters
  ----------
  df_vowels : pandas.DataFrame
      Per-participant table with the columns "Group", "ID" and "Syllables".
  df_consonants : pandas.DataFrame
      Per-participant table with the columns "Group", "ID" and "Consonants".
  condition : str, optional
      Condition name used in the last message. Defaults to "frog", which
      reproduces the wording this function always used.

  Returns
  -------
  tuple
      (string1, string2, string3, string4, string5, short_v, short_c,
      difference_v, difference_c, longer, shorter). The strings are report
      sentences, short_v / short_c are the averages of the group with the
      smaller vowel average, the differences are absolute, and longer /
      shorter are group names.

  Notes
  -----
  Group averages are selected through the "Group" column. The earlier version
  sliced an ID-sorted groupby result by position, using the lengths of the
  global ID lists, which only works when the first group's IDs also sort first.
  """
  def _group_mean(df, group, column):
    # Mean over participants of `column` for one group; NaN when the group has no rows.
    rows = df[df["Group"] == group]
    if rows.empty:
      return np.nan
    # The column may arrive with object dtype, so coerce it before averaging.
    values = pd.to_numeric(rows[column], errors="coerce").astype(float)
    return values.groupby(rows["ID"]).mean().mean()

  # The group of the first listed participant is reported first.
  if len(df_vowels) > 0 and df_vowels["Group"].iloc[0] == "PWS":
    first, second = "PWS", "Control"
  else:
    first, second = "Control", "PWS"

  first_v = _group_mean(df_vowels, first, "Syllables")
  second_v = _group_mean(df_vowels, second, "Syllables")
  difference_v = abs(second_v - first_v)

  first_c = _group_mean(df_consonants, first, "Consonants")
  second_c = _group_mean(df_consonants, second, "Consonants")
  difference_c = abs(second_c - first_c)

  # Both comparisons are False when either average is NaN, so a missing first
  # group is handled explicitly; a tie or a missing second group falls through
  # to the final branch.
  if first_v > second_v:
    longer, shorter = first, second
    short_v, short_c = second_v, second_c
  elif second_v > first_v:
    longer, shorter = second, first
    short_v, short_c = first_v, first_c
  elif np.isnan(first_v):
    longer, shorter = second, first
    short_v, short_c = first_v, first_c
  else:
    longer, shorter = first, second
    short_v, short_c = second_v, second_c

  string1 = (f"{first} produced on average {round(first_v,2)} syllables per utterance, while {second} participants produced {round(second_v,2)} syllables on average.")
  string2 = (f"This means that there is a difference of {round(difference_v,2)} syllables per utterance.")
  string3 = (f"\n{first} produced on average {round(first_c,2)} consonants per utterance, while {second} participants produced {round(second_c,2)} consonants on average.")
  string4 = (f"This means that there is a difference of {round(difference_c,2)} consonants per utterance.")
  string5 =(f"\n{longer} participants produced longer utterances than {shorter} on the {condition} condition.")

  return(string1, string2, string3, string4, string5, short_v, short_c, difference_v, difference_c, longer, shorter)

**1.7 Match Number of Vowels Across Groups**

In [None]:
def match_vowels(df_vowels, short_group, cut_v, condition):
  """Shorten the breath groups of the longer group by `cut_v` vowel rows.

  Parameters
  ----------
  df_vowels : pandas.DataFrame
      Vowel rows with the columns "Group", "ID" and "Breath.Group".
  short_group : str
      "PWS" means the Control group is shortened; any other value means the
      PWS group is shortened.
  cut_v : float
      Number of rows to remove from the end of each breath group. It is
      rounded to an integer, and NaN is treated as 0.
  condition : str
      'frog' selects IDs_frog; any other value selects IDs_reading.

  Returns
  -------
  pandas.DataFrame
      `df_vowels` itself (after printing a notice) when the condition's
      participants belong to fewer than two groups. Otherwise the rows of the
      longer group only, where every breath group holding at least `cut_v`
      rows has lost its last `cut_v` rows and shorter ones are kept whole.

  Notes
  -----
  Fixes relative to the earlier version: the short_group == "PWS" branch never
  stored its result and returned an empty frame; the "only one group" check
  counted participants rather than distinct groups; and the removed
  DataFrame.append is replaced by a single pd.concat.
  """
  if condition == 'frog':
    IDs_here = IDs_frog
  else:
    IDs_here = IDs_reading

  # Distinct groups represented among this condition's participants.
  groups = set()
  for person in IDs_here:
    if person in IDs_pws:
      groups.add("PWS")
    if person in control_IDs:
      groups.add("Control")

  if len(groups) < 2:
    print(f"Only one group for {condition} condition. Matching not possible.")
    return(df_vowels)

  # round() cannot convert NaN to an integer; a missing difference means nothing to cut.
  cut_v = 0 if pd.isna(cut_v) else int(round(cut_v))

  if short_group == "PWS":
    long_group, long_IDs = "Control", control_IDs
  else:
    long_group, long_IDs = "PWS", IDs_pws

  df_long = df_vowels[df_vowels["Group"] == long_group]
  pieces = []
  for ID in long_IDs:
    df_long_ID = df_long[df_long["ID"] == ID]
    for BG in df_long_ID["Breath.Group"].unique():
      df_long_ID_BG = df_long_ID[df_long_ID["Breath.Group"] == BG]
      # Guard cut_v > 0 because iloc[:-0] would select nothing.
      if cut_v > 0 and len(df_long_ID_BG) >= cut_v:
        df_long_ID_BG = df_long_ID_BG.iloc[:-cut_v]
      if len(df_long_ID_BG) > 0:
        pieces.append(df_long_ID_BG)

  if not pieces:
    # Keep the column layout so later steps can still select columns.
    return(df_vowels.iloc[0:0].copy())

  syll_col_matched = pd.concat(pieces, ignore_index=True)
  return(syll_col_matched)

**1.8 Match Number of Consonants Across Groups**

In [None]:
def match_consonants(df_consonants, short_group, cut_c, condition):
  """Shorten the breath groups of the longer group by `cut_c` consonant rows.

  Parameters
  ----------
  df_consonants : pandas.DataFrame
      Consonant rows with the columns "Group", "ID" and "Breath.Group".
  short_group : str
      "PWS" means the Control group is shortened; any other value means the
      PWS group is shortened.
  cut_c : float
      Number of rows to remove from the end of each breath group. It is
      rounded to an integer, and NaN is treated as 0.
  condition : str
      'frog' selects IDs_frog; any other value selects IDs_reading.

  Returns
  -------
  pandas.DataFrame
      `df_consonants` itself (after printing a notice) when the condition's
      participants belong to fewer than two groups. Otherwise the rows of the
      longer group only, where every breath group holding at least `cut_c`
      rows has lost its last `cut_c` rows and shorter ones are kept whole.

  Notes
  -----
  Fixes relative to the earlier version: the short_group == "PWS" branch
  appended the cumulative participant frame once per participant, duplicating
  earlier participants' rows; the "only one group" check counted participants
  rather than distinct groups; and the removed DataFrame.append is replaced
  by a single pd.concat.
  """
  if condition == 'frog':
    IDs_here = IDs_frog
  else:
    IDs_here = IDs_reading

  # Distinct groups represented among this condition's participants.
  groups = set()
  for person in IDs_here:
    if person in IDs_pws:
      groups.add("PWS")
    if person in control_IDs:
      groups.add("Control")

  if len(groups) < 2:
    print(f"Only one group for {condition} condition. Matching not possible.")
    return(df_consonants)

  # round() cannot convert NaN to an integer; a missing difference means nothing to cut.
  cut_c = 0 if pd.isna(cut_c) else int(round(cut_c))

  if short_group == "PWS":
    long_group, long_IDs = "Control", control_IDs
  else:
    long_group, long_IDs = "PWS", IDs_pws

  df_long = df_consonants[df_consonants["Group"] == long_group]
  pieces = []
  for ID in long_IDs:
    df_long_ID = df_long[df_long["ID"] == ID]
    for BG in df_long_ID["Breath.Group"].unique():
      df_long_ID_BG = df_long_ID[df_long_ID["Breath.Group"] == BG]
      # Guard cut_c > 0 because iloc[:-0] would select nothing.
      if cut_c > 0 and len(df_long_ID_BG) >= cut_c:
        df_long_ID_BG = df_long_ID_BG.iloc[:-cut_c]
      if len(df_long_ID_BG) > 0:
        pieces.append(df_long_ID_BG)

  if not pieces:
    # Keep the column layout so later steps can still select columns.
    return(df_consonants.iloc[0:0].copy())

  cons_col_matched = pd.concat(pieces, ignore_index=True)
  return(cons_col_matched)

**1.9 Count Utterances**

In [None]:
def countUtterances(df):
  """Return the total number of distinct (ID, Breath.Group) utterances in `df`.

  For every distinct value in df["ID"], the distinct "Breath.Group" values of
  that participant's rows are counted, and the counts are added up.
  """
  participant_ids = df["ID"].unique()
  return(sum(len(df.loc[df["ID"] == pid, "Breath.Group"].unique()) for pid in participant_ids))

**1.10. Test if Remaining Vowel Difference is Significant**

In [None]:
def ttest_vowelDifference(matched_participant_df, shorter_group):
  """Report whether the groups still differ in vowels per utterance after matching.

  Parameters
  ----------
  matched_participant_df : pandas.DataFrame
      Per-participant table with the columns "Group" and "Syllables". Rows
      containing any missing value are ignored.
  shorter_group : str
      Name of the group with the shorter utterances. "PWS" makes "Control"
      the longer group in the message; any other value makes it "PWS".

  Returns
  -------
  str
      A three-line summary with both group means, their absolute difference,
      and the p-value of an independent two-sample t-test (scipy.stats.ttest_ind).
      The level is "undetermined" when the p-value is NaN, for example when
      one group has no participants.
  """
  if shorter_group == "PWS":
    longer_group = "Control"
  else:
    longer_group = "PWS"

  complete = matched_participant_df.dropna()
  # Pick the column before averaging: the frame also holds text columns
  # ("Group", "ID") that cannot be averaged.
  control_values = complete.loc[complete["Group"] == "Control", "Syllables"].astype(float).to_numpy()
  pws_values = complete.loc[complete["Group"] == "PWS", "Syllables"].astype(float).to_numpy()

  control_v = control_values.mean() if control_values.size else np.nan
  pws_v = pws_values.mean() if pws_values.size else np.nan

  difference_v_matched = control_v - pws_v

  # No test is possible against an empty group.
  if control_values.size == 0 or pws_values.size == 0:
    p_value = np.nan
  else:
    p_value = stats.ttest_ind(control_values, pws_values)[1]

  if p_value >= 0.05:
    level = "insignificant"
  elif p_value < 0.05:
    level = "significant"
  else:
    # Reached only when the p-value is NaN.
    level = 'undetermined'
  string11 = (f"{longer_group} utterances that were longer than the mean average length of {shorter_group} utterances were shortened. \nAfter matching, PWS had {round(pws_v,2)} vowels per utterance, while control participants had {round(control_v,2)} per utterance. \nThe difference is reduced to {abs(round(difference_v_matched,2))}, which is statistically {level} (p = {round(p_value,2)}).")

  return(string11)

**1.11. Test if Remaining Consonant Difference is Significant**

In [None]:
def ttest_conDifference(matched_participant_df, shorter_group):
  """Report whether the groups still differ in consonants per utterance after matching.

  Parameters
  ----------
  matched_participant_df : pandas.DataFrame
      Per-participant table with the columns "Group" and "Consonants". Rows
      containing any missing value are ignored.
  shorter_group : str
      Name of the group with the shorter utterances. "PWS" makes "Control"
      the longer group in the message; any other value makes it "PWS".

  Returns
  -------
  str
      A three-line summary with both group means, their absolute difference,
      and the p-value of an independent two-sample t-test (scipy.stats.ttest_ind).
      The level is "undetermined" when the p-value is NaN, for example when
      one group has no participants.

  Notes
  -----
  The first sentence now names the group that was actually shortened. It used
  to say "Control ... PWS" regardless of `shorter_group`.
  """
  if shorter_group == "PWS":
    longer_group = "Control"
  else:
    longer_group = "PWS"

  complete = matched_participant_df.dropna()
  # Pick the column before averaging: the frame also holds text columns
  # ("Group", "ID") that cannot be averaged.
  control_values = complete.loc[complete["Group"] == "Control", "Consonants"].astype(float).to_numpy()
  pws_values = complete.loc[complete["Group"] == "PWS", "Consonants"].astype(float).to_numpy()

  control_c = control_values.mean() if control_values.size else np.nan
  pws_c = pws_values.mean() if pws_values.size else np.nan

  difference_c_matched = control_c - pws_c

  # No test is possible against an empty group.
  if control_values.size == 0 or pws_values.size == 0:
    p_value = np.nan
  else:
    p_value = stats.ttest_ind(control_values, pws_values)[1]

  if p_value >= 0.05:
    level = "insignificant"
  elif p_value < 0.05:
    level = "significant"
  else:
    # Reached only when the p-value is NaN.
    level = "undetermined"
  string11 = (f"{longer_group} utterances that were longer than the mean average length of {shorter_group} utterances, were shortened. \nAfter matching, PWS had {round(pws_c,2)} consonants per utterance, while control participants had {round(control_c,2)} per utterance. \nThe difference is reduced to {abs(round(difference_c_matched,2))}, which is statistically {level} (p = {round(p_value,2)}).")

  return(string11)

# **2. Match Across Groups (PWS vs. PWNS)**

**2.1 Pre-Matching**

In [None]:
# add new column to dataframe that denotes participant's group membership
# Note: the loaded frames are overwritten here (silence rows removed, "Group" column
# prepended), so this cell is meant to run once after loading.
frog = assign_group(frog)
reading = assign_group(reading)

In [None]:
# account for case differences in annotation
# Lower-case and strip the whole column at once. The earlier per-row form
# (frame["FluencyStatus"][i] = ...) is chained assignment, which pandas does not
# guarantee to write back into the frame.
frog["FluencyStatus"] = frog["FluencyStatus"].str.lower().str.strip()
reading["FluencyStatus"] = reading["FluencyStatus"].str.lower().str.strip()

In [None]:
# exclude disfluent utterances from further analysis
# keep only rows annotated as "fluent" and renumber them from 0
frog_fluent = frog.loc[frog["FluencyStatus"] == "fluent"].reset_index(drop=True)
reading_fluent = reading.loc[reading["FluencyStatus"] == "fluent"].reset_index(drop=True)

In [None]:
# count vowels per utterance
# count_vowels returns (vowel rows with a "Syllables" count column, per-"Group" means)
frog_vowels_fluent, pre_frog_vowel_avg_across_BG = count_vowels(frog_fluent, "frog")
reading_vowels_fluent, pre_reading_vowel_avg_across_BG = count_vowels(reading_fluent, "reading")

In [None]:
# count consonants per utterance
# count_consonants returns (consonant rows with a "Consonants" count column, per-"Group" means)
[frog_consonants_fluent, pre_frog_consonant_avg_across_BG]  = count_consonants(frog_fluent,'frog')
[reading_consonants_fluent, pre_reading_consonants_avg_across_BG] = count_consonants(reading_fluent,'reading')

In [None]:
#average # vowels per breath group for each participant
# result: per-participant table with the columns "Group", "ID" and "Syllables"
frog_participant_vowel_avg_fluent = participant_vowel_avg(frog_vowels_fluent,'frog')
reading_participant_vowel_avg_fluent = participant_vowel_avg(reading_vowels_fluent,'reading')

In [None]:
#average # consonants per breath group for each participant
# result: per-participant table with the columns "Group", "ID" and "Consonants"
frog_participant_cons_avg_fluent = participant_consonant_avg(frog_consonants_fluent,'frog')
reading_participant_cons_avg_fluent = participant_consonant_avg(reading_consonants_fluent,'reading')

In [None]:
#average # per breath group for each participant and fluency status in frog
# Split the per-participant tables by group; when the vowel table has at most one
# row, all four results are empty frames instead.
if len(frog_participant_vowel_avg_fluent) > 1:
  frog_participant_vowels_avg_pws = frog_participant_vowel_avg_fluent[frog_participant_vowel_avg_fluent["Group"] == "PWS"]
  frog_participant_vowels_avg_control = frog_participant_vowel_avg_fluent[frog_participant_vowel_avg_fluent["Group"] == "Control"]
  frog_participant_consonants_avg_pws = frog_participant_cons_avg_fluent[frog_participant_cons_avg_fluent["Group"] == "PWS"]
  frog_participant_consonants_avg_control = frog_participant_cons_avg_fluent[frog_participant_cons_avg_fluent["Group"] == "Control"]
else:
  frog_participant_vowels_avg_pws = pd.DataFrame()
  frog_participant_vowels_avg_control = pd.DataFrame()
  frog_participant_consonants_avg_pws = pd.DataFrame()
  frog_participant_consonants_avg_control = pd.DataFrame()

In [None]:
#average # per breath group for each participant and fluency status in reading
# Split the per-participant tables by group; when the vowel table has at most one
# row, all four results are empty frames instead.
if len(reading_participant_vowel_avg_fluent) > 1:
  reading_participant_vowels_avg_pws = reading_participant_vowel_avg_fluent[reading_participant_vowel_avg_fluent["Group"] == "PWS"]
  reading_participant_vowels_avg_control = reading_participant_vowel_avg_fluent[reading_participant_vowel_avg_fluent["Group"] == "Control"]
  reading_participant_consonants_avg_pws = reading_participant_cons_avg_fluent[reading_participant_cons_avg_fluent["Group"] == "PWS"]
  reading_participant_consonants_avg_control = reading_participant_cons_avg_fluent[reading_participant_cons_avg_fluent["Group"] == "Control"]
else:
  reading_participant_vowels_avg_pws = pd.DataFrame()
  reading_participant_vowels_avg_control = pd.DataFrame()
  reading_participant_consonants_avg_pws = pd.DataFrame()
  reading_participant_consonants_avg_control = pd.DataFrame()

In [None]:
# compare group averages of number of vowels per utterance and number of consonants per utterance
# frog
(string1, string2, string3, string4, string5,
 short_v_frog_fluent, short_c_frog_fluent,
 difference_v_frog_fluent, difference_c_frog_fluent,
 long_group_frog, short_group_frog) = compare_groups(frog_participant_vowel_avg_fluent, frog_participant_cons_avg_fluent)

# a single print with a newline separator emits the same five lines
print(string1, string2, string3, string4, string5, sep="\n")

PWS produced on average 6.66 syllables per utterance, while Control participants produced 5.86 syllables on average.
This means that there is a difference of 0.8 syllables per utterance.

PWS produced on average 9.95 consonants per utterance, while Control participants produced 7.57 consonants on average.
This means that there is a difference of 2.38 consonants per utterance.

PWS participants produced longer utterances than Control on the frog condition.


In [None]:
# compare group averages of number of vowels per utterance and number of consonants per utterance
# reading
# NOTE: compare_groups is called without a condition name, so its last sentence
# still reads "on the frog condition" for this reading comparison.
(string5, string6, string7, string8, string9,
 short_v_read_fluent, short_c_read_fluent,
 difference_v_read_fluent, difference_c_read_fluent,
 long_group_read, short_group_read) = compare_groups(reading_participant_vowel_avg_fluent, reading_participant_cons_avg_fluent)

# a single print with a newline separator emits the same five lines
print(string5, string6, string7, string8, string9, sep="\n")

Control produced on average 7.0 syllables per utterance, while PWS participants produced nan syllables on average.
This means that there is a difference of nan syllables per utterance.

Control produced on average 9.92 consonants per utterance, while PWS participants produced nan consonants on average.
This means that there is a difference of nan consonants per utterance.

Control participants produced longer utterances than PWS on the frog condition.


**2.2 Matching**

In [None]:
# match number of vowels per utterance by cutting long utterances by the difference in average vowel number between groups
# When a condition has only one group, match_vowels prints a notice and returns its input frame.
matched_vowels_read_fluent = match_vowels(reading_vowels_fluent, short_group_read,  difference_v_read_fluent,'reading')
matched_vowels_frog_fluent = match_vowels(frog_vowels_fluent, short_group_frog, difference_v_frog_fluent,'frog')

Only one group for reading condition. Matching not possible.


In [None]:
# match number of consonants per utterance by cutting long control utterances by the difference in average consonant number between groups
# When a condition has only one group, match_consonants prints a notice and returns its input frame.
matched_consonants_read_fluent = match_consonants(reading_consonants_fluent, short_group_read, difference_c_read_fluent,'reading')
matched_consonants_frog_fluent = match_consonants(frog_consonants_fluent, short_group_frog, difference_c_frog_fluent,'frog')

Only one group for reading condition. Matching not possible.


In [None]:
# A NaN difference (a group is missing) means nothing was cut; replace it with 0
# so that round() below does not fail.
if np.isnan(difference_c_read_fluent):
  difference_c_read_fluent = 0
if np.isnan(difference_c_frog_fluent):
  difference_c_frog_fluent = 0
if np.isnan(difference_v_read_fluent):
  difference_v_read_fluent = 0
# This check used to test the consonant difference, so a NaN vowel difference was never reset.
if np.isnan(difference_v_frog_fluent):
  difference_v_frog_fluent = 0

print(f"{long_group_read} utterances were cut by {round(difference_v_read_fluent)} vowels and {round(difference_c_read_fluent)} consonants in the reading condition.")
print(f"{long_group_frog} utterances were cut by {round(difference_v_frog_fluent)} vowels and {round(difference_c_frog_fluent)} consonants in the frog condition.")

Control utterances were cut by 0 vowels and 0 consonants in the reading condition.
PWS utterances were cut by 1 vowels and 2 consonants in the frog condition.


**2.3 Post-Matching**

In [None]:
# combine the shortened (longer-group) rows with the untouched rows of the shorter group
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# Note: the matched frames are overwritten, so re-running this cell adds the shorter group again.
if short_group_frog in ("PWS", "Control"):
  matched_vowels_frog_fluent = pd.concat(
    [matched_vowels_frog_fluent, frog_vowels_fluent[frog_vowels_fluent["Group"] == short_group_frog]],
    ignore_index=True)
  matched_consonants_frog_fluent = pd.concat(
    [matched_consonants_frog_fluent, frog_consonants_fluent[frog_consonants_fluent["Group"] == short_group_frog]],
    ignore_index=True)

In [None]:
# combine the shortened (longer-group) rows with the untouched rows of the shorter group
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
if short_group_read in ("PWS", "Control"):
  matched_vowels_reading_fluent = pd.concat(
    [matched_vowels_read_fluent, reading_vowels_fluent[reading_vowels_fluent["Group"] == short_group_read]],
    ignore_index=True)
  matched_consonants_reading_fluent = pd.concat(
    [matched_consonants_read_fluent, reading_consonants_fluent[reading_consonants_fluent["Group"] == short_group_read]],
    ignore_index=True)
  # The post-matching cells read the *_read_* names, so bind the combined frames to
  # them as well; otherwise the shorter group would be missing from every later step.
  # Note: as in the frog cell, re-running this cell adds the shorter group again.
  matched_vowels_read_fluent = matched_vowels_reading_fluent
  matched_consonants_read_fluent = matched_consonants_reading_fluent

In [None]:
# count vowels per utterance after matching
# The input already carries a "Syllables" column from the first pass; count_vowels
# renames it to 'Unmached_Vowels' and adds the recomputed count as "Syllables".
[post_frog_vowels_fluent, post_frog_vowel]  = count_vowels(matched_vowels_frog_fluent,'frog')
[post_reading_vowels_fluent, post_reading_vowel]  = count_vowels(matched_vowels_read_fluent,'reading')

In [None]:
# average # vowels per breath group for each participant after matching
# result: per-participant table with the columns "Group", "ID" and "Syllables"
post_frog_participant_vowels_avg_fluent = participant_vowel_avg(post_frog_vowels_fluent, 'frog')
post_reading_participant_vowels_avg_fluent = participant_vowel_avg(post_reading_vowels_fluent,'reading')

In [None]:
# count consonants per utterance after matching
# The input already carries a "Consonants" column from the first pass; count_consonants
# renames it to 'Unmatched_Cons' and adds the recomputed count as "Consonants".
[post_frog_consonants_fluent, post_frog_consonant_avg_fluent]  = count_consonants(matched_consonants_frog_fluent, 'frog')
[post_reading_consonants_fluent, post_reading_consonant_avg_fluent]  = count_consonants(matched_consonants_read_fluent,'reading')

In [None]:
#average # consonants per breath group for each participant after matching
# result: per-participant table with the columns "Group", "ID" and "Consonants"
post_frog_participant_cons_avg_fluent = participant_consonant_avg(post_frog_consonants_fluent, 'frog')
post_reading_participant_cons_avg_fluent = participant_consonant_avg(post_reading_consonants_fluent,'reading')

In [None]:
# compare group averages of number of vowels per utterance and number of consonants per utterance
# frog
# (this overwrites the pre-matching values held in the same variables)
(string1, string2, string3, string4, string5,
 short_v_frog_fluent, short_c_frog_fluent,
 difference_v_frog_fluent, difference_c_frog_fluent,
 long_group_frog, short_group_frog) = compare_groups(post_frog_participant_vowels_avg_fluent, post_frog_participant_cons_avg_fluent)

# a single print with a newline separator emits the same five lines
print(string1, string2, string3, string4, string5, sep="\n")

PWS produced on average 5.95 syllables per utterance, while Control participants produced 5.86 syllables on average.
This means that there is a difference of 0.09 syllables per utterance.

PWS produced on average 8.36 consonants per utterance, while Control participants produced 7.57 consonants on average.
This means that there is a difference of 0.79 consonants per utterance.

PWS participants produced longer utterances than Control on the frog condition.


In [None]:
# compare group averages of number of vowels per utterance and number of consonants per utterance
# reading
# (this overwrites the pre-matching values held in the same variables)
(string5, string6, string7, string8, string9,
 short_v_read_fluent, short_c_read_fluent,
 difference_v_read_fluent, difference_c_read_fluent,
 long_group_read, short_group_read) = compare_groups(post_reading_participant_vowels_avg_fluent, post_reading_participant_cons_avg_fluent)

# a single print with a newline separator emits the same five lines
print(string5, string6, string7, string8, string9, sep="\n")

Control produced on average 7.0 syllables per utterance, while PWS participants produced nan syllables on average.
This means that there is a difference of nan syllables per utterance.

Control produced on average 9.92 consonants per utterance, while PWS participants produced nan consonants on average.
This means that there is a difference of nan consonants per utterance.

Control participants produced longer utterances than PWS on the frog condition.


# **3. Significance After Matching**

In [None]:
# test if average number of vowels per breathgroup is still significantly different between groups after matching
# frog
# short_group_frog here is the value from the post-matching comparison above
string1 = ttest_vowelDifference(post_frog_participant_vowels_avg_fluent, short_group_frog)
print(string1)

In [None]:
# test if average number of vowels per breathgroup is still significantly different between groups after matching
# reading
# short_group_read here is the value from the post-matching comparison above
string2 = ttest_vowelDifference(post_reading_participant_vowels_avg_fluent, short_group_read)
print(string2)

In [None]:
# test if average number of consonants per breathgroup is still significantly different between groups after matching
# frog
# string1 is reused, so the vowel message printed earlier is replaced
string1 = ttest_conDifference(post_frog_participant_cons_avg_fluent, short_group_frog)
print(string1)

In [None]:
# test if average number of consonants per breathgroup is still significantly different between groups after matching
# reading
# string2 is reused, so the vowel message printed earlier is replaced
string2 = ttest_conDifference(post_reading_participant_cons_avg_fluent, short_group_read)
print(string2)

# **4. Count Utterances in Each Category**

In [None]:
# Split the post-matching vowel rows by group for each condition.
pws_frog_fluent = post_frog_vowels_fluent[post_frog_vowels_fluent["Group"] == "PWS"]
control_frog_fluent = post_frog_vowels_fluent[post_frog_vowels_fluent["Group"] == "Control"]
pws_read_fluent = post_reading_vowels_fluent[post_reading_vowels_fluent["Group"] == "PWS"]
control_read_fluent = post_reading_vowels_fluent[post_reading_vowels_fluent["Group"] == "Control"]

In [None]:
# Utterance = one distinct Breath.Group of one participant; counted on the vowel rows only.
number_reading_control_fluent = countUtterances(control_read_fluent)
number_frog_control_fluent = countUtterances(control_frog_fluent)
number_reading_pws_fluent = countUtterances(pws_read_fluent)
number_frog_pws_fluent = countUtterances(pws_frog_fluent)

In [None]:
# Report the utterance counts per group and condition.
print(f"There are {number_reading_pws_fluent} utterances produced by PWS in the reading condition and {number_reading_control_fluent} utterance produced by control.")
print(f"\nThere are {number_frog_pws_fluent} utterances produced by PWS in the frog condition and {number_frog_control_fluent} utterance produced by control.")

There are 0 utterances produced by PWS in the reading condition and 13 utterance produced by control.

There are 39 utterances produced by PWS in the frog condition and 7 utterance produced by control.


# **5. Create Output Dataframes**

In [None]:
# Remove the pre-matching count columns ('Unmached_Vowels' / 'Unmatched_Cons' are the
# names count_vowels / count_consonants give to an earlier count) before export.
# The frames are modified in place.
post_reading_vowels_fluent.drop(["Unmached_Vowels"],axis=1 , inplace = True)
post_frog_vowels_fluent.drop(["Unmached_Vowels"],axis=1 , inplace = True)
post_reading_consonants_fluent.drop(["Unmatched_Cons"],axis=1 , inplace = True)
post_frog_consonants_fluent.drop(["Unmatched_Cons"],axis=1 , inplace = True)

# **6. Save**

In [None]:
%cd /content/gdrive/MyDrive/ATAS_Plus/Duration_Metrics

/content/gdrive/MyDrive/ATAS_Plus/Duration_Metrics


In [None]:
# output folder for the matched tables, relative to the current working directory
dir = "3.MLU_Matched"

# create the folder only when nothing exists at that path yet
if not os.path.exists(dir):
  os.mkdir(dir)

In [None]:
%cd /content/gdrive/MyDrive/ATAS_Plus/Duration_Metrics/3.MLU_Matched/

/content/gdrive/MyDrive/ATAS_Plus/Duration_Metrics/3.MLU_Matched


In [None]:
# export
# Write the four post-matching tables as Excel files into the current working directory.
post_reading_vowels_fluent.to_excel("matchedVowels_reading_FLUENT.xlsx")
post_reading_consonants_fluent.to_excel("matchedConsonants_reading_FLUENT.xlsx")
post_frog_vowels_fluent.to_excel("matchedVowels_frog_FLUENT.xlsx")
post_frog_consonants_fluent.to_excel("matchedConsonants_frog_FLUENT.xlsx")