# Linguistic Style Matching

This notebook computes linguistic style matching (LSM) scores with 95% confidence intervals, calculated with non-parametric bootstrap resampling, and prints the pgfplots/TikZ code used to plot them. We provide the precomputed LSM scores for the function word categories for each post-reply pair, since a license is required for LIWC (see http://liwc.wpengine.com/).

In [1]:
import pandas as pd 
from utils import bootstrap_resampling


# Precomputed LSM scores. Rows are later selected by matching this frame's
# index against task_df's index, so the two frames must share row labels.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling --
# only load this file from a trusted source.
df = pd.read_pickle('../Data/func-lsm-info.pickle')
# Task metadata; the 'subreddit' and 'author_type' columns are read further down.
task_df = pd.read_csv('../Data/task_data.csv')
# Table with 'topic' and 'subreddit' columns, used to group subreddits by topic.
subreddit_topics = pd.read_csv('../Data/subreddit_topics.csv')

In [2]:
# Map every topic to the array of subreddits filed under it, in the order the
# topics first appear in subreddit_topics.
groups = {
    topic_name: subreddit_topics.loc[subreddit_topics.topic == topic_name, 'subreddit'].values
    for topic_name in subreddit_topics.topic.unique()
}

# Extra pooled group covering every subreddit listed in the table.
groups['All'] = subreddit_topics.subreddit.values

In [3]:
# Summary table under construction: one list per output column, each receiving
# one entry per subreddit group.
lsm_table = {
    'Category': [],
    'Mean-MHP': [],
    'Lower Error-MHP': [],
    'Upper Error-MHP': [],
    'Mean-Peer': [],
    'Lower Error-Peer': [],
    'Upper Error-Peer': [],
}

# 'mhp' authors: bootstrap the composite LSM score for every subreddit group.
# NOTE(review): no random seed is set in this notebook, so the error values may
# vary between runs -- confirm whether bootstrap_resampling seeds itself.
for group, group_subreddits in groups.items():
    print(group)

    # Row labels of the 'mhp'-authored task rows within this group's subreddits.
    in_group = task_df['subreddit'].isin(group_subreddits)
    is_mhp = task_df['author_type'] == 'mhp'
    mhp_index = task_df[in_group & is_mhp].index

    # LSM scores of those rows, selected through the shared index labels.
    mhp_scores = list(df.loc[df.index.isin(mhp_index), 'composite-lsm-func'].values)
    mean_lsm, err_below, err_above = bootstrap_resampling(mhp_scores)

    for column, entry in (
        ('Category', group),
        ('Mean-MHP', mean_lsm),
        ('Lower Error-MHP', err_below),
        ('Upper Error-MHP', err_above),
    ):
        lsm_table[column].append(entry)

Trauma & Abuse (Trauma)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:00<00:00, 57490.32it/s]


Psychosis & Anxiety (Anx)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:00<00:00, 15736.72it/s]


Compulsive Disorders (Compuls.)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:00<00:00, 103023.02it/s]


Coping & Therapy (Cope)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:01<00:00, 9793.01it/s]


Mood Disorders (Mood)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:01<00:00, 8012.27it/s]


Addiction & Impulse Control (Addict.)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:00<00:00, 12226.73it/s]


Eating & Body (Body)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:00<00:00, 146809.01it/s]


Neurodevelopmental Disorders (Neurodiv.)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:00<00:00, 15882.71it/s]


General (Health)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:02<00:00, 4762.31it/s]


Broad Social (Social)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:02<00:00, 4170.83it/s]


All
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:08<00:00, 1136.51it/s]


In [4]:
""" non-mhp """
# 'non-mhp' (peer) authors: bootstrap the composite LSM score for every
# subreddit group.
#
# Results are gathered in fresh local lists and written into lsm_table in one
# step after the loop. Appending directly to the lists inside lsm_table made
# this cell non-idempotent: running it a second time doubled the Peer columns,
# and building the DataFrame from lsm_table then failed on unequal lengths.
peer_means, peer_lower_errors, peer_upper_errors = [], [], []

for group, group_subreddits in groups.items():
    print(group)

    # Task rows that belong to this group of subreddits.
    group_subreddits_df = task_df[task_df['subreddit'].isin(group_subreddits)]

    # Row labels of the peer-authored rows, then the LSM rows sharing those labels.
    peer_index = group_subreddits_df[group_subreddits_df['author_type'] == 'non-mhp'].index
    peer_lsm_df = df[df.index.isin(peer_index)]

    values = list(peer_lsm_df['composite-lsm-func'].values)
    ave, lower_error, upper_error = bootstrap_resampling(values)

    peer_means.append(ave)
    peer_lower_errors.append(lower_error)
    peer_upper_errors.append(upper_error)

# Overwrite (rather than extend) the Peer columns; assigning to keys that
# already exist keeps the column order of lsm_table unchanged.
lsm_table['Mean-Peer'] = peer_means
lsm_table['Lower Error-Peer'] = peer_lower_errors
lsm_table['Upper Error-Peer'] = peer_upper_errors

Trauma & Abuse (Trauma)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:00<00:00, 11777.95it/s]


Psychosis & Anxiety (Anx)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:06<00:00, 1629.65it/s]


Compulsive Disorders (Compuls.)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:00<00:00, 24070.13it/s]


Coping & Therapy (Cope)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:07<00:00, 1310.05it/s]


Mood Disorders (Mood)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:07<00:00, 1345.07it/s]


Addiction & Impulse Control (Addict.)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:12<00:00, 775.48it/s]


Eating & Body (Body)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:00<00:00, 17695.95it/s]


Neurodevelopmental Disorders (Neurodiv.)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:10<00:00, 961.39it/s]


General (Health)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:20<00:00, 482.24it/s]


Broad Social (Social)
Beginning bootstrap resampling


100%|██████████| 10000/10000 [00:18<00:00, 551.77it/s]


All
Beginning bootstrap resampling


100%|██████████| 10000/10000 [01:45<00:00, 94.55it/s]


In [5]:
# Assemble the per-group summary into a DataFrame, keeping the columns in the
# order the keys were added to lsm_table, and show it as the cell output.
lsm_table_df = pd.DataFrame(lsm_table, columns=list(lsm_table))
lsm_table_df

Unnamed: 0,Category,Mean-MHP,Lower Error-MHP,Upper Error-MHP,Mean-Peer,Lower Error-Peer,Upper Error-Peer
0,Trauma & Abuse (Trauma),0.631012,0.030815,0.029962,0.637887,0.012096,0.011958
1,Psychosis & Anxiety (Anx),0.59972,0.015544,0.015192,0.592206,0.004732,0.004618
2,Compulsive Disorders (Compuls.),0.628175,0.037389,0.035752,0.585642,0.017936,0.017818
3,Coping & Therapy (Cope),0.647327,0.010553,0.010067,0.588711,0.004486,0.004598
4,Mood Disorders (Mood),0.643728,0.009328,0.00918,0.610514,0.004238,0.00417
5,Addiction & Impulse Control (Addict.),0.570995,0.015134,0.015199,0.540804,0.003959,0.003898
6,Eating & Body (Body),0.588794,0.046969,0.04433,0.615564,0.015878,0.015294
7,Neurodevelopmental Disorders (Neurodiv.),0.630906,0.014303,0.013919,0.59928,0.003639,0.00369
8,General (Health),0.614486,0.007747,0.007426,0.608941,0.002665,0.002705
9,Broad Social (Social),0.506024,0.008594,0.00863,0.618481,0.002873,0.002844


In [6]:
# Short x-axis labels for the figure. They are paired with the rows of
# lsm_table_df purely by position, so the order here must match the row order.
categories = ['Trauma', 'Anx', 'Compuls', 'Cope', 'Mood', 'Addict', 'Body', 'Neurodiv', 'Health', 'Social', 'All']

# zip() would silently drop the surplus entries on a length mismatch and emit a
# figure with missing or mislabelled bars, so fail loudly instead.
if len(categories) != len(lsm_table_df):
    raise ValueError(
        "categories has {} labels but lsm_table_df has {} rows".format(
            len(categories), len(lsm_table_df)
        )
    )


def _print_coordinates(labels, means, upper_errors, lower_errors):
    """Print one pgfplots coordinate with asymmetric error bars per label.

    Each line has the form ``(label,mean) += (0,upper) -= (0,lower)``; any
    ``&`` in the line is escaped for LaTeX.
    """
    for label, mean, upper, lower in zip(labels, means, upper_errors, lower_errors):
        print("({},{}) += (0,{}) -= (0,{})".format(label, mean, upper, lower).replace("&", r"\&"))


# Raw strings keep the LaTeX backslashes literal; sequences such as "\p", "\&"
# and "\%" in ordinary strings are invalid escapes that Python warns about.
print(r"\begin{figure*}")
print(r"    \centering")
print(r"      \begin{tikzpicture}")
print(r"      \begin{axis}[")
print(r"      width  = \linewidth,")
print(r"      height = 4cm,")
print(r"      ybar=2*\pgflinewidth,")
print(r"      ymajorgrids = true,")
# Doubled braces produce the literal { } that pgfplots expects around the list.
print("      symbolic x coords={{{}}},".format(",".join(categories)).replace("&", r"\&"))
print(r"      xtick = data,")
print(r"      scaled y ticks = false,")
print(r"      ymin=0,")
print(r"      legend cell align=left,")
# Optional axis settings left disabled:
#   major x tick style = transparent,
#   bar width=25pt,
#   enlarge x limits=0.50,
#   legend style={at={(0.5,-0.12)},anchor=north}
print(r"      ]")

# First series: MHP bars (white fill).
print(r"      \addplot[style={fill=white},error bars/.cd, y dir=both, y explicit]")
print(r"          coordinates {")
_print_coordinates(
    categories,
    lsm_table_df['Mean-MHP'].values,
    lsm_table_df['Upper Error-MHP'].values,
    lsm_table_df['Lower Error-MHP'].values,
)
print(r"          };")

# Second series: Peer bars (light gray fill).
print(r"      \addplot[style={fill=lightgray},error bars/.cd, y dir=both, y explicit,error bar style=black]")
print(r"           coordinates {")
_print_coordinates(
    categories,
    lsm_table_df['Mean-Peer'].values,
    lsm_table_df['Upper Error-Peer'].values,
    lsm_table_df['Lower Error-Peer'].values,
)
print(r"          };")
print(r"      \legend{MHP, Peer}")
print(r"  \end{axis}")
print(r"  \end{tikzpicture}")
print(r" \caption{LSM scores with 95\% confidence intervals.}")
print(r" \label{fig:lsm}")
print(r"\end{figure*}")



\begin{figure*}
    \centering
      \begin{tikzpicture}
      \begin{axis}[
      width  = \linewidth,
      height = 4cm,
      ybar=2*\pgflinewidth,
      ymajorgrids = true,
      symbolic x coords={Trauma,Anx,Compuls,Cope,Mood,Addict,Body,Neurodiv,Health,Social,All},
      xtick = data,
      scaled y ticks = false,
      ymin=0,
      legend cell align=left,
      ]
      \addplot[style={fill=white},error bars/.cd, y dir=both, y explicit]
          coordinates {
(Trauma,0.6310119156541756) += (0,0.029962442897110475) -= (0,0.03081514525447937)
(Anx,0.5997199013986588) += (0,0.01519174511662591) -= (0,0.015544352706185927)
(Compuls,0.6281754095486042) += (0,0.0357519342555046) -= (0,0.03738872492676926)
(Cope,0.6473266495067116) += (0,0.010066984978184701) -= (0,0.010552935538052588)
(Mood,0.6437280427655686) += (0,0.009180262091117664) -= (0,0.009328442085683064)
(Addict,0.5709948285212645) += (0,0.015199256087097268) -= (0,0.015134390184438407)
(Body,0.5887944659204036) += (0,0.