In [19]:
import pandas as pd
import numpy as np
import ast
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, classification_report, accuracy_score, matthews_corrcoef, roc_auc_score


### Binary prediction metrics

In [2]:
# Performance measures

def prediction_measures(y_test,ypred):
    """Return the 'depression' entry, the 'normal' entry and the accuracy of a classification report.

    Both arguments are forwarded unchanged to ``classification_report`` as
    ``(y_test, ypred)``; the report is requested as a dict and the three
    entries are returned as a tuple, in that order.
    """
    report = classification_report(y_test, ypred, output_dict=True)
    wanted_keys = ('depression', 'normal', 'accuracy')
    return tuple(report[key] for key in wanted_keys)


In [12]:
# All measurements

def _add_group_scores(score_dict, tag, y_true, y_pred):
    """Add per-class report metrics, accuracy and MCC for one group to ``score_dict``.

    Keys are built as ``<class><tag><Metric>`` (e.g. ``depressionSensRecall``),
    followed by ``accuracy<tag>`` and ``MCC<tag>``.
    """
    # prediction_measures forwards to classification_report(y_true, y_pred),
    # so the ground truth has to be passed first.
    depression_performance, normal_performance, accuracy = prediction_measures(y_true, y_pred)
    for label, performance in (('depression', depression_performance),
                               ('normal', normal_performance)):
        score_dict[f'{label}{tag}Precision'] = performance['precision']
        score_dict[f'{label}{tag}Recall'] = performance['recall']
        score_dict[f'{label}{tag}F1'] = performance['f1-score']
        score_dict[f'{label}{tag}Support'] = performance['support']
    score_dict[f'accuracy{tag}'] = accuracy
    score_dict[f'MCC{tag}'] = matthews_corrcoef(y_true, y_pred)


def all_measures(predsensitive, truesensitive, predother, trueother, name='test',single=False):
    """Collect performance and fairness metrics for one predictor into a flat dict.

    Parameters
    ----------
    predsensitive, truesensitive : predictions and true labels of the sensitive group.
    predother, trueother : predictions and true labels of the other group
        (not used when ``single`` is true).
    name : value stored under the ``'predictor'`` key.
    single : when true, only the sensitive-group metrics are computed.

    Returns
    -------
    dict with ``'predictor'``, the ``*Sens*`` metrics and, unless ``single``,
    the ``*Other*`` and ``*Total*`` metrics plus the between-group ratios
    ``statisticalParity``, ``equalOpportunity``, ``equalisedOdds`` and
    ``equalAccuracy`` (each is sensitive-group value / other-group value).
    """
    score_dict = {'predictor': name}

    # Metrics for sensitive group
    _add_group_scores(score_dict, 'Sens', truesensitive, predsensitive)
    if single:
        return score_dict

    # Metrics for other group
    _add_group_scores(score_dict, 'Other', trueother, predother)

    # Metrics for total (both groups stacked; built once and reused)
    true_total = pd.concat([truesensitive, trueother])
    pred_total = pd.concat([predsensitive, predother])
    _add_group_scores(score_dict, 'Total', true_total, pred_total)

    # Fairness metrics between groups.
    # Labels are passed explicitly so the matrix is always 2x2, even when a
    # group contains a single class; the order equals the default sorted order.
    # NOTE(review): with this order 'normal' sits in the "positive" slot
    # (TP/FP refer to 'normal') - confirm this is the intended positive class.
    labels = ['depression', 'normal']
    TNS, FPS, FNS, TPS = confusion_matrix(truesensitive, predsensitive, labels=labels).ravel()
    TNO, FPO, FNO, TPO = confusion_matrix(trueother, predother, labels=labels).ravel()
    # A zero denominator below yields inf/nan with a NumPy RuntimeWarning
    # rather than an exception, because the counts are NumPy integers.
    score_dict['statisticalParity'] = ((TPS+FPS)/len(truesensitive)) / ((TPO+FPO)/len(trueother))
    score_dict['equalOpportunity'] = (TPS / (TPS+FNS)) / (TPO / (TPO+FNO))
    score_dict['equalisedOdds'] = ((TPS / (TPS+FNS)) + (TNS / (TNS+FPS))) / ((TPO / (TPO+FNO)) + (TNO / (TNO+FPO)))
    score_dict['equalAccuracy'] = accuracy_score(truesensitive, predsensitive) / accuracy_score(trueother, predother)
    return score_dict

In [15]:

# Evaluate every predictor on the test split (female = sensitive group,
# male = other group) and append one row per predictor to the results file.
options = ['w2vec','ruleBased','DictionaryBased','TFIDF500','TFIDFEX500','TFIDFIN500','blingfire','reweightedblingfire','nltk','reweightednltk','spacysm','reweightedspacysm','spacylg','reweightedspacylg','spacytrf','reweightedspacytrf']

RESULTS_PATH = 'data/results/depressiontestresults.csv'

# Load earlier results once. Only a missing or empty file means "no results
# yet"; any other read error is raised so that an existing results file is
# never silently replaced by the new rows alone.
try:
    results = pd.read_csv(RESULTS_PATH)
except (FileNotFoundError, pd.errors.EmptyDataError):
    results = pd.DataFrame()

new_rows = []
for name in options:
    pred_df = pd.read_json(f'data/predictionData/{name}Pred.json', lines=True, orient='records')

    is_test = pred_df['split'] == 'test'
    male_test = (pred_df['gender'] == 'm') & is_test
    female_test = (pred_df['gender'] == 'f') & is_test

    pred_male = pred_df.loc[male_test, ['prediction']]
    pred_female = pred_df.loc[female_test, ['prediction']]
    y_testm = pred_df.loc[male_test, ['label']]
    y_testf = pred_df.loc[female_test, ['label']]

    score_dict = all_measures(pred_female,y_testf,pred_male,y_testm, name)
    new_rows.append(score_dict)

# NOTE: rows are appended to whatever is already stored, so re-running this
# cell adds a second set of rows for the same predictors.
results = pd.concat([results, pd.DataFrame(new_rows)], ignore_index=True)
results.to_csv(RESULTS_PATH, index=False)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  score_dict['statisticalParity'] = ((TPS+FPS)/len(truesensitive)) / ((TPO+FPO)/len(trueother))
  score_dict['equalOpportunity'] = (TPS / (TPS+FNS)) / (TPO / (TPO+FNO))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _war

In [5]:
# Spot check: Matthews correlation per gender on the test split of the
# TFIDF500 predictions (male first, then female).
df = pd.read_json('data/predictionData/TFIDF500Pred.json', lines=True, orient='records')

on_test_split = df['split'] == 'test'
male_rows = (df['gender'] == 'm') & on_test_split
female_rows = (df['gender'] == 'f') & on_test_split

pred_male = df.loc[male_rows, ['prediction']]
pred_female = df.loc[female_rows, ['prediction']]
y_testm = df.loc[male_rows, ['label']]
y_testf = df.loc[female_rows, ['label']]

mcc_male = matthews_corrcoef(y_testm, pred_male)
mcc_female = matthews_corrcoef(y_testf, pred_female)
print(mcc_male)
print(mcc_female)

1.0
0.8920208508039849


### Symptom prediction results

In [6]:
def getPred(df, threshold, min_prob=0.3, min_words=10):
    """Build one row per sufficiently long sentence that has at least one candidate symptom.

    Parameters
    ----------
    df : frame with a ``'sentences'`` column; each cell is an iterable of
        dicts holding ``'sentence'`` (text) and ``'probabilities'``
        (a mapping with an entry for every symptom name listed below).
    threshold : probability at or above which a candidate symptom is marked ``'1'``.
    min_prob : probability at or above which a symptom is listed as a candidate
        at all (default 0.3, the previously hard-coded value).
    min_words : minimum number of whitespace-separated words a sentence needs
        to be considered (default 10, the previously hard-coded value).

    Returns
    -------
    pandas.DataFrame with integer column labels. Column 0 holds
    ``{'id': [post number, sentence number], 'sentence': text}``; after that
    every candidate symptom contributes three columns:
    ``{symptom: probability}``, ``'1'``/``'0'`` and the probability itself.
    Sentences without any candidate symptom are left out.
    """
    symps = [
        "Anxious_Mood", "Autonomic_symptoms", "Cardiovascular_symptoms",
        "Catatonic_behavior", "Decreased_energy_tiredness_fatigue",
        "Depressed_Mood", "Gastrointestinal_symptoms", "Genitourinary_symptoms",
        "Hyperactivity_agitation", "Impulsivity", "Inattention",
        "Indecisiveness", "Respiratory_symptoms", "Suicidal_ideas",
        "Worthlessness_and_guilty", "avoidance_of_stimuli",
        "compensatory_behaviors_to_prevent_weight_gain", "compulsions",
        "diminished_emotional_expression",
        "do_things_easily_get_painful_consequences",
        "drastical_shift_in_mood_and_energy", "fear_about_social_situations",
        "fear_of_gaining_weight", "fears_of_being_negatively_evaluated",
        "flight_of_ideas", "intrusion_symptoms",
        "loss_of_interest_or_motivation", "more_talktive", "obsession",
        "panic_fear", "pessimism", "poor_memory", "sleep_disturbance",
        "somatic_muscle", "somatic_symptoms_others",
        "somatic_symptoms_sensory", "weight_and_appetite_change",
        "Anger_Irritability",
    ]

    relevant_sents = []
    # The sentence counter runs across all posts (it is not reset per post)
    # and also counts sentences that are skipped for being too short.
    sentid = 0

    for postid, post in enumerate(df['sentences'], start=1):
        for sent in post:
            sentid += 1
            text = sent['sentence']
            if len(text.split()) < min_words:
                continue

            probabilities = sent['probabilities']
            row = [{'id': [postid, sentid], 'sentence': text}]
            for symptom in symps:
                prob = probabilities[symptom]
                if prob >= min_prob:
                    row.append({symptom: prob})
                    row.append('1' if prob >= threshold else '0')
                    row.append(prob)

            # 1 id entry + 3 entries per symptom: >= 4 means at least one symptom.
            if len(row) >= 4:
                relevant_sents.append(row)

    return pd.DataFrame(relevant_sents)


In [7]:
# Create total set: female posts followed by male posts, written as one file.
female_posts = pd.read_json('data/annotationData/femaledata.json')
male_posts = pd.read_json('data/annotationData/maledata.json')

# reset_index() (without drop) keeps the old row labels as an 'index' column,
# which is therefore also written to the JSON file.
df = pd.concat([female_posts, male_posts]).reset_index()
print(len(df))
df.to_json('data/annotationData/totaldata.json', orient='records')

40


In [22]:
# Sweep the decision threshold and plot symptom-level metrics against it.
ytruem = pd.read_csv('data/annotationData/MaleAnnotationsetFilled.csv', sep=';')
ytruef = pd.read_csv('data/annotationData/FemaleAnnotationsetFilled.csv', sep=';')


precisions = []
recalls = []
f1_scores = []
tprs = []  # declared for parity with the other metric lists; not populated here
fprs = []  # declared for parity with the other metric lists; not populated here
mccs = []
rocs = []

thresholds = list(np.arange(0.3,1,0.01))
gender = 'total'

if gender == 'female':
    ytrue = ytruef
elif gender == 'male':
    ytrue = ytruem
elif gender == 'total':
    ytrue = pd.concat([ytruef,ytruem]).reset_index()
else:
    # Fail loudly instead of silently reusing a stale `ytrue` from the kernel.
    raise ValueError(f"gender must be 'female', 'male' or 'total', got {gender!r}")

# The post file does not depend on the threshold, so it is read only once.
posts = pd.read_json(f'data/annotationData/{gender}data.json')

for threshold in thresholds:
    ypred = getPred(posts, threshold)
    dataset = []
    for x in range(len(ytrue)):
        # Annotation columns come in groups of three starting at '1':
        # str(i) holds a dict literal keyed by the symptom, str(i+1) the label.
        # In ypred, i+1 is the '1'/'0' prediction and i+2 the probability.
        for i in range(1,30,3):
            try:
                obj = ast.literal_eval(ytrue[str(i)][x])
                symp = list(obj.keys())[0]
                true = ytrue[str(i+1)][x]
                pred = ypred[i+1][x]
                prob = ypred[i+2][x]
                data = {'post':x,'symptom': symp, 'true':int(true), 'pred':int(pred), 'prob':float(prob)}
            except (KeyError, IndexError, ValueError, TypeError, SyntaxError, AttributeError):
                # Slot is absent or empty (missing column/row, NaN/None cell,
                # unparsable annotation): skip it.
                continue
            dataset.append(data)
    df = pd.DataFrame(dataset)
    true = df['true']
    pred = df['pred']
    prob = df['prob']
    precisions.append(precision_score(true, pred))
    recalls.append(recall_score(true, pred))
    f1_scores.append(f1_score(true, pred))
    mccs.append(matthews_corrcoef(true, pred))
    # ROC-AUC uses the probabilities, and getPred selects rows independently
    # of `threshold`, so this value does not vary across the sweep.
    rocs.append(roc_auc_score(true,prob))
    # symptompredictions for further analysis
    # df.to_csv('data/predictionData/symptomPred.csv')

# Create plot
fig = go.Figure()
for trace_name, values in (('Precision', precisions),
                           ('Recall', recalls),
                           ('F1 Score', f1_scores),
                           ('Matthews', mccs),
                           ('ROC-AUC', rocs)):
    fig.add_trace(go.Scatter(x=thresholds, y=values, mode='lines', name=trace_name))

fig.update_layout(title='Threshold vs. Metric Scores',
                  xaxis_title='Threshold',
                  yaxis_title='Score')

fig.show()



In [9]:
# Symptom-level metrics at one fixed decision threshold.
ytruem = pd.read_csv('data/annotationData/MaleAnnotationsetFilled.csv', sep=';')
ytruef = pd.read_csv('data/annotationData/FemaleAnnotationsetFilled.csv', sep=';')

threshold = 0.7
gender = 'total'

if gender == 'female':
    ytrue = ytruef
elif gender == 'male':
    ytrue = ytruem
elif gender == 'total':
    ytrue = pd.concat([ytruef,ytruem]).reset_index()
else:
    # Fail loudly instead of silently reusing a stale `ytrue` from the kernel.
    raise ValueError(f"gender must be 'female', 'male' or 'total', got {gender!r}")


ypred = getPred(pd.read_json(f'data/annotationData/{gender}data.json'), threshold)
dataset = []
for x in range(len(ytrue)):
    # Annotation columns come in groups of three starting at '1':
    # str(i) holds a dict literal keyed by the symptom, str(i+1) the label.
    # In ypred, i+1 is the '1'/'0' prediction and i+2 the probability.
    for i in range(1,30,3):
        try:
            obj = ast.literal_eval(ytrue[str(i)][x])
            symp = list(obj.keys())[0]
            true = ytrue[str(i+1)][x]
            pred = ypred[i+1][x]
            prob = ypred[i+2][x]
            data = {'post':x,'symptom': symp, 'true':int(true), 'pred':int(pred), 'prob':float(prob)}
        except (KeyError, IndexError, ValueError, TypeError, SyntaxError, AttributeError):
            # Slot is absent or empty (missing column/row, NaN/None cell,
            # unparsable annotation): skip it.
            continue
        dataset.append(data)
df = pd.DataFrame(dataset)
true = df['true']
pred = df['pred']
prob = df['prob']
precision = precision_score(true, pred)
recall = recall_score(true, pred)
f1 = f1_score(true, pred)
auc = roc_auc_score(true, prob)
mcc = matthews_corrcoef(true, pred)


0.030310732974272555
