In [1]:
import pandas as pd
import numpy as np
import ast
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, classification_report, accuracy_score, matthews_corrcoef, roc_auc_score


### Binary prediction Metrics

In [2]:
# Performance measures

def prediction_measures(y_test,ypred):
    performance = classification_report(y_test,ypred,output_dict=True)
    return performance['depression'],performance['normal'],performance['accuracy']


In [6]:
# All measurements

def all_measures(predfemale, truefemale, probfemale, predmale, truemale,probmale, name='test',single=False,roc=True):
    score_dict = {}
    score_dict['predictor'] = name
    # Metrics for female group
    depression_performance, normal_performance ,accuracy = prediction_measures(truefemale,predfemale)
    score_dict['depressionFemalePrecision'] = depression_performance['precision']
    score_dict['depressionFemaleRecall'] = depression_performance['recall']
    score_dict['depressionFemaleF1'] = depression_performance['f1-score']
    # score_dict['depressionFemaleSupport'] = depression_performance['support']
    score_dict['normalFemalePrecision'] = normal_performance['precision']
    score_dict['normalFemaleRecall'] = normal_performance['recall']
    score_dict['normalFemaleF1'] = normal_performance['f1-score']
    # score_dict['normalFemaleSupport'] = normal_performance['support']
    score_dict['accuracyFemale'] = accuracy
    score_dict['MCCFemale'] = matthews_corrcoef(truefemale,predfemale)
    if roc == True:
        score_dict['roc-aucFemale'] = roc_auc_score(truefemale,probfemale)
    if single == False:
        # Metrics for male group
        depression_performance, normal_performance ,accuracy = prediction_measures(truemale, predmale)
        score_dict['depressionMalePrecision'] = depression_performance['precision']
        score_dict['depressionMaleRecall'] = depression_performance['recall']
        score_dict['depressionMaleF1'] = depression_performance['f1-score']
        # score_dict['depressionMaleSupport'] = depression_performance['support']
        score_dict['normalMalePrecision'] = normal_performance['precision']
        score_dict['normalMaleRecall'] = normal_performance['recall']
        score_dict['normalMaleF1'] = normal_performance['f1-score']
        # score_dict['normalMaleSupport'] = normal_performance['recall']
        score_dict['accuracyMale'] = accuracy
        score_dict['MCCMale'] = matthews_corrcoef(truemale, predmale)
        if roc == True:
            score_dict['roc-aucMale'] = roc_auc_score(truemale,probmale)
        
        # Metrics for total
        depression_performance, normal_performance ,accuracy = prediction_measures(pd.concat([truefemale,truemale]),pd.concat([predfemale,predmale]))
        score_dict['depressionTotalPrecision'] = depression_performance['precision']
        score_dict['depressionTotalRecall'] = depression_performance['recall']
        score_dict['depressionTotalF1'] = depression_performance['f1-score']
        # score_dict['depressionTotalSupport'] = depression_performance['support']
        score_dict['normalTotalPrecision'] = normal_performance['precision']
        score_dict['normalTotalRecall'] = normal_performance['recall']
        score_dict['normalTotalF1'] = normal_performance['f1-score']
        # score_dict['normalTotalSupport'] = normal_performance['recall']
        score_dict['accuracyTotal'] = accuracy
        score_dict['MCCTotal'] = matthews_corrcoef(pd.concat([truefemale,truemale]),pd.concat([predfemale,predmale]))
        if roc == True:
            score_dict['roc-aucTotal'] = roc_auc_score(pd.concat([truefemale,truemale]),pd.concat([probfemale,probmale]))
        # Fairness metrics between groups
        TNS, FPS, FNS, TPS = confusion_matrix(truefemale, predfemale).ravel()
        TNO, FPO, FNO, TPO = confusion_matrix(truemale,predmale).ravel()
        score_dict['statisticalParity'] = ((TPS+FPS)/len(truefemale)) / ((TPO+FPO)/len(truemale))
        score_dict['equalOpportunity'] = (TPS / (TPS+FNS)) / (TPO / (TPO+FNO))
        score_dict['equalisedOdds'] =  ((TPS / (TPS+FNS)) + (TNS / (TNS+FPS))) / ((TPO / (TPO+FNO))+ (TNO) / (TNO+FPO))
        score_dict['equalAccuracy'] = (accuracy_score(truefemale,predfemale)/accuracy_score(truemale,predmale))
    return score_dict

In [25]:

options = ['w2vec','ruleBased','DictionaryBased','TFIDF500','TFIDFEX500','TFIDFIN500','blingfire','reweightedblingfire','nltk','reweightednltk','spacysm','reweightedspacysm','spacylg','reweightedspacylg','spacytrf','reweightedspacytrf']
options = ['w2vec','TFIDF500','ruleBased','DictionaryBased','spacysm']

for name in options:
    df = pd.read_json(f'data/predictionDataTest/{name}Pred.json', lines=True, orient='records')
    roc = True

    predmale = df.loc[(df['gender'] == 'm') & (df['split'] == 'test'), ['prediction']]
    predfemale = df.loc[(df['gender'] == 'f') & (df['split'] == 'test'), ['prediction']]
    try:
        probmale = df.loc[(df['gender'] == 'm') & (df['split'] == 'test'), ['probability']]
        probfemale = df.loc[(df['gender'] == 'f') & (df['split'] == 'test'), ['probability']]
    except:
        roc = False
        probfemale = 1
        probmale = 1
    ytestmale = df.loc[(df['gender'] == 'm') & (df['split'] == 'test'), ['label']]
    ytestfemale = df.loc[(df['gender'] == 'f') & (df['split'] == 'test'), ['label']]
    

    score_dict = all_measures(predfemale,ytestfemale,probfemale,predmale,ytestmale,probmale, name, roc=roc)

    try:
        df = pd.read_csv('data/results/depressionResultsTest.csv')
    except:
        df = pd.DataFrame()

    df_new = pd.DataFrame(score_dict,index=[len(df)])

    df = pd.concat([df,df_new],ignore_index=True)

    df.to_csv('data/results/depressionResultsTest.csv', index=False)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Symptom prediction results

In [4]:
def getPred(df, threshold):
    symps = ["Anxious_Mood","Autonomic_symptoms","Cardiovascular_symptoms","Catatonic_behavior","Decreased_energy_tiredness_fatigue","Depressed_Mood","Gastrointestinal_symptoms","Genitourinary_symptoms","Hyperactivity_agitation","Impulsivity","Inattention","Indecisiveness","Respiratory_symptoms","Suicidal_ideas","Worthlessness_and_guilty","avoidance_of_stimuli","compensatory_behaviors_to_prevent_weight_gain","compulsions","diminished_emotional_expression","do_things_easily_get_painful_consequences","drastical_shift_in_mood_and_energy","fear_about_social_situations","fear_of_gaining_weight","fears_of_being_negatively_evaluated","flight_of_ideas","intrusion_symptoms","loss_of_interest_or_motivation","more_talktive","obsession","panic_fear","pessimism","poor_memory","sleep_disturbance","somatic_muscle","somatic_symptoms_others","somatic_symptoms_sensory","weight_and_appetite_change","Anger_Irritability"]

    relevant_sents = []
    postid = 0
    sentid = 0

    for post in df['sentences']:
        postid += 1
        for sent in post:
            sentid += 1
            meet_threshold = []
            if len(sent['sentence'].split()) >= 10:
                meet_threshold = [{'id':[postid,sentid],'sentence':sent['sentence']}]
                for symptom in symps:
                    if sent['probabilities'][symptom] >= 0.3:
                        meet_threshold.append({symptom:sent['probabilities'][symptom]})
                        if sent['probabilities'][symptom] >= threshold:
                            meet_threshold.append('1')
                        else:
                            meet_threshold.append('0')
                        meet_threshold.append(sent['probabilities'][symptom])
                if len(meet_threshold) >= 4:
                    relevant_sents.append(meet_threshold)
            else:
                continue
    df = pd.DataFrame(relevant_sents)
    return df


In [6]:
# Create total set
df = pd.concat([pd.read_json(f'data/annotationData/femaledata.json'),pd.read_json(f'data/annotationData/maledata.json')]).reset_index()
df.to_json('data/annotationData/totaldata.json', orient='records')

In [26]:
ytruem = pd.read_csv('data/annotationData/MaleAnnotationsetFilled.csv', sep=';')
ytruef = pd.read_csv('data/annotationData/FemaleAnnotationsetFilled.csv', sep=';')


precisions = []
recalls = []
f1_scores = []
tprs = []
fprs = []
mccs = []
rocs = []

thresholds = list(np.arange(0.3,0.7,0.01))
# thresholds = [0.5]
gender = 'total'

if gender == 'female':
    ytrue = ytruef
if gender == 'male':
    ytrue = ytruem
if gender == 'total':
    ytrue = pd.concat([ytruef,ytruem]).reset_index()

for threshold in thresholds:
    ypred = getPred(pd.read_json(f'data/annotationData/{gender}data.json'), threshold)
    dataset = []
    for x in range(len(ytrue)):
        for i in range(1,30,3):
            try:
                obj = ast.literal_eval(ytrue[str(i)][x])
                symp =list(obj.keys())[0]
                true = ytrue[str(i+1)][x]
                pred = ypred[i+1][x]
                prob = ypred[i+2][x]
                data = {'post':x,'symptom': symp, 'true':int(true), 'pred':int(pred), 'prob':float(prob)}
                dataset.append(data)
            except:
                continue
    df = pd.DataFrame(dataset)
    true = df['true']
    pred = df['pred']
    prob = df['prob']
    tn, fp, fn, tp = confusion_matrix(true, pred).ravel()
    precisions.append(precision_score(true, pred))
    recalls.append(recall_score(true, pred))
    f1_scores.append(f1_score(true, pred))
    mccs.append(matthews_corrcoef(true, pred))
    rocs.append(roc_auc_score(true,prob))
    # symptompredictions for further analysis
    # df.to_csv('data/predictionData/symptomPred0.5m.csv')


fig= go.Figure()
fig.add_trace(go.Scatter(x=thresholds, y=precisions, mode='lines', name='Precision'))
fig.add_trace(go.Scatter(x=thresholds, y=recalls, mode='lines', name='Recall'))
fig.add_trace(go.Scatter(x=thresholds, y=f1_scores, mode='lines', name='F1 Score'))
fig.add_trace(go.Scatter(x=thresholds, y=mccs, mode='lines', name='Matthews'))
fig.add_trace(go.Scatter(x=thresholds, y=rocs, mode='lines', name='ROC-AUC'))

fig.update_layout(title='Threshold vs. Metric Scores',
                  xaxis_title='Threshold',
                  yaxis_title='Score')

fig.show()

In [6]:
ytruem = pd.read_csv('data/annotationData/MaleAnnotationsetFilled.csv', sep=';')
ytruef = pd.read_csv('data/annotationData/FemaleAnnotationsetFilled.csv', sep=';')

threshold = 0.5
gender = 'total'

if gender == 'female':
    ytrue = ytruef
if gender == 'male':
    ytrue = ytruem
if gender == 'total':
    ytrue = pd.concat([ytruef,ytruem]).reset_index()


ypred = getPred(pd.read_json(f'data/annotationData/{gender}data.json'), threshold)
dataset = []
for x in range(len(ytrue)):
    for i in range(1,30,3):
        try:
            obj = ast.literal_eval(ytrue[str(i)][x])
            symp =list(obj.keys())[0]
            true = ytrue[str(i+1)][x]
            pred = ypred[i+1][x]
            prob = ypred[i+2][x]
            data = {'post':x,'symptom': symp, 'true':int(true), 'pred':int(pred), 'prob':float(prob)}
            dataset.append(data)
        except:
            continue
df = pd.DataFrame(dataset)
true = df['true']
pred = df['pred']
prob = df['prob']
precision = precision_score(true, pred)
recall = recall_score(true, pred)
f1 = f1_score(true, pred)
auc = roc_auc_score(true, prob)
mcc = matthews_corrcoef(true, pred)
tn, fp, fn, tp = confusion_matrix(true, pred).ravel()



In [20]:
# data = pd.read_csv('data/predictionData/symptomPred0.5.csv')
dff = pd.read_csv('data/predictionData/thresholds/symptomPred0.5f.csv')
dfm = pd.read_csv('data/predictionData/thresholds/symptomPred0.5m.csv')

import pandas as pd
from sklearn.metrics import classification_report, roc_auc_score, matthews_corrcoef

metrics = {}

data = dfm

total_report = classification_report(data['true'], data['pred'], output_dict=True, zero_division=0)
total_metrics = {
        'Precision (Positive)': total_report['1']['precision'] if '1' in total_report else 0,
        'Recall (Positive)': total_report['1']['recall'] if '1' in total_report else 0,
        'F1 (Positive)': total_report['1']['f1-score'] if '1' in total_report else 0,
        'Support (Positive)': total_report['1']['support'] if '1' in total_report else 0,
        
        'Precision (Negative)': total_report['0']['precision'] if '0' in total_report else 0,
        'Recall (Negative)': total_report['0']['recall'] if '0' in total_report else 0,
        'F1 (Negative)': total_report['0']['f1-score'] if '0' in total_report else 0,
        'Support (Negative)': total_report['0']['support'] if '0' in total_report else 0,
}

if len(data['true'].unique()) > 1:
    total_metrics['AUC-ROC'] = roc_auc_score(data['true'], data['prob'])
else:
    total_metrics['AUC-ROC'] = 'N/A'

total_metrics['MCC'] = matthews_corrcoef(data['true'], data['pred'])
tn, fp, fn, tp = confusion_matrix(data['true'], data['pred']).ravel()
total_metrics['TPR'] = tp / (tp + fn)
total_metrics['TNR'] = tn / (tn + fp)

metrics['Total'] = total_metrics


display(pd.DataFrame(metrics)['Total'])

AUC-ROC                  0.605284
F1 (Negative)            0.301887
F1 (Positive)            0.812183
MCC                      0.114187
Precision (Negative)     0.296296
Precision (Positive)     0.816327
Recall (Negative)        0.307692
Recall (Positive)        0.808081
Support (Negative)      26.000000
Support (Positive)      99.000000
TNR                      0.307692
TPR                      0.808081
Name: Total, dtype: float64