In [1]:
from metrics.mhic import MHIC
from metrics.intra_nli import IntraNLI
from metrics.fc_expert import FCExpert
import numpy as np
import pandas as pd
import json
import os

### Load and prepare CLPSYCH 2025 data

In [2]:
# Load the CLPsych 2025 training timelines from disk.
# The location can be overridden with the CLPSYCH_PATH environment variable;
# when it is unset, the original shared-filesystem path is used.
CLPSYCH_PATH = os.environ.get("CLPSYCH_PATH", "/import/nlp/datasets/clpsych2025/train")


def load_clpsych_timelines(data_dir):
    """Read every ``*.json`` file found below ``data_dir`` into a dict.

    Parameters
    ----------
    data_dir : str
        Directory that is walked recursively. A directory that does not
        exist yields an empty dict (``os.walk`` ignores listing errors).

    Returns
    -------
    dict
        Maps each file name, minus its final extension, to the parsed JSON
        content. Entries are inserted in sorted traversal order so that the
        result is identical from run to run and machine to machine. If two
        files in different directories share a stem, the one visited later
        replaces the earlier one.

    Raises
    ------
    json.JSONDecodeError
        If a ``.json`` file does not contain valid JSON.
    """
    timelines = {}
    for root, dirs, files in os.walk(data_dir):
        # Sort in place: os.walk reads `dirs` back to decide where to descend,
        # so this fixes the traversal order, which is otherwise arbitrary.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(".json"):
                continue
            # splitext drops only the final extension, so "a.v2.json" keeps
            # the key "a.v2" instead of being cut down to "a".
            key = os.path.splitext(file)[0]
            # Read as UTF-8 explicitly instead of relying on the locale default.
            with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                timelines[key] = json.load(f)
    return timelines


clpsych_raw = load_clpsych_timelines(CLPSYCH_PATH)

In [3]:
# Flatten the raw timelines into one row per post, keeping the id of the
# timeline each post belongs to.
post_rows = [
    (timeline["timeline_id"], post["post_id"], post["post"])
    for timeline in clpsych_raw.values()
    for post in timeline["posts"]
]

# Column-oriented view of the same rows, in the same order.
clpsych_posts = {
    "timeline_id": [timeline_id for timeline_id, _, _ in post_rows],
    "post_id": [post_id for _, post_id, _ in post_rows],
    "post": [text for _, _, text in post_rows],
}

clpsych_posts_df = pd.DataFrame(clpsych_posts)

In [4]:
# One row per timeline: its id together with its timeline-level summary.
summary_rows = [
    (timeline["timeline_id"], timeline["timeline_summary"])
    for timeline in clpsych_raw.values()
]

# Column-oriented view of the same rows, in the same order.
clpsych_summaries = {
    "timeline_id": [timeline_id for timeline_id, _ in summary_rows],
    "summary": [summary_text for _, summary_text in summary_rows],
}

clpsych_summaries_df = pd.DataFrame(clpsych_summaries)

### Calculate MHIC

In [5]:
# Instantiate the MHIC metric (imported from metrics.mhic) with no
# constructor arguments; the instance is reused by the scoring cell below.
mhic = MHIC()

In [6]:
# Score a single timeline with MHIC: pick the timeline that owns the first
# row of the posts frame, then collect its summary and all of its posts.
timeline_id = clpsych_posts_df["timeline_id"].iloc[0]

# Boolean row selectors for that timeline in each frame.
summary_mask = clpsych_summaries_df["timeline_id"].eq(timeline_id)
post_mask = clpsych_posts_df["timeline_id"].eq(timeline_id)

# First matching summary, and every post text of the timeline.
summary = clpsych_summaries_df.loc[summary_mask, "summary"].values[0]
posts = clpsych_posts_df.loc[post_mask, "post"].values

mhic_score = mhic.calculate_metric(summary, posts)
mhic_score

np.float64(0.4285615613063177)

### Calculate Intra NLI

In [7]:
# Instantiate the Intra NLI metric (imported from metrics.intra_nli) with no
# constructor arguments. The recorded output of this cell shows the
# ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli checkpoint being
# loaded into RobertaForSequenceClassification during construction.
intra_nli = IntraNLI()


Some weights of the model checkpoint at ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [8]:
# Score every timeline summary with Intra NLI (one score per summary, in
# frame order), then report the mean with NaN scores left out.
intra_nli_scores = [
    intra_nli.calculate_metric(summary_text)
    for summary_text in clpsych_summaries_df["summary"]
]
np.nanmean(intra_nli_scores)


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


np.float64(0.8917266097153299)

### Calculate FC_expert

In [9]:
# Instantiate the FC_expert metric (imported from metrics.fc_expert) with no
# constructor arguments; the instance is reused by the scoring cell below.
fc_expert = FCExpert()

In [11]:
# Score the summary of the first row of the summaries frame with FC_expert.
# NOTE(review): the same summary is passed for both arguments, so this is a
# self-comparison; confirm that is intended rather than a placeholder for a
# (candidate summary, reference summary) pair.
first_summary = clpsych_summaries_df.iloc[0]["summary"]
fc_expert_score = fc_expert.calculate_metric(first_summary, first_summary)
fc_expert_score

np.float64(0.9949385468207765)