In [1]:
import torch
from transformers import AutoTokenizer, AutoModel
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, precision_score, recall_score

In [2]:
from langchain import HuggingFacePipeline, PromptTemplate, LLMChain
from transformers import AutoTokenizer, pipeline
from huggingface_hub import login
import torch
import re
import pandas as pd
from tqdm import tqdm

In [3]:
test_df = pd.read_csv("/vast/palmer/scratch/liu_xiaofeng/ss4786/sourav/test_data.csv")
test_df.head()

Unnamed: 0.1,Unnamed: 0,subject_ID,num_writings,text,Anorexia
0,0,1152_1,4,Student at uw. Need a few hours away from cam...,1
1,1,1169_1,14,"Haha, nailed it dude.\n Rings are defi...",1
2,2,1441_1,55,"Right, which is why I said those people wo...",1
3,3,1518_1,31,I just have a lot of self hatred and depri...,1
4,4,1773_1,1,Thank you for your long reply! I have been...,1


In [4]:
d = {}
with open("/vast/palmer/scratch/liu_xiaofeng/ss4786/sourav/risk-golden-truth-test.txt", 'r') as f:
    for line in f:
        subject, label = line.split()
        d[subject[7:]] = int(label)

len(d)

320

In [5]:
import math

def get_delays(preds, delays_all):
    delays = []
    for i in range(0, len(preds)):
        delay = 0
        for j in range(0, len(preds[i])):
            if preds[i][j] == 1:
                # delay += delays_all[i][j]
                break
            delay += delays_all[i][j]
        delays.append(delay)
    return delays

import numpy as np

def lco(k, o):
    if k - o > 700:
        return 1.0
    elif k - o < -700:
        return 0.0
    else:
        return 1 - (1 / (1 + np.exp(k - o)))


def calculate_ERDE(preds, delays, ground_truth, cfn, ctp, o):
    cfp = np.sum(ground_truth)/len(ground_truth)
    erde_values = []
    for pred, gt, delay in zip(preds, ground_truth, delays):
        if pred == 1 and gt == 1:
            erde = lco(delay, o) * ctp 
        elif pred == 0 and gt == 1:
            erde = cfn
        elif pred == 1 and gt == 0:
            erde = cfp
        else:
            erde = 0
        erde_values.append(erde)
    return sum(erde_values) / len(erde_values)

In [23]:
from groq import Groq

client = Groq(api_key='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')
# MODEL = "llama-guard-3-8b"
MODEL = 'gemma-7b-it'


def run_chat_completion(user_input):
    """Generate a response based on the user input text."""
    try:
        chat_completion = client.chat.completions.create(
            messages = [
                {
                    "role": "system",
                    # "content": "Based on the definition of anorexia classify the text into anorexic, non-anorexic, neutral. Definition: A mental health condition characterized by persistent disturbances in eating behaviors, associated thoughts and emotions, and significant impact on physical health or psychosocial functioning. Common signs include extreme concern with body weight or shape, unusual eating habits, and excessive exercise." # best 2
                    "content": "Classify text into one of 2 labels: anorexic, non-anorexic based om the definition of anorexia. Give precise answer wihtout explanation. Definition: A mental health condition characterized by persistent disturbances in eating behaviors, associated thoughts and emotions, and significant impact on physical health or psychosocial functioning. Common signs include extreme concern with body weight or shape, unusual eating habits, and excessive exercise."
                    # "content": "You are a classifier. You have to classify whether the speaker has an eating disorder." # best till now
                    # "content": "You are a classifier and summarizer. You have to classify whether the speaker has an eating disorder. Summarize the output and return a one word answer."
                    # "content": "You are a text classifier expert specializing in eating disorders. Your task is to classify the input into positive if the person described shows signs of an eating disorder else negative."
                },
                {
                    "role": "user",
                    "content": user_input,
                }
            ],
            model=MODEL
        )

        # Print or return the model's response
        return chat_completion.choices[0].message.content

    except Exception as e:
        return f"Error: {str(e)}"

# Example usage
user_prompt = " I managed three consecutive days of eating 1,000 calories Friday, Saturday, and Sunday. Given that the past month has consisted of 500 or less (usually less if I'm being 100 honest) days, I'm feeling like that's a pretty big accomplishment"
response = run_chat_completion(user_prompt)
print(response)


**Anorexic**

The text exhibits symptoms consistent with anorexia nervosa, including extreme concern about body weight, restrictive eating habits, and efforts to control weight through excessive exercise.


In [13]:
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

In [14]:

def split_text_into_chunks(text, tokenizer, max_tokens=1000):
    tokens = tokenizer.tokenize(text)
    chunks = []
    for i in range(0, len(tokens), max_tokens):
        chunk = tokens[i:i + max_tokens]
        chunks.append(tokenizer.convert_tokens_to_string(chunk))
    return chunks

In [42]:
def classify_anorexia(text):
    try: cleaned_text = clean_text(text)
    except: cleaned_text = "I am very happy."
    chunks = split_text_into_chunks(cleaned_text, tokenizer)
    result = []
    for chunk in chunks:
        # print(chunk)
        output = run_chat_completion(chunk)
        result.append(output)
        # print(output)
        # if type(output)!=str:
        #     result.append(0)
        # elif 'not possible to classify' in output.lower():
        #     result.append(0)
        # elif 'non-anorexic' in output.lower():
        #     result.append(0)
        # else:
        #     result.append(1)
    return result

In [43]:
results = {}

In [None]:
print(test_df['text'][3])

In [44]:
for idx, row in tqdm(test_df.iterrows(), desc="testing"):
    subject_id = row['subject_ID']
    text = row['text']
    results[subject_id] = classify_anorexia(text)

testing: 3200it [2:01:58,  2.29s/it]


In [46]:
results

{'1152_1': ['Non-anorexic\n\nThe provided text does not describe characteristics or symptoms associated with anorexia nervosa, such as extreme body weight concerns, unusual eating habits, or excessive exercise.'],
 '1169_1': ['Non-anorexic\n\nThe provided text describes general concerns related to eating behaviors, body image, and exercise, but does not specifically relate to the clinical definition of anorexia nervosa.'],
 '1441_1': ['Non-anorexic\n\nThe provided text does not exhibit characteristics or symptoms consistent with Anorexia Nervosa as defined in the given definition.'],
 '1518_1': ['Non-anorexic'],
 '1773_1': ['Non-anorexic\n\nThe provided text does not contain any information regarding eating disorders or anorexic behaviors, so it can be classified as non-anorexic.'],
 '2062_1': ['Non-anorexic'],
 '21_1': ['Non-anorexic\n\nThe provided text does not contain any information related to anorexic classifications or symptoms, therefore it cannot be categorized as an anorexic 

In [35]:
delay_dict = dict(zip(test_df['subject_ID'], test_df['num_writings']))

In [36]:
d = {}
with open("/vast/palmer/scratch/liu_xiaofeng/ss4786/sourav/risk-golden-truth-test.txt", 'r') as f:
    for line in f:
        subject, label = line.split()
        d[subject[7:]] = int(label)

len(d)

320

In [37]:
pred_labels, label, delay = [], [], []
for key in delay_dict.keys():
    subj = key.split('_')[0]
    label.append(d[subj])
    temp1, temp2 = [], []
    for i in range(1, 11):
        subject_id = subj + '_' + str(i)
        temp1.append(delay_dict[subject_id])
        prediction = results[subject_id]
        if sum(prediction)/len(prediction) > 0.699: temp2.append(1)
        else: temp2.append(0)
    pred_labels.append(temp2)
    delay.append(temp1)

print(pred_labels[0], label[0], delay[0])

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 1 [4, 4, 4, 4, 4, 4, 4, 4, 4, 4]


In [38]:
import math

def get_delays(preds, delays_all):
    delays = []
    for i in range(0, len(preds)):
        delay = 0
        for j in range(0, len(preds[i])):
            if preds[i][j] == 1:
                # delay += delays_all[i][j]
                break
            delay += delays_all[i][j]
        delays.append(delay)
    return delays

import numpy as np

def lco(k, o):
    if k - o > 700:
        return 1.0
    elif k - o < -700:
        return 0.0
    else:
        return 1 - (1 / (1 + np.exp(k - o)))


def calculate_ERDE(preds, delays, ground_truth, cfn, ctp, o):
    cfp = np.sum(ground_truth)/len(ground_truth)
    erde_values = []
    for pred, gt, delay in zip(preds, ground_truth, delays):
        if pred == 1 and gt == 1:
            erde = lco(delay, o) * ctp 
        elif pred == 0 and gt == 1:
            erde = cfn
        elif pred == 1 and gt == 0:
            erde = cfp
        else:
            erde = 0
        erde_values.append(erde)
    return sum(erde_values) / len(erde_values)

In [39]:
cfp = 0.01  # Cost of false positive
cfn = 1     # Cost of false negative
ctp = 1     # Cost of true positive
o = 5 

delays = get_delays(pred_labels, delay)

preds = []
for i in pred_labels:
    if sum(i) > 0: 
        preds.append(1)
    else: 
        preds.append(0)

print('ERDE5 = ', 100*calculate_ERDE(preds, delays, label, cfn, ctp, o))
print('ERDE50 = ', 100*calculate_ERDE(preds, delays, label, cfn, ctp, o*10))


ERDE5 =  13.773437500000057
ERDE50 =  13.773437500000057


In [40]:
f1_scores, precision, recall = [], [], []

for j in range(0, 11):
    preds = []
    for i in pred_labels:
        if sum(i) > j: 
            preds.append(1)
        else: 
            preds.append(0)
    
    f1_scores.append(f1_score(label, preds))
    precision.append(precision_score(label, preds))
    recall.append(recall_score(label, preds))

results_df = pd.DataFrame({
    'f1_score': f1_scores,
    'precision': precision,
    'recall': recall,
})

results_df

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unnamed: 0,f1_score,precision,recall
0,0.115942,0.142857,0.097561
1,0.0,0.0,0.0
2,0.0,0.0,0.0
3,0.0,0.0,0.0
4,0.0,0.0,0.0
5,0.0,0.0,0.0
6,0.0,0.0,0.0
7,0.0,0.0,0.0
8,0.0,0.0,0.0
9,0.0,0.0,0.0


In [41]:
for key in results.keys():
    l = results[key]
    if 1 in l: print(key)

7422_1
6310_1
7371_10
9166_2
9978_2
4112_3
6680_3
8512_4
4071_5
6029_5
689_5
7248_5
2845_6
5512_6
6947_6
7278_6
7597_6
8444_6
6414_8
559_8
5808_8
9078_8
536_9
7809_9
8338_9
8561_9
8562_9
9789_9
