In [57]:
# !pip install litellm==1.26.0
import os
import re
import sys
from collections import OrderedDict
from pathlib import Path

import litellm
import numpy as np
import pandas as pd

In [58]:
# API credentials and model identifiers used by the request cells.
# NOTE: os.environ[...] raises KeyError when OPENAI_API_KEY is not set, so
# export the variable before starting the kernel.
# NOTE(review): the classification loop calls api_request without api_key=;
# presumably litellm reads the key from the environment itself -- confirm.
api_key = os.environ['OPENAI_API_KEY'] # string; you need your own key and at least $5 of credit on the account
# os.environ["COHERE_API_KEY"] = api_keys.cohere_trial # string; free trial key available from the Cohere dashboard

gpt4_turbo = "gpt-4-1106-preview"
cohere = 'command-nightly'
litellm.drop_params=True # ignore parameters that the selected model does not support

In [59]:
# Locations of the ABCD release and of the symptom vocabulary.
# NOTE(review): these are absolute, machine-specific paths; adjust them (or
# derive them from a configurable data directory) before running elsewhere.
data_root = Path("/Volumes/Samsung_T5/MIT/abcd/release_05/abcd-data-release-5.1")
vocab_path = Path("/Volumes/Samsung_T5/MIT/abcd/adhd_vocab.npy")

section = "core/mental-health"
table = "mh_p_ksads_adhd.csv"
# Kept under its own name so that `mapping` below is only ever the
# variable-name -> question dictionary (it used to be rebound from str to dict).
mapping_file = "ksads_adhd.csv"

data_section = data_root / section
data_path = data_section / table  # path to results table
var_mapping_path = data_root / "var_mapping" / mapping_file  # variable names -> question text

data = pd.read_csv(data_path)
var_description = pd.read_csv(var_mapping_path)

# SECURITY: allow_pickle=True can execute arbitrary code while loading;
# only use it with a vocabulary file you created or otherwise trust.
symptoms = np.unique(np.load(vocab_path, allow_pickle=True))

var_names = np.array(var_description["var_name"])
questions = np.array(var_description["var_label"])
# Ordered lookup from each variable name to its question text.
mapping = OrderedDict(zip(var_names, questions))


  data = pd.read_csv(data_path)


In [60]:
# from openai import OpenAI
# client = OpenAI()
# completion = client.chat.completions.create(
#   model="gpt-3.5-turbo",
#   messages=[
#     {"role": "system", "content": "You are a poetic assistant, skilled in explaining complex programming concepts with creative flair."},
#     {"role": "user", "content": "Compose a poem that explains the concept of recursion in programming."}
#   ]
# )

In [61]:
from litellm import completion


def api_request(
    prompt,
    model="command-nightly",
    api_key=None,
    temperature=0.1,
    top_p=1,
    timeout=45,
    num_retries=2,
    max_tokens=None,
    seed=None,
    response_format=None,
):
    """Send one user message through litellm and return the reply text.

    Args:
        prompt: Text sent as the content of a single ``user`` message.
        model: Model identifier passed to ``litellm.completion``.
        api_key: Passed through to ``completion`` unchanged (``None`` by default).
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling parameter forwarded to the model.
        timeout: Forwarded to ``completion`` as ``timeout``.
        num_retries: Forwarded to ``completion`` as ``num_retries``.
        max_tokens: Forwarded to ``completion`` as ``max_tokens``.
        seed: Forwarded to ``completion`` as ``seed``.
        response_format: Optional response-format specification. It is only
            forwarded when it is not ``None``, so calls that omit it send
            exactly the same request as before.

    Returns:
        The ``content`` of the message in the first returned choice.
    """
    # Open AI status: https://status.openai.com/

    request_kwargs = {
        "model": model,
        "messages": [{"content": prompt, "role": "user"}],
        "api_key": api_key,
        "temperature": temperature,
        "top_p": top_p,
        "timeout": timeout,
        "num_retries": num_retries,
        "max_tokens": max_tokens,
        "seed": seed,
    }
    # Previously this argument was accepted but never sent to the model.
    if response_format is not None:
        request_kwargs["response_format"] = response_format

    responses = completion(**request_kwargs)
    # Only the first choice is used.
    response = responses.get("choices")[0].get("message").get("content")
    return response



In [62]:
# Few-shot classification template, filled in with str.format.
# Placeholders: {symptoms} (the candidate labels), {example_1}/{symptom_1} and
# {example_2}/{symptom_2} (two worked examples), and {document} (the text to
# classify). The template ends with "Symptom:" for the model to complete.
prompt = """
Classify the document into one of the following symptoms:
Symptoms to choose from: {symptoms}

Output only the most likely symptom with no additional comments or explanation. 

For example, 
Document: {example_1}
Symptom: {symptom_1}


Document: {example_2}
Symptom: {symptom_2}


Document: {document}
Symptom:
"""

In [63]:
# Display the unique symptom labels loaded from the vocabulary file.
symptoms

array(['Addiction ', 'Impulse control ', 'Reward anticipation', 'Timing',
       'agression', 'anhedhonia', 'arousal moderation ',
       'cognitive flexibility', 'cognitive inhibition ',
       'delay aversion (impatience)', 'disruptive behavior',
       'distractability', 'executive control', 'fine motor coordination',
       'heigthend awareness/sensitivity (sensory overleod) ',
       'irritability', 'motor inhibition ', 'planning and organisation',
       'set-shifting', 'social cognition', 'stress generation',
       'unstable sleep habits', 'vigilance'], dtype=object)

In [64]:
# Earlier hand-picked label subset, kept for reference:
#symptoms = ['sustained attention', 'hyperactivity', 'disruptive behavior', 'impulse control', 'reward anticipation', 'agression']
# Join all labels into one "; "-separated string for the {symptoms} slot of the prompt.
# NOTE(review): several vocabulary entries end with a space (e.g. 'Addiction '),
# so the joined text contains "Addiction ; Impulse control ; ..." -- consider
# stripping the labels if exact matching against model output matters.
symptoms_str = '; '.join(symptoms)
symptoms_str

'Addiction ; Impulse control ; Reward anticipation; Timing; agression; anhedhonia; arousal moderation ; cognitive flexibility; cognitive inhibition ; delay aversion (impatience); disruptive behavior; distractability; executive control; fine motor coordination; heigthend awareness/sensitivity (sensory overleod) ; irritability; motor inhibition ; planning and organisation; set-shifting; social cognition; stress generation; unstable sleep habits; vigilance'

In [66]:
# Two worked demonstrations (question text and its label) for the few-shot prompt.
examples = [
    "In the past two weeks, how often has your child blurt out answers in school without being called on, or blurt out answers when someone wasn't talking to him or her?",
    'You said that in the past your child had a lot of trouble keeping focused and paying attention; was often easily distracted; and often had trouble staying seated. When was that?',
]
symptoms_examples = ['impulse control', 'motor inhibition']
#documents = ["In the past two weeks, I interrupted someone while talking"]

# Template fields that are the same for every question, gathered once.
few_shot_fields = {
    "symptoms": symptoms_str,
    "example_1": examples[0],
    "symptom_1": symptoms_examples[0],
    "example_2": examples[1],
    "symptom_2": symptoms_examples[1],
}

# Classify every question; the model's answer is stored under the question's index.
responses = {}
for i, document in enumerate(questions):
    prompt_i = prompt.format(document=document, **few_shot_fields)
    response = api_request(prompt_i, model=gpt4_turbo)
    print(document, response, sep="\n")
    responses[i] = response

In the past two weeks, how often has your child put off or avoided doing things that were hard for him or her because they required a lot of attention?
delay aversion (impatience)
Do the problems we have just been talking about cause difficulties for your child in any of the following areas:With family
disruptive behavior
Do the problems we have just been talking about cause difficulties for your child in any of the following areas:With friends
social cognition
In the past two weeks, how often has your child talked when he or she wasn't supposed to, or had people complain that he or she talked too much?
disruptive behavior
In the past two weeks, how often has your child interrupted someone when they were talking?
impulse control
In the past two weeks, how often has your child had trouble waiting for his or her turn or waiting for other things?
delay aversion (impatience)
In the past two weeks, how often has your child blurt out answers in school without being called on, or blurt out an

In [None]:
# Split the current `response` string into its individual lines.
# NOTE(review): `response` is whatever was assigned most recently (the loop
# cell rebinds it on every iteration) -- confirm which reply this inspects.
response.split('\n')

['Symptom: impulse control',
 'Explanation: I interrupted someone while talking']

In [None]:
def generate_prompt(
    construct,
    prompt_name=None,
    prompt="default",
    domain=None,
    definition=None,
    examples=None,
    output_format="default",
    remove_parentheses_definition=True,
):
    """Build a prompt asking for words and short phrases related to a construct.

    Args:
        construct: Construct identifier. When ``prompt_name`` is not a string,
            the name used in the prompt is ``construct`` with underscores
            replaced by spaces, lower-cased.
        prompt_name: Optional explicit name to use in the prompt text.
        prompt: ``"default"`` builds the standard prompt; any other value is
            returned unchanged.
        domain: Optional domain; adds "(in the <domain> domain)" after the name.
        definition: Optional definition appended on its own line (lower-cased
            and stripped).
        examples: Optional examples, either a ready-made string or a list/tuple
            of strings (joined with "; "). Empty input adds nothing.
        output_format: ``"default"`` (semicolon-separated tokens), ``"json"``,
            or any other string, which is used verbatim as the instruction.
        remove_parentheses_definition: When true, parenthesised spans are
            removed from ``definition`` before it is appended.

    Returns:
        The prompt string.
    """
    if output_format == "default":
        # removed: Order them by how similar they are to {construct}.
        output_format = (
            "Each token should be separated by a semicolon. Do not return duplicate tokens. Do not provide any"
            " explanation or additional text beyond the tokens."
        )
    elif output_format == "json":
        output_format = (
            "Provide tokens in JSON output. Do not return duplicate tokens. Do not provide any explanation or"
            " additional text beyond the tokens."
        )

    # Fall back to a readable name derived from the construct identifier.
    if not isinstance(prompt_name, str):
        prompt_name = construct.replace("_", " ").lower()

    # A caller-supplied prompt is passed through untouched.
    if prompt != "default":
        return prompt

    prompt = "Provide many single words and some short phrases related to"
    if domain:
        domain = f"(in the {domain} domain). "
        prompt = f"{prompt} {prompt_name} {domain}{output_format}"
    else:
        prompt = f"{prompt} {prompt_name}. {output_format}"

    if definition:
        if remove_parentheses_definition:
            # Parenthesised text in a definition is usually a citation.
            definition = re.sub(r"\(.*?\)", "", definition)
        prompt += f"\nHere is a definition of {prompt_name}: {definition.lower().strip()}"

    if isinstance(examples, (list, tuple)):
        examples = "; ".join(examples)
    # Skip empty input so the prompt never ends with a dangling "examples: ." line.
    if isinstance(examples, str) and examples:
        prompt += f"\nHere are some examples (include these in the list): {examples}."

    return prompt

