In [34]:
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import gc
from retry import retry
import requests
import os
import openai
from sklearn.model_selection import train_test_split
from concurrent.futures import ThreadPoolExecutor

In [35]:
from openai import OpenAI

In [36]:
# Read the API key from the environment instead of embedding it in the notebook,
# so the secret never ends up in the saved file or in version control.
# A missing OPENAI_API_KEY fails here with a KeyError rather than later on the first request.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

In [37]:
# Number of training rows sampled as few-shot examples for each test row;
# also embedded in the name of the outputs CSV written further down.
n = 1

In [38]:
# Load the dataset. The prompt-building code below reads the columns
# 'questionFull', 'Anxiety' and 'Depression' from its rows.
df3 = pd.read_csv('oneHotData.csv')
# col_to_use = 'TEXT'

In [39]:
# Hold out 20% of the rows as the evaluation set; the fixed random_state keeps the split reproducible.
# train_data supplies the few-shot examples, test_data the passages to classify.
train_data, test_data = train_test_split(df3, test_size=0.2, random_state=42)

In [40]:
def make_prompt(top_n_rows, test_row):
    """Assemble the few-shot classification prompt for a single passage.

    Args:
        top_n_rows: DataFrame of worked examples; each row must provide
            'questionFull', 'Anxiety' and 'Depression'.
        test_row: Mapping/Series providing 'questionFull' for the passage to classify.

    Returns:
        The prompt string: the instruction text, the examples fenced between
        triple backticks, then the passage to classify.
    """
    instructions = (
        "Act as a professional psychiatrist and study the passage for signs of mental health disorders. "
        "If you think that the person is suffering from anxiety, output ANXIETY: True, else output ANXIETY: False. "
        "Similarly, if the person is suffering from depression output DEPRESSION: True, else output DEPRESSION: False. "
        "Do not print anything else. "
        "There are some examples on how to do this provided in the backticks.\n"
        "```"
    )

    # One "Passage / ANXIETY / DEPRESSION" stanza per example row, in row order.
    example_blocks = [
        f"\nPassage: {example['questionFull']}\n"
        f"ANXIETY: {example['Anxiety']}\n"
        f"DEPRESSION: {example['Depression']}\n"
        for _, example in top_n_rows.iterrows()
    ]

    # Closing fence followed by the passage the model should label.
    query = (
        "\n```\n"
        "Using the understanding of the task gained from the above examples do the same for the following passage\n"
        f"Passage: {test_row['questionFull']}\n"
    )

    return instructions + "".join(example_blocks) + query

In [41]:
# Pair every test row with its own draw of `n` training rows (the few-shot examples).
# Each draw is seeded with the test row's position, so different rows still get
# different examples but the pairing is reproducible after a kernel restart.
few_shot_args = [
    (train_data.sample(n, random_state=seed), test_row)
    for seed, (_, test_row) in enumerate(test_data.iterrows())
]

In [42]:
def make_message(top_n_rows, test_row):
    """Wrap the few-shot prompt in a chat `messages` list.

    Args:
        top_n_rows: Example rows, passed through to make_prompt.
        test_row: Row to classify, passed through to make_prompt.

    Returns:
        A one-element list holding a single "user" message whose content is the prompt.
    """
    prompt_text = f"{make_prompt(top_n_rows, test_row)}"
    return [{"role": "user", "content": prompt_text}]

In [43]:
@retry(exceptions=Exception, tries=2, delay=30)
def make_call(top_n_rows, test_row):
    """Send one few-shot classification request to the chat completions endpoint.

    Args:
        top_n_rows: Example rows, forwarded to make_message.
        test_row: Row to classify, forwarded to make_message.

    Returns:
        The object returned by client.chat.completions.create, or the string
        "Error" if a KeyboardInterrupt arrives while the request is in flight.

    Raises:
        Exception: any failure while building the message or calling the API is
            logged and re-raised so the @retry decorator (tries=2, delay=30) can
            handle it.
    """
    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo-1106",
            messages=make_message(top_n_rows, test_row),
            # request_timeout=60
        )
        return response
    except KeyboardInterrupt:
        # KeyboardInterrupt is not an Exception subclass, so it bypasses @retry;
        # callers receive the sentinel string instead of a response object.
        print("Closing...")
        return "Error"
    except Exception as e:
        # Not every failure is a timeout, so report the actual error type.
        print(f"API call failed ({type(e).__name__}); re-raising for the retry decorator")
        print(e)
        # Bare raise keeps the original traceback intact.
        raise

In [44]:
def make_few_shot_prompt_call(top_n_rows, test_row):
    """Thread-pool worker: perform one API call, then advance the progress bar.

    Args:
        top_n_rows: Example rows, forwarded to make_call.
        test_row: Row to classify, forwarded to make_call.

    Returns:
        Whatever make_call returns for this pair.
    """
    api_result = make_call(top_n_rows, test_row)
    # `pbar` is the module-level tqdm bar; it only advances when make_call returned.
    pbar.update(1)
    return api_result

In [45]:
MAX_THREADS = 10
# The progress bar is context-managed so it is closed once all workers are done,
# including when a worker raises. `pbar` must stay a module-level name because
# make_few_shot_prompt_call updates it.
with tqdm(total=len(few_shot_args)) as pbar:
    with ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
        # zip(*few_shot_args) transposes the (examples, test_row) pairs into two
        # parallel sequences; executor.map returns results in input order.
        res = list(executor.map(make_few_shot_prompt_call, *zip(*few_shot_args)))

  0%|          | 0/276 [00:00<?, ?it/s]

  0%|          | 0/276 [00:52<?, ?it/s]


In [47]:
# Sanity check: show the raw text of the first response.
res[0].choices[0].message.content

'ANXIETY: True\nDEPRESSION: False'

In [50]:
# Flatten every response to a single line: each newline in the generated text
# becomes the literal marker " <NEWLINE> ".
outputs = [
    " <NEWLINE> ".join(op.choices[0].message.content.split('\n'))
    for op in res
]

In [52]:
# Persist the flattened generations, one per row, in a single 'generated_outputs' column.
# The file name records the number of few-shot examples (`n`) used for this run.
res_df = pd.DataFrame({"generated_outputs": outputs})
res_df.to_csv(f"GPT3.5_Outputs_{n}Shot_CounselChat.csv", index=False)

In [15]:
def get_text(api_response):
    """Extract the generated text from a chat completion response.

    Args:
        api_response: Object exposing `.choices[0].message.content`. Anything else
            (for example the "Error" sentinel string that make_call can return) is tolerated.

    Returns:
        The message content of the first choice, or the string "Error" when it
        cannot be read from `api_response`.
    """
    try:
        return api_response.choices[0].message.content
    except Exception:
        # Best-effort extraction, but KeyboardInterrupt/SystemExit must still
        # propagate so a long-running loop over this function can be stopped.
        print("Error occurred with the text retrieval")
        return "Error"

In [16]:
# df1['Generated Diagnosis Summary'] = np.empty((df1.shape[0], ))
# df2['Generated Diagnosis Summary'] = np.empty((df2.shape[0], ))
# Pre-create the column that will receive the generated text. It is created as an
# empty object-dtype column (all missing) rather than with np.empty, which would
# fill it with arbitrary uninitialised floats and give a text column a float dtype.
df3['GPT Summary'] = pd.Series(index=df3.index, dtype=object)

In [17]:
# Renumber rows 0..len-1 in place; the previous index is kept as a new 'index' column
# (removed in the next cell). The positional loop below relies on labels matching positions.
df3.reset_index(inplace=True)

In [18]:
# Discard the 'index' column that reset_index() added in the previous cell.
df3.drop(columns='index', inplace=True)

In [19]:
# for i in tqdm(range(df1.shape[0])):
#     df1.loc[i, 'Generated Diagnosis Summary'] = get_text(make_call(df1.iloc[i]))

# for i in tqdm(range(df2.shape[0])):
#     df2.loc[i, 'Generated Diagnosis Summary'] = get_text(make_call(df2.iloc[i]))
    
# Fill 'GPT Summary' row by row with the text extracted from each API response;
# get_text returns "Error" when the text cannot be read from the response.
# NOTE(review): make_call as defined in this notebook requires two arguments
# (top_n_rows, test_row) but is called here with a single row. This cell appears to
# predate that definition (its execution count is lower) — confirm which make_call
# it is meant to use before re-running.
for i in tqdm(range(df3.shape[0])):
    df3.loc[i, 'GPT Summary'] = get_text(make_call(df3.iloc[i]))

100%|██████████| 127/127 [21:38<00:00, 10.22s/it]


In [20]:
# Expose the generated text under the column name used by the following cells.
df3['Generated Diagnosis Summary'] = df3['GPT Summary']

In [30]:
# Collapse each generated summary onto one line: blank-line pairs are first reduced
# to single newlines, then every remaining newline becomes a ". " separator.
df3['Predicted Diagnosis'] = df3['Generated Diagnosis Summary'].apply(
    lambda text: text.replace('\n\n', '\n').replace('\n', '. ')
)

In [32]:
# Load the pipe-separated file whose 'Generated Diagnosis Summary' column is read in the next cell.
og_df = pd.read_csv("test_diag_dataset.csv", sep='|')

In [None]:
# Overwrite the column with the values stored in og_df.
# NOTE(review): this replaces the value copied from 'GPT Summary' earlier, while
# 'Predicted Diagnosis' was derived from that earlier value — confirm this is intended.
# Presumably og_df and df3 share the same row index, since the assignment pairs rows
# by index label — TODO confirm.
df3['Generated Diagnosis Summary'] = og_df['Generated Diagnosis Summary']

In [29]:
# Write the full frame, pipe-separated (the same separator used to read test_diag_dataset.csv),
# without the index column.
df3.to_csv("GPT35_Predictions.csv", sep='|', index=False)

In [33]:
# Write the frame again as a comma-separated file without the index column.
# NOTE(review): 'FILE_TARGET' is used verbatim as the output file name and looks like
# a placeholder — confirm the intended path.
df3.to_csv('FILE_TARGET', index=False)