In [None]:
import os

# Show only whether the key is configured. Leaving os.getenv(...) as the cell's
# last expression would render the secret itself into the notebook output.
openai_key_configured = bool(os.getenv("OPENAI_API_KEY"))
openai_key_configured

### Data preparation

In [None]:
# System prompt sent with every request. It describes the advisor role, the
# cost/reward trade-off for effort, and the JSON output schema the model must
# follow for the two adjustable features (SCHL and WKHP).
# NOTE(review): the text says "up to five strategies" although the schema lists
# only two features — confirm whether that wording is intentional.
system_prompt = """
You are a financial advisor guiding users to change their personal profiles to increase their income. Users will provide their current personal profiles as a dictionary of features and their values. Based on these features, your task is to recommend the optimal effort allocation strategies that will improve the probability of the user getting a high income.

### Requirement of your recommendation strategies

1. Your recommendation strategy must be based on the unique user's provided features and your knowledge and reasoning to help them increase the income.

2. Your recommendation strategy must be a JSON dictionary containing up to five strategies for
affecting their features "SCHL" (education level), "WKHP" (working hours per week).

3. Strategies include a Direction ("increase" or "decrease"), and Effort (the amount of effort going to changing that feature in the given Direction). Do not use any direction other than increase or decrease.

4. Effort is valid as long as it is a non-negative number. Although there is no effort budget, for each unit of effort, the user will pay a cost of the square of this effort divided by 2 (e.g., 0.5 effort will incur 0.5^2/2 = 0.125 cost). While the reward of the user will be the amount of probability improvement (maximum reward is 1 since the largest possible probability is 1) to get a high income after changing their profiles following your strategy. You must consider whether the cost is worthwhile compared to the reward.

### Mandatory output schema

Your output must have the following JSON schema **without** any additional explanation:

{
    "SCHL": {
        "Direction": "increase" or "decrease" or "N/A" if "Effort" is 0,
        "Effort": "the amount of effort allocated to this feature"
    },
    "WKHP": {
        "Direction": "increase" or "decrease" or "N/A" if "Effort" is 0,
        "Effort": "the amount of effort allocated to this feature"
    }
}

Note that you are allowed to allocate 0 effort to some feature. But when effort is 0, the "Direction" must be "N/A".
"""

In [None]:
import pandas as pd

# Read the raw sample CSV into the working DataFrame used by the cells below.
df = pd.read_csv(filepath_or_buffer="../data/ACSIncome_sample_raw.csv")

In [None]:
import numpy as np

# Map the numeric SEX code to a label: 2 -> 'female', anything else -> 'male'.
# The dtype guard keeps the cell idempotent: once the column holds strings,
# comparing it to 2 again would relabel every row as 'male'.
if pd.api.types.is_numeric_dtype(df['SEX']):
    df['SEX'] = np.where(df['SEX'] == 2, 'female', 'male')
df

In [None]:
def parse_row_to_question(row):
    """Render one profile row as the single-sentence user prompt.

    `row` is indexed by feature name (``row[name]``) for AGEP, SCHL, WKHP and
    SEX, in that order. The result has the form
    ``"User's financial profile: AGEP=<v>, SCHL=<v>, WKHP=<v>, SEX=<v>."``.
    """
    feature_names = ('AGEP', 'SCHL', 'WKHP', 'SEX')
    rendered = ", ".join(f"{feature}={row[feature]}" for feature in feature_names)
    return "User's financial profile: " + rendered + "."

# Preview the generated prompt text for the first row of df.
parse_row_to_question(df.iloc[0])

### Test

In [None]:
from openai import OpenAI

# Send the first profile through the chat completions endpoint and print the
# content of the returned message.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

response = client.chat.completions.create(
    model="gpt-4.1-2025-04-14",
    temperature=0,
    response_format=dict(type="json_object"),
    messages=[
        dict(role="system", content=system_prompt),
        dict(role="user", content=f"User profile:{parse_row_to_question(df.iloc[0])}"),
    ],
)

print(response.choices[0].message.content)

### Make batch prediction file

In [None]:
# Build one chat-completions request per row position 0..999; the row position
# doubles as the request's custom_id.
request_list = [
    {
        "custom_id": f"{row_idx}",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": "gpt-4.1-2025-04-14",
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": "User profile:" + parse_row_to_question(df.iloc[row_idx])},
            ],
            "response_format": {"type": "json_object"},
            "temperature": 0,
        },
    }
    for row_idx in range(1000)
]

In [None]:
# Serialize the requests as JSON Lines: one JSON object per line.
import json

with open("income_requests_41.jsonl", mode="w") as requests_file:
    requests_file.writelines(json.dumps(entry) + "\n" for entry in request_list)


### Batch prediction

In [None]:
# Upload the request file for batch processing. The file handle is opened in a
# `with` block so it is closed as soon as the upload call returns.
with open("income_requests_41.jsonl", "rb") as requests_file:
    batch_input_file = client.files.create(
        file=requests_file,
        purpose="batch"
    )

print(batch_input_file)

In [None]:
# Create the batch job from the uploaded request file. The returned object is
# kept in `batch` so its id stays available programmatically instead of living
# only in the rendered cell output.
batch_input_file_id = batch_input_file.id
batch = client.batches.create(
    input_file_id=batch_input_file_id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
    metadata={
        "description": "income dataset job 41"
    }
)
batch

In [None]:
# Re-create the client and fetch a batch by id to inspect it.
# NOTE(review): the batch id is hard-coded — presumably copied from the output
# of the batches.create call above; confirm it refers to the intended job.
from openai import OpenAI
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
batch = client.batches.retrieve("batch_67fd74898ac081909db00d29c6509c18")
print(batch)

In [None]:
# Fetch the content of the given file id and save its text locally as JSONL.
file_response = client.files.content("file-18mfsUDZw1MgwVKYwbFe2c")
with open("income_responses_41.jsonl", mode="w") as responses_file:
    responses_file.write(file_response.text)

### Parsing JSONL to JSON using ids

In [None]:
import json

# Read the response file once and group the parsed records by custom_id,
# rather than reopening and rescanning the whole file for each of the 1000 ids.
records_by_id = {}
with open("income_responses_41.jsonl", "r") as f:
    for line in f:
        record = json.loads(line)
        records_by_id.setdefault(record["custom_id"], []).append(record)

# Emit message contents in custom_id order 0..999. Ids with no record are
# skipped, and repeated ids keep every record in file order.
responses = [
    record['response']['body']['choices'][0]['message']['content']
    for i in range(1000)
    for record in records_by_id.get(str(i), [])
]

In [None]:
# Parse each response string into a dictionary. The strings are JSON, so they
# are decoded with json.loads; ast.literal_eval only understands Python
# literals and rejects JSON's true / false / null.
import json

responses_dict = [json.loads(response) for response in responses]


In [None]:
# Persist the parsed responses as one indented JSON array.
with open("41_income_valid.json", mode="w") as out_file:
    json.dump(responses_dict, fp=out_file, indent=4)