In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import preprocessing
import json
from data_utils import *
from model import *
from algorithm import *
import random
import os
import torch.optim as optim

In [None]:
def read_json(file):
    """
    Read a JSON result file and return its parsed content.

    Parameters
    ----------
    file : str or path-like
        Location of the JSON file to read.

    Returns
    -------
    object
        Whatever ``json.load`` produces for the document (list, dict, ...).
    """
    # JSON text is UTF-8; being explicit avoids depending on the platform's
    # default text encoding.
    with open(file, 'r', encoding='utf-8') as fh:
        return json.load(fh)

### Hiring

In [None]:
# Column index lists for the hiring features; nc_idx columns are reset to their
# original values when x_improve is built further down.
c_idx = [0,3]
nc_idx = [1,2]

# Two logistic-regression models over 5 inputs, restored from saved weights.
# map_location='cpu' lets the checkpoints load on a machine without a GPU even
# if they were saved from one; the rest of the notebook runs on CPU tensors.
h, f = logReg(5), logReg(5)
h.load_state_dict(torch.load('h_models/h_hiring_lr.pth', map_location='cpu'))
f.load_state_dict(torch.load('f_models/f_lr_hiring.pth', map_location='cpu'))

In [None]:
# Load the GPT responses for the hiring task and preview the first record.
responses_path = 'data/4o_hiring_valid.json'
GPT_output = read_json(responses_path)
GPT_output[0]

In [None]:
# Load the sampled applicants; the group attribute z is the 'age' column.
original = pd.read_csv('data/hiring_data_sample.csv')
original['z'] = original['age']
X_original = original[['education','YearsCode','PreviousSalary','ComputerSkills','z']]

# Standardize with the mean / std_dev exposed by the Hiring dataset object.
data = Hiring(device='cpu', decision=False)
X_original = (X_original - data.mean)/data.std_dev
X_original = torch.Tensor(X_original.to_numpy())

# simulate best response
# We: per-feature multipliers applied to the reported efforts (same order as `features`).
We = torch.tensor([1,1,2,2], dtype = torch.float32)
features = ['education','YearsCode','PreviousSalary','ComputerSkills']

# One effort row per sample; derive the count from the data instead of a
# hard-coded 1000 so the two cannot drift apart.
n_samples = X_original.shape[0]
if len(GPT_output) < n_samples:
    raise ValueError(
        'GPT_output has %d responses but %d samples were loaded' % (len(GPT_output), n_samples)
    )

efforts_GPT = np.zeros((n_samples, len(features)))
for i in range(n_samples):
    # read efforts
    item = GPT_output[i]
    # `feat` (not `f`) so the model `f` loaded in an earlier cell is not overwritten.
    for j, feat in enumerate(features):
        if feat not in item or item[feat] == {}:
            continue  # no effort reported for this feature -> stays 0
        # Any direction other than 'decrease' is treated as an increase.
        sign = -1.0 if item[feat]['Direction'] == 'decrease' else 1.0
        efforts_GPT[i, j] = sign * item[feat]['Effort']

# get the corresponding effort, x_star, x_improve for ChatGPT
# x_star: every feature column (all but the last column, z) shifted by the weighted efforts.
x_star_GPT = X_original.clone()
x_star_GPT[:,:-1]  = X_original[:,:-1] + torch.Tensor(efforts_GPT)*We
# x_improve: same as x_star, but the nc_idx columns are reset to their original values.
x_improve_GPT = x_star_GPT.clone()
x_improve_GPT[:,nc_idx] = X_original[:,nc_idx]

In [None]:
# Sanity check: show sample 0 after each transformation, then report whether
# either transformed tensor contains NaNs.
for sample_row in (x_improve_GPT[0], x_star_GPT[0], X_original[0]):
    print(sample_row)
for transformed in (x_improve_GPT, x_star_GPT):
    print(np.isnan(transformed.detach().numpy()).any())

In [None]:
# Score the original, improvement-only and full strategic inputs with h,
# rounded to 3 decimals and flattened to 1-D arrays.
original_scores, improve_scores, strategy_scores = (
    np.round(h(batch).detach().numpy(), 3).reshape(-1)
    for batch in (X_original, x_improve_GPT, x_star_GPT)
)

# Mean score of each variant, in the order original / improve / strategy.
for score_vector in (original_scores, improve_scores, strategy_scores):
    print(score_vector.mean())

In [None]:
# Group label of every sample (values 0 / 1 are used for the split below).
g = np.array(original['z'])

# np.save does not create missing directories, so make sure the target exists.
os.makedirs('results', exist_ok=True)
out_prefix = 'results/4o_hiring'

# save scores
np.save(f'{out_prefix}_improve_scores_GPT.npy', improve_scores)
np.save(f'{out_prefix}_strategy_scores_GPT.npy', strategy_scores)

# save efforts
np.save(f'{out_prefix}_efforts_GPT.npy', efforts_GPT)

# save groupwise scores and efforts
scores_by_kind = {
    'original': original_scores,
    'improve': improve_scores,
    'strategy': strategy_scores,
}
for group_label in (0, 1):
    in_group = g == group_label
    for kind, scores in scores_by_kind.items():
        np.save(f'{out_prefix}_{kind}_scores_{group_label}_GPT.npy', scores[in_group])
    np.save(f'{out_prefix}_efforts_{group_label}_GPT.npy', efforts_GPT[in_group])


### LAW

In [None]:
# Column index lists for the law features; nc_idx is empty here, so no column
# is reset when x_improve is built further down.
c_idx = [0,1]
nc_idx = []

# Two logistic-regression models over 3 inputs, restored from saved weights.
# map_location='cpu' lets the checkpoints load on a machine without a GPU even
# if they were saved from one; the rest of the notebook runs on CPU tensors.
h, f = logReg(3), logReg(3)
h.load_state_dict(torch.load('h_models/h_law_lr.pth', map_location='cpu'))
f.load_state_dict(torch.load('f_models/f_lr_law.pth', map_location='cpu'))

In [None]:
# Load the GPT responses for the law task and preview the second record.
responses_path = 'data/4o_law_valid.json'
GPT_output = read_json(responses_path)
GPT_output[1]

In [None]:
# Load the sampled students; the group attribute z is the 'sex' column.
original = pd.read_csv('data/bar_pass_data_sample.csv')
original['z'] = original['sex']
X_original = original[['ugpa','lsat','z']]

# Standardize with the mean / std_dev exposed by the LawDataset object.
data = LawDataset(device='cpu', decision=False)
X_original = (X_original - data.mean)/data.std_dev
X_original = torch.Tensor(X_original.to_numpy())

# simulate best response
# We: per-feature multipliers applied to the reported efforts (same order as `features`).
We = torch.tensor([0.5,0.5], dtype = torch.float32)
# Keys as they appear in the GPT responses (upper case, unlike the CSV columns).
features = ['UGPA','LSAT']

# One effort row per sample; derive the count from the data instead of a
# hard-coded 1000 so the two cannot drift apart.
n_samples = X_original.shape[0]
if len(GPT_output) < n_samples:
    raise ValueError(
        'GPT_output has %d responses but %d samples were loaded' % (len(GPT_output), n_samples)
    )

efforts_GPT = np.zeros((n_samples, len(features)))
for i in range(n_samples):
    # read efforts
    item = GPT_output[i]
    # `feat` (not `f`) so the model `f` loaded in an earlier cell is not overwritten.
    for j, feat in enumerate(features):
        if feat not in item or item[feat] == {}:
            continue  # no effort reported for this feature -> stays 0
        # Any direction other than 'decrease' is treated as an increase.
        sign = -1.0 if item[feat]['Direction'] == 'decrease' else 1.0
        efforts_GPT[i, j] = sign * item[feat]['Effort']

# get the corresponding effort, x_star, x_improve for ChatGPT
# x_star: columns 0 and 1 (ugpa, lsat) shifted by the weighted efforts.
x_star_GPT = X_original.clone()
x_star_GPT[:,[0,1]]  = X_original[:,[0,1]] + torch.Tensor(efforts_GPT)*We
# x_improve: same as x_star, but the nc_idx columns are reset to their original values.
x_improve_GPT = x_star_GPT.clone()
x_improve_GPT[:,nc_idx] = X_original[:,nc_idx]

In [None]:
# Sanity check: show sample 1 after each transformation, then report whether
# either transformed tensor contains NaNs.
for sample_row in (x_improve_GPT[1], x_star_GPT[1], X_original[1]):
    print(sample_row)
for transformed in (x_improve_GPT, x_star_GPT):
    print(np.isnan(transformed.detach().numpy()).any())

In [None]:
# Score the original, improvement-only and full strategic inputs with h,
# rounded to 3 decimals and flattened to 1-D arrays.
original_scores, improve_scores, strategy_scores = (
    np.round(h(batch).detach().numpy(), 3).reshape(-1)
    for batch in (X_original, x_improve_GPT, x_star_GPT)
)

# Mean score of each variant, in the order original / improve / strategy.
for score_vector in (original_scores, improve_scores, strategy_scores):
    print(score_vector.mean())

In [None]:
# Group label of every sample (values 1 / 2 are used for the split below).
g = np.array(original['z'])

# np.save does not create missing directories, so make sure the target exists.
os.makedirs('results', exist_ok=True)
out_prefix = 'results/4o_law'

# save scores
np.save(f'{out_prefix}_improve_scores_GPT.npy', improve_scores)
np.save(f'{out_prefix}_strategy_scores_GPT.npy', strategy_scores)

# save efforts
np.save(f'{out_prefix}_efforts_GPT.npy', efforts_GPT)

# save groupwise scores and efforts
scores_by_kind = {
    'original': original_scores,
    'improve': improve_scores,
    'strategy': strategy_scores,
}
# File suffix 0 holds the samples with g == 1, suffix 1 those with g == 2.
for group_label, group_value in ((0, 1), (1, 2)):
    in_group = g == group_value
    for kind, scores in scores_by_kind.items():
        np.save(f'{out_prefix}_{kind}_scores_{group_label}_GPT.npy', scores[in_group])
    np.save(f'{out_prefix}_efforts_{group_label}_GPT.npy', efforts_GPT[in_group])


### Credit

- Specify strategic indices

In [None]:
# Column index lists for the credit features; the nc_idx columns are the ones
# reset to their original values when x_improve is built.
c_idx, nc_idx = [2, 3], [0, 4, 6]

- Get the decision models

In [None]:
# Two logistic-regression models over 10 inputs, restored from saved weights.
# map_location='cpu' lets the checkpoints load on a machine without a GPU even
# if they were saved from one; the rest of the notebook runs on CPU tensors.
h, f = logReg(10), logReg(10)
h.load_state_dict(torch.load('h_models/h_credit_lr.pth', map_location='cpu'))
f.load_state_dict(torch.load('f_models/f_LR_credit.pth', map_location='cpu'))

- Simulate best responses

In [None]:
# Load the GPT responses for the credit task and preview the first record.
responses_path = 'data/4o_credit_valid.json'
GPT_output = read_json(responses_path)
GPT_output[0]

In [None]:
data = CreditDataset(device = 'cpu', decision=False)
original = pd.read_csv('data/balanced_test_dataset.csv')
# Group attribute: 0 when age > 35, otherwise 1.
original['z'] = np.where(original['age'] > 35, 0, 1)
X_original = original.drop(columns = ['age','question', 'qualification_status'])

# Standardize with the mean / std_dev exposed by the CreditDataset object.
X_original = (X_original - data.mean)/data.std_dev
X_original = torch.Tensor(X_original.to_numpy())

# Keys looked up in each GPT response. Effort column j is applied to tensor
# column [2,3,0,4,6][j] below.
# NOTE(review): this assumes the CSV column order matches that mapping -- confirm.
features = ['DebtRatio', 'MonthlyIncome', 'RevolvingUtilizationOfUnsecuredLines','NumberOfOpenCreditLinesAndLoans', 'NumberRealEstateLoansOrLines']
# simulate best response
# We: per-feature multipliers applied to the reported efforts (same order as `features`).
We = torch.tensor([2,0.5,0.5,2,2], dtype = torch.float32)

# One effort row per sample; derive the count from the data instead of a
# hard-coded 1000 so the two cannot drift apart.
n_samples = X_original.shape[0]
if len(GPT_output) < n_samples:
    raise ValueError(
        'GPT_output has %d responses but %d samples were loaded' % (len(GPT_output), n_samples)
    )

efforts_GPT = np.zeros((n_samples, len(features)))
for i in range(n_samples):
    # read efforts
    item = GPT_output[i]
    # `feat` (not `f`) so the model `f` loaded in an earlier cell is not overwritten.
    for j, feat in enumerate(features):
        if feat not in item or item[feat] == {}:
            continue  # no effort reported for this feature -> stays 0
        # Any direction other than 'decrease' is treated as an increase.
        sign = -1.0 if item[feat]['Direction'] == 'decrease' else 1.0
        efforts_GPT[i, j] = sign * item[feat]['Effort']


# get the corresponding effort, x_star, x_improve for ChatGPT
# x_star: columns [2,3,0,4,6] shifted by the weighted efforts.
x_star_GPT = X_original.clone()
x_star_GPT[:,[2,3,0,4,6]]  = X_original[:,[2,3,0,4,6]] + torch.Tensor(efforts_GPT)*We
# x_improve: same as x_star, but the nc_idx columns are reset to their original values.
x_improve_GPT = x_star_GPT.clone()
x_improve_GPT[:,nc_idx] = X_original[:,nc_idx]

In [None]:
# Average signed effort per feature across all samples.
np.mean(efforts_GPT, axis=0)

- Sanity check

In [None]:
# Sanity check: show sample 0 after each transformation, then report whether
# either transformed tensor contains NaNs.
for sample_row in (x_improve_GPT[0], x_star_GPT[0], X_original[0]):
    print(sample_row)
for transformed in (x_improve_GPT, x_star_GPT):
    print(np.isnan(transformed.detach().numpy()).any())

- Get score increase

In [None]:
# Score the original, improvement-only and full strategic inputs with h,
# rounded to 3 decimals and flattened to 1-D arrays.
original_scores, improve_scores, strategy_scores = (
    np.round(h(batch).detach().numpy(), 3).reshape(-1)
    for batch in (X_original, x_improve_GPT, x_star_GPT)
)

# Mean score of each variant, in the order original / improve / strategy.
for score_vector in (original_scores, improve_scores, strategy_scores):
    print(score_vector.mean())

- Save results

In [None]:
# Group label of every sample (values 0 / 1 are used for the split below).
g = np.array(original['z'])

# np.save does not create missing directories, so make sure the target exists.
os.makedirs('results', exist_ok=True)
out_prefix = 'results/4o_credit'

# save scores
np.save(f'{out_prefix}_improve_scores_GPT.npy', improve_scores)
np.save(f'{out_prefix}_strategy_scores_GPT.npy', strategy_scores)

# save efforts
np.save(f'{out_prefix}_efforts_GPT.npy', efforts_GPT)

# save groupwise scores and efforts
scores_by_kind = {
    'original': original_scores,
    'improve': improve_scores,
    'strategy': strategy_scores,
}
for group_label in (0, 1):
    in_group = g == group_label
    for kind, scores in scores_by_kind.items():
        np.save(f'{out_prefix}_{kind}_scores_{group_label}_GPT.npy', scores[in_group])
    np.save(f'{out_prefix}_efforts_{group_label}_GPT.npy', efforts_GPT[in_group])


### ACSIncome

In [None]:
# Column index lists for the ACSIncome features; nc_idx is empty here, so no
# column is reset when x_improve is built.
c_idx, nc_idx = [0, 1], []

In [None]:
# Two logistic-regression models over 4 inputs, restored from saved weights.
# map_location='cpu' lets the checkpoints load on a machine without a GPU even
# if they were saved from one; the rest of the notebook runs on CPU tensors.
h, f = logReg(4), logReg(4)
h.load_state_dict(torch.load('h_models/h_income_lr.pth', map_location='cpu'))
f.load_state_dict(torch.load('f_models/f_LR_income.pth', map_location='cpu'))

# Read best responses and preview the first record
GPT_output = read_json('data/4o_income_valid.json')
GPT_output[0]

In [None]:
original = pd.read_csv('data/ACSIncome_sample_raw.csv')
# Group attribute: 0 when AGEP > 35, otherwise 1.
original['z'] = np.where(original['AGEP'] > 35, 0, 1)
X_original = original[['SCHL','WKHP','SEX','z']]
# NOTE(review): unlike the other sections, the features are used as-is here
# (no (x - mean) / std_dev step). Confirm the income models expect raw inputs.
X_original = torch.Tensor(X_original.to_numpy())


# We: per-feature multipliers applied to the reported efforts (same order as `features`).
We = torch.tensor([1,2], dtype = torch.float32)
features = ['SCHL','WKHP']

# One effort row per sample; derive the count from the data instead of a
# hard-coded 1000 so the two cannot drift apart.
n_samples = X_original.shape[0]
if len(GPT_output) < n_samples:
    raise ValueError(
        'GPT_output has %d responses but %d samples were loaded' % (len(GPT_output), n_samples)
    )

efforts_GPT = np.zeros((n_samples, len(features)))
for i in range(n_samples):
    # read efforts
    item = GPT_output[i]
    # `feat` (not `f`) so the model `f` loaded in an earlier cell is not overwritten.
    for j, feat in enumerate(features):
        if feat not in item or item[feat] == {}:
            continue  # no effort reported for this feature -> stays 0
        # Any direction other than 'decrease' is treated as an increase.
        sign = -1.0 if item[feat]['Direction'] == 'decrease' else 1.0
        efforts_GPT[i, j] = sign * item[feat]['Effort']


# get the corresponding effort, x_star, x_improve for ChatGPT
# x_star: the c_idx + nc_idx columns shifted by the weighted efforts.
x_star_GPT = X_original.clone()
x_star_GPT[:,c_idx+nc_idx]  = X_original[:,c_idx+nc_idx] + torch.Tensor(efforts_GPT)*We
# x_improve: same as x_star, but the nc_idx columns are reset to their original values.
x_improve_GPT = x_star_GPT.clone()
x_improve_GPT[:,nc_idx] = X_original[:,nc_idx]

In [None]:
# Sanity check: show sample 0 after each transformation, then report whether
# either transformed tensor contains NaNs.
for sample_row in (x_improve_GPT[0], x_star_GPT[0], X_original[0]):
    print(sample_row)
for transformed in (x_improve_GPT, x_star_GPT):
    print(np.isnan(transformed.detach().numpy()).any())

In [None]:
# Score the original, improvement-only and full strategic inputs with h,
# rounded to 3 decimals and flattened to 1-D arrays.
original_scores, improve_scores, strategy_scores = (
    np.round(h(batch).detach().numpy(), 3).reshape(-1)
    for batch in (X_original, x_improve_GPT, x_star_GPT)
)

# Mean score of each variant, in the order original / improve / strategy.
for score_vector in (original_scores, improve_scores, strategy_scores):
    print(score_vector.mean())

In [None]:
# Group label of every sample (values 0 / 1 are used for the split below).
g = np.array(original['z'])

# np.save does not create missing directories, so make sure the target exists.
os.makedirs('results', exist_ok=True)
out_prefix = 'results/4o_income'

# save scores
np.save(f'{out_prefix}_improve_scores_GPT.npy', improve_scores)
np.save(f'{out_prefix}_strategy_scores_GPT.npy', strategy_scores)

# save efforts
np.save(f'{out_prefix}_efforts_GPT.npy', efforts_GPT)

# save groupwise scores and efforts
scores_by_kind = {
    'original': original_scores,
    'improve': improve_scores,
    'strategy': strategy_scores,
}
for group_label in (0, 1):
    in_group = g == group_label
    for kind, scores in scores_by_kind.items():
        np.save(f'{out_prefix}_{kind}_scores_{group_label}_GPT.npy', scores[in_group])
    np.save(f'{out_prefix}_efforts_{group_label}_GPT.npy', efforts_GPT[in_group])


### ACSPAP

In [None]:
# Column index lists for the ACSPAP features; the nc_idx column is the one
# reset to its original values when x_improve is built.
c_idx, nc_idx = [1, 2], [0]

In [None]:
# Two logistic-regression models over 4 inputs, restored from saved weights.
# map_location='cpu' lets the checkpoints load on a machine without a GPU even
# if they were saved from one; the rest of the notebook runs on CPU tensors.
h, f = logReg(4), logReg(4)
h.load_state_dict(torch.load('h_models/h_pap_lr.pth', map_location='cpu'))
f.load_state_dict(torch.load('f_models/f_lr_pap.pth', map_location='cpu'))

# Read best responses and preview the first ten records
GPT_output = read_json('data/4o_pap_valid.json')
GPT_output[:10]

In [None]:
original = pd.read_csv('data/ACSPAP_sample.csv')
# Group attribute: 0 when AGEP > 35, otherwise 1.
original['z'] = np.where(original['AGEP'] > 35, 0, 1)
X_original = original[['WKHP','SCHL','PINCP','z']]

# Standardize with the mean / std_dev exposed by the ACSPAP dataset object.
data = ACSPAP(device='cpu', decision=False)
X_original = (X_original - data.mean)/data.std_dev
X_original = torch.Tensor(X_original.to_numpy())

# simulate best response
# We: per-feature multipliers applied to the reported efforts (same order as `features`).
We = torch.tensor([2,1,1], dtype = torch.float32)
# Effort order SCHL, PINCP, WKHP corresponds to tensor columns 1, 2, 0.
features = ['SCHL','PINCP','WKHP']

# One effort row per sample; derive the count from the data instead of a
# hard-coded 1000 so the two cannot drift apart.
n_samples = X_original.shape[0]
if len(GPT_output) < n_samples:
    raise ValueError(
        'GPT_output has %d responses but %d samples were loaded' % (len(GPT_output), n_samples)
    )

efforts_GPT = np.zeros((n_samples, len(features)))
for i in range(n_samples):
    # read efforts
    item = GPT_output[i]
    # `feat` (not `f`) so the model `f` loaded in an earlier cell is not overwritten.
    for j, feat in enumerate(features):
        if feat not in item or item[feat] == {}:
            continue  # no effort reported for this feature -> stays 0
        # Any direction other than 'decrease' is treated as an increase.
        sign = -1.0 if item[feat]['Direction'] == 'decrease' else 1.0
        efforts_GPT[i, j] = sign * item[feat]['Effort']


# get the corresponding effort, x_star, x_improve for ChatGPT
# x_star: columns [1,2,0] shifted by the weighted efforts.
x_star_GPT = X_original.clone()
x_star_GPT[:,[1,2,0]]  = X_original[:,[1,2,0]] + torch.Tensor(efforts_GPT)*We
# x_improve: same as x_star, but the nc_idx columns are reset to their original values.
x_improve_GPT = x_star_GPT.clone()
x_improve_GPT[:,nc_idx] = X_original[:,nc_idx]

In [None]:
# Sanity check: show sample 1 after each transformation, then report whether
# either transformed tensor contains NaNs.
for sample_row in (x_improve_GPT[1], x_star_GPT[1], X_original[1]):
    print(sample_row)
for transformed in (x_improve_GPT, x_star_GPT):
    print(np.isnan(transformed.detach().numpy()).any())

In [None]:
# Score the original, improvement-only and full strategic inputs with h,
# rounded to 3 decimals and flattened to 1-D arrays.
original_scores, improve_scores, strategy_scores = (
    np.round(h(batch).detach().numpy(), 3).reshape(-1)
    for batch in (X_original, x_improve_GPT, x_star_GPT)
)

# Mean score of each variant, in the order original / improve / strategy.
for score_vector in (original_scores, improve_scores, strategy_scores):
    print(score_vector.mean())

In [None]:
# Group label of every sample (values 0 / 1 are used for the split below).
g = np.array(original['z'])

# np.save does not create missing directories, so make sure the target exists.
os.makedirs('results', exist_ok=True)
out_prefix = 'results/4o_PAP'

# save scores
np.save(f'{out_prefix}_improve_scores_GPT.npy', improve_scores)
np.save(f'{out_prefix}_strategy_scores_GPT.npy', strategy_scores)

# save efforts
np.save(f'{out_prefix}_efforts_GPT.npy', efforts_GPT)

# save groupwise scores and efforts
scores_by_kind = {
    'original': original_scores,
    'improve': improve_scores,
    'strategy': strategy_scores,
}
for group_label in (0, 1):
    in_group = g == group_label
    for kind, scores in scores_by_kind.items():
        np.save(f'{out_prefix}_{kind}_scores_{group_label}_GPT.npy', scores[in_group])
    np.save(f'{out_prefix}_efforts_{group_label}_GPT.npy', efforts_GPT[in_group])
