# Import Data/Pkg, Process Data

In [1]:
using Pkg, CategoricalArrays, CSV, DataFrames, Statistics
using StatsBase

In [2]:
data = CSV.read("train.csv", DataFrame);

In [3]:
df = data;

In [4]:
df.Vehicle_Damage = map(x -> x == "Yes" ? 1 : 0, df.Vehicle_Damage);
df.Vehicle_Age = map(x -> x == "> 2 Years" ? 2 : x == "1-2 Year" ? 1 : 0, df.Vehicle_Age);
df[!, "Gender"] = categorical(df[!, "Gender"])
df[!, "Annual_Premium"] = df[!, "Annual_Premium"].*0.012;

In [5]:
t_options = 100:100:1000

100:100:1000

## Split Data

In [6]:
seed = 12345; 
X = df[:, Not([:Response, :Annual_Premium, :id, :Region_Code, :Policy_Sales_Channel])]
y = df.Response
t = df.Annual_Premium
(train_X, train_t, train_y), (test_X, test_t, test_y) = IAI.split_data(
    :policy_maximize, X, t, y, train_proportion=0.5, seed=seed);

In [7]:
train_t_discrete = [min(round(x, digits=-2), 1000) < 100 ? 100 : min(round(x, digits=-2), 1000) for x in train_t]
test_t_discrete = [min(round(x, digits=-2), 1000) < 100 ? 100 : min(round(x, digits=-2), 1000) for x in test_t];

In [8]:
countmap(test_t_discrete)

Dict{Real, Int64} with 10 entries:
  800.0  => 2615
  200.0  => 4109
  500.0  => 32548
  300.0  => 40374
  1000.0 => 1250
  900.0  => 1008
  600.0  => 14369
  700.0  => 5623
  400.0  => 56233
  100    => 32426

## Prescriptive Tree

In [60]:
countmap(train_y)

Dict{Int64, Int64} with 2 entries:
  0 => 167171
  1 => 23383

In [48]:
train_X_df = hcat(train_X, train_t_discrete);

In [103]:
xgb = IAI.XGBoostClassifier(
        random_seed=1, max_depth = 5, num_estimators=250, max_categoric_levels_before_warning=20, )
IAI.fit!(xgb,train_X_df,Array(train_y));

In [104]:
IAI.ROCCurve(xgb, train_X_df, train_y)



In [105]:
xgb_pred=IAI.predict(xgb, train_X_df)
countmap(xgb_pred)

Dict{Int64, Int64} with 2 entries:
  0 => 190153
  1 => 401

In [106]:
IAI.score(xgb, train_X_df, train_y, criterion=:auc)

0.8618371547551407

In [96]:
xgb_prob=IAI.predict_proba(xgb, train_X_df)[!,2];

In [113]:
prescriptive_grid = IAI.GridSearch(
    IAI.OptimalTreePrescriptionMaximizer(
      random_seed=1,
      localsearch=false,
    ),
    max_depth=1:6,
)
# multiple the t(price)revenue with binary var y(1,0) buy or not buy
train_y_revenue = train_y .* train_t_discrete;

In [114]:
IAI.fit!(prescriptive_grid, train_X, train_t_discrete, train_y_revenue)

In [99]:
pred_treatments, pred_outcomes = IAI.predict(prescriptive_grid, test_X)

(Real[1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0  …  1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 800.0, 1000.0, 1000.0, 900.0], [353.92188341230946, 353.92188341230946, 0.4917724791218916, 0.4917724791218916, 153.55254851636437, 290.6791643860439, 0.4917724791218916, 0.4917724791218916, 353.92188341230946, 235.6999018153208  …  353.92188341230946, 0.4917724791218916, 353.92188341230946, 265.00565848416755, 353.92188341230946, 265.00565848416755, 8.308620448224247, 353.92188341230946, 265.00565848416755, 44.68966798590759])

In [100]:
countmap(pred_treatments)

Dict{Real, Int64} with 4 entries:
  900.0  => 20640
  700.0  => 464
  1000.0 => 163135
  800.0  => 6316

In [115]:
pred_prob = pred_outcomes./pred_treatments;
pred_binary = pred_prob .>= 0.5;

In [116]:
sum(pred_outcomes.*pred_binary)

0.0

### Prescriptive Tree Evaulation

## Real Data

In [22]:
# Get revenue observed for test set in reality
test_revenue = test_y .* test_t;
print("The real life data suggest the annual revenue would be ", sum(test_revenue))
print("\n")

test_revenue_cut = test_y .* test_t_discrete;
print("If we do the cutoff on real life data, the annual revenue would be ", sum(test_revenue_cut))

The real life data suggest the annual revenue would be 8.851845936e6
If we do the cutoff on real life data, the annual revenue would be 9.0881e6

## Our Prescription

In [33]:
reward_lnr = IAI.NumericClassificationRewardEstimator(
    outcome_estimator=IAI.XGBoostClassifier(num_round=10),
    outcome_insample_num_folds=2,
    reward_estimator=:direct_method,
    estimation_kernel_bandwidth=1,
    random_seed=seed,
)

function get_rewards(reward_lnr, X, t, y, t_options)
  predictions, score = IAI.fit_predict!(reward_lnr, X, t, y, t_options,
                                        outcome_score_criterion=:auc)
  rewards = predictions[:reward]
  for t in t_options
    rewards[!, Symbol(t)] = round.(rewards[!, Symbol(t)] .* t, digits=3)
  end
  rewards, score
end
train_rewards, train_reward_score = get_rewards(reward_lnr, train_X, train_t_discrete,
                                                train_y, t_options)
train_rewards

Row,100,200,300,400,500,600,700,800,900,1000
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,3.101,8.842,14.141,10.759,14.063,20.654,15.839,25.963,28.692,29.657
2,14.094,22.194,27.6,54.552,70.734,69.247,77.908,49.112,46.613,90.961
3,2.278,4.522,6.737,9.259,11.383,13.723,16.175,19.322,22.002,23.923
4,2.278,4.522,6.737,9.259,11.383,13.723,16.175,19.322,22.002,23.923
5,28.416,47.233,86.22,126.352,169.403,195.783,267.792,207.891,280.271,424.877
6,2.274,4.586,6.769,9.159,11.161,13.658,15.839,19.812,21.535,23.377
7,25.744,21.061,58.848,62.675,97.485,192.127,141.875,92.315,523.358,81.112
8,12.288,13.36,41.945,60.443,58.114,76.321,111.072,138.247,146.848,126.628
9,2.278,4.522,6.737,9.259,11.383,13.723,16.175,19.322,65.753,23.923
10,27.917,37.25,100.471,128.733,159.227,195.783,264.231,504.633,368.833,309.24


In [35]:
train_reward_score[:outcome]

Dict{String, Float64} with 10 entries:
  "300"  => 0.881468
  "400"  => 0.852542
  "1000" => 0.771416
  "600"  => 0.813256
  "800"  => 0.814287
  "700"  => 0.804718
  "100"  => 0.78973
  "200"  => 0.898189
  "500"  => 0.831272
  "900"  => 0.728133

In [None]:
test_rewards, test_reward_score = get_rewards(reward_lnr, test_X, test_t_discrete,
                                              test_y, t_options)
test_rewards

In [46]:
test_reward_score[:outcome]

Dict{String, Float64} with 10 entries:
  "300"  => 0.880268
  "400"  => 0.851357
  "1000" => 0.799334
  "600"  => 0.81237
  "800"  => 0.787536
  "700"  => 0.800373
  "100"  => 0.78816
  "200"  => 0.89275
  "500"  => 0.827358
  "900"  => 0.768116

In [18]:
function evaluate(recommendations, outcomes, actual_revenue)
  n = length(recommendations)
  pred_revenue = [outcomes[i, recommendations[i]] for i in 1:n]

  improvement = mean(pred_revenue .- actual_revenue) / mean(actual_revenue)
end

evaluate (generic function with 1 method)

In [55]:
function evaluate2(recommendations, outcomes, actual_revenue)
  n = length(recommendations)
  pred_revenue = [outcomes[i, recommendations[i]] for i in 1:n]

  tot_rev = sum(pred_revenue)
end

evaluate2 (generic function with 1 method)

In [37]:
test_revenue = test_y .* test_t_discrete;

In [53]:
recommendations_pres = Symbol.(Int.(IAI.predict(prescriptive_grid, test_X)[1]))

190555-element Vector{Symbol}:
 Symbol("1000")
 Symbol("800")
 Symbol("800")
 Symbol("800")
 Symbol("800")
 Symbol("900")
 Symbol("800")
 Symbol("800")
 Symbol("1000")
 Symbol("900")
 Symbol("900")
 Symbol("800")
 Symbol("900")
 ⋮
 Symbol("800")
 Symbol("1000")
 Symbol("1000")
 Symbol("800")
 Symbol("1000")
 Symbol("1000")
 Symbol("1000")
 Symbol("1000")
 Symbol("1000")
 Symbol("1000")
 Symbol("900")
 Symbol("1000")

In [54]:
evaluate(recommendations_pres, test_rewards, test_revenue)

1.9656464036487267

In [56]:
evaluate2(recommendations_pres, test_rewards, test_revenue)

2.6952091080999993e7