In [8]:
using CSV, DataFrames, Statistics;

# Training OCT

## Read data

In [22]:
TRAINING_DATA_PATH = "../data/all_games_7_players.csv"

# Reading data
historic_data = CSV.read(TRAINING_DATA_PATH)[1:100,:]
X = historic_data[:, 2:end-1]
y = historic_data[:, :fp_p1]

# Selecting training columns
all_columns = names(X)
exclude_players_cols = all_columns
# exclude_players_cols = all_columns[.!endswith.(string.(all_columns),["p6"])]
# exclude_players_cols = exclude_players_cols[.!endswith.(string.(exclude_players_cols),["p7"])]
# exclude_players_cols = exclude_players_cols[.!endswith.(string.(exclude_players_cols),["p13"])]
# exclude_players_cols = exclude_players_cols[.!endswith.(string.(exclude_players_cols),["p14"])]
p1_cols = exclude_players_cols[endswith.(string.(all_columns), "_p1")];
fp_avg_cols =  exclude_players_cols[occursin.("fp_seas_avg", string.(all_columns))];
fp_l_cols =  exclude_players_cols[occursin.("fp_l5", string.(all_columns))];
train_cols = unique(vcat(p1_cols, fp_avg_cols, fp_l_cols));
X = X[:,train_cols];

train_proportion = 0.6
validation_proportion = 0.2
(train_X, train_y), (test_valid_X, test_valid_y) = IAI.split_data(:regression, X, y, seed=1, train_proportion=train_proportion);
(valid_X, valid_y), (test_X, test_y) = IAI.split_data(:regression, test_valid_X, test_valid_y, seed=1, train_proportion=validation_proportion/(1-train_proportion));

In [23]:
# Validation parameters
MAX_DEPTH = 8:8
CP=[0.0001]

# Default learner
default_lnr = IAI.OptimalTreeRegressor(
    random_seed=1,
    criterion=:mse,
    minbucket=10
    );

# Grid
grid = IAI.GridSearch(default_lnr,
    max_depth=MAX_DEPTH,
    cp=CP
);

print(grid)

GridSearch - Unfitted OptimalTreeRegressor:
  minbucket:   10
  random_seed: 1

GridSearch Params:
  (cp=0.0001,max_depth=8,)

In [24]:
# Fitting the grid
IAI.fit!(grid, train_X, train_y, valid_X, valid_y);

└ @ IAIBase C:\Users\iai\builds\InterpretableAI\SysImgBuilder\.julia\packages\IAIBase\ymcNn\src\precompile.jl:19
│ 1f11ab7e230722c068379caf77fc5c280b091387299cfab53194ef4c91f1afdc
└ @ IAIBase C:\Users\iai\builds\InterpretableAI\SysImgBuilder\.julia\packages\IAIBase\ymcNn\src\precompile.jl:29
[32mTraining trees...100%|██████████████████████████████████| Time: 0:00:08[39m


In [25]:
lnr = IAI.get_learner(grid);

# Retrieving best parameters
best_params = IAI.get_best_params(grid)
println(best_params)

grid_results = IAI.get_grid_results(grid)
println(grid_results)

var_importance = IAI.variable_importance(lnr)
println(var_importance)

train_accuracy = IAI.score(lnr,train_X, train_y, criterion=:mse);
valid_accuracy = IAI.score(lnr,valid_X, valid_y, criterion=:mse);
test_accuracy = IAI.score(lnr,test_X, test_y, criterion=:mse);
train_MAE = mean(abs.(IAI.predict(lnr, train_X) - train_y));
valid_MAE = mean(abs.(IAI.predict(lnr, valid_X) - valid_y));
test_MAE = mean(abs.(IAI.predict(lnr, test_X) - test_y));

println(string("Train R2 : ", train_accuracy))
println(string("Train MAE : ", train_MAE))

println(string("Valid R2 : ", valid_accuracy))
println(string("Valid MAE : ", valid_MAE))

println(string("Test R2 : ", test_accuracy))
println(string("Test MAE : ", test_MAE))

Dict{Symbol,Any}(:cp => 0.0001,:max_depth => 8)
1×5 DataFrame
│ Row │ max_depth │ cp      │ train_score │ valid_score │ rank_valid_score │
│     │ [90mInt64[39m     │ [90mFloat64[39m │ [90mFloat64[39m     │ [90mFloat64[39m     │ [90mInt64[39m            │
├─────┼───────────┼─────────┼─────────────┼─────────────┼──────────────────┤
│ 1   │ 8         │ 0.0001  │ 0.679679    │ -0.321997   │ 1                │
54×2 DataFrame
│ Row │ Feature         │ Importance  │
│     │ [90mSymbol[39m          │ [90mFloat64[39m     │
├─────┼─────────────────┼─────────────┤
│ 1   │ fga_seas_avg_p1 │ 0.200964    │
│ 2   │ fp_seas_avg_p1  │ 0.124483    │
│ 3   │ fgm_seas_avg_p1 │ 0.0941942   │
│ 4   │ ast_l5_p1       │ 0.0800249   │
│ 5   │ ftm_seas_avg_p1 │ 0.0387903   │
│ 6   │ ast_seas_avg_p1 │ 0.0299923   │
│ 7   │ stl_l5_p1       │ 0.0292826   │
│ 8   │ stl_seas_avg_p1 │ 0.0258507   │
│ 9   │ fp_l5_p7        │ 0.0255509   │
│ 10  │ fta_seas_avg_p1 │ 0.0249906   │
│ 11  │ fp_seas_avg_p3  │

In [26]:
IAI.write_html("../processed/OCTs/OCT.html", lnr);
IAI.write_json("../processed/OCTs/OCT.json", lnr);

In [27]:
train_X[:, :fp_p1] = convert(Array,train_y)
valid_X[:, :fp_p1] = convert(Array,valid_y)
test_X[:, :fp_p1] = convert(Array,test_y)
CSV.write("../processed/OCTs/oct_train_data.csv", train_X)
CSV.write("../processed/OCTs/oct_valid_data.csv", valid_X)
CSV.write("../processed/OCTs/oct_test_data.csv", test_X)

│     df[!, col_ind] = v
│     df
│ end` instead.
│   caller = top-level scope at In[27]:1
└ @ Core In[27]:1
│     df[!, col_ind] = v
│     df
│ end` instead.
│   caller = top-level scope at In[27]:2
└ @ Core In[27]:2
│     df[!, col_ind] = v
│     df
│ end` instead.
│   caller = top-level scope at In[27]:3
└ @ Core In[27]:3


"../processed/OCTs/oct_test_data.csv"