# Prediction Analysis

# Train 1st half, Test 2nd half (2019, 1 year imputation)

## Reading in Data

In [3]:
using CSV, DataFrames, Statistics, Random

In [13]:
train_2019 = DataFrame(CSV.File("../../data/Imputed_Final/train_0.5_2019_imputed_full.csv", header=true,
        missingstring="NA", categorical=true))
test_2019 = DataFrame(CSV.File("../../data/Imputed_Final/test_0.5_2019_imputed_full.csv", header=true,
        missingstring="NA", categorical=true))
# Making variables categorical variables, train set
categorical!(train_2019, :result)
categorical!(train_2019, :playoffs)
# Same categorical for testing set
categorical!(test_2019, :result)
categorical!(test_2019, :playoffs)
;

In [14]:
train_2019_X = train_2019[:,Not(:result)]
train_2019_y = train_2019[:,:result]
test_2019_X = test_2019[:,Not(:result)]
test_2019_y = test_2019[:,:result]
;

## Initial Try 

In [21]:
lnr_2019 = IAI.OptimalTreeClassifier(random_seed=15095, 
    max_depth=7, 
    cp=0.00334765, 
    minbucket=20, 
    missingdatamode=:always_right, 
    treat_unknown_level_missing=true)

IAI.fit!(lnr_2019,train_2019_X,train_2019_y)
;

[32mTraining trees...                  100%|████████████████| Time: 0:04:54[39m




In [22]:
IAI.score(lnr_2019, test_2019_X, test_2019_y, criterion=:auc)

0.5256041224954014

In [23]:
IAI.score(lnr_2019, test_2019_X, test_2019_y, criterion=:accuracy, positive_label = 1)

0.5262303948080044

## CV 1

In [29]:
#### CROSS VALIDATION
lnr_2019_gini = IAI.OptimalTreeClassifier(random_seed=15095, 
    criterion=:gini,
    treat_unknown_level_missing=true, 
    missingdatamode=:separate_class)

grid_2019_gini_1 = IAI.GridSearch(lnr_2019_gini,
    max_depth=[3,7,10],
    minbucket = [20,30,40]
)
IAI.fit!(grid_2019_gini_1, train_2019_X, train_2019_y, validation_criterion=:auc)

[32mParameter combination...             0%|                |  ETA: N/A[39m
[A4m  Parameters:  minbucket=>20 max_depth=>3[39m

[A2mTraining trees...                    0%|                |  ETA: N/A[39m

[A2mTraining trees...                    1%|▏               |  ETA: 0:01:20[39m

[A2mTraining trees...                    2%|▍               |  ETA: 0:01:32[39m

[A2mTraining trees...                    3%|▌               |  ETA: 0:01:16[39m

[A2mTraining trees...                    4%|▋               |  ETA: 0:01:30[39m

[A2mTraining trees...                    5%|▊               |  ETA: 0:01:24[39m

[A2mTraining trees...                    6%|█               |  ETA: 0:01:30[39m

[A2mTraining trees...                    7%|█▏              |  ETA: 0:01:25[39m

[A2mTraining trees...                    8%|█▎              |  ETA: 0:01:31[39m

[A2mTraining trees...                    9%|█▌              |  ETA: 0:01:25[39m

[A2mTraining trees...                   10

[A2mTraining trees...                   97%|███████████████▌|  ETA: 0:00:03[39m

[A2mTraining trees...                   98%|███████████████▋|  ETA: 0:00:02[39m

[A2mTraining trees...                   99%|███████████████▉|  ETA: 0:00:01[39m

[A2mTraining trees...                  100%|████████████████| Time: 0:01:23[39m
[32mParameter combination...            11%|█▊              |  ETA: 0:11:50[39m
[A4m  Parameters:  minbucket=>30 max_depth=>3[39m

[A2mTraining trees...                    0%|                |  ETA: N/A[39m

[A2mTraining trees...                    1%|▏               |  ETA: 0:01:12[39m

[A2mTraining trees...                    2%|▍               |  ETA: 0:01:25[39m

[A2mTraining trees...                    3%|▌               |  ETA: 0:01:08[39m

[A2mTraining trees...                    4%|▋               |  ETA: 0:01:30[39m

[A2mTraining trees...                    5%|▊               |  ETA: 0:01:23[39m

[A2mTraining trees...                  

[A2mTraining trees...                   93%|██████████████▉ |  ETA: 0:00:06[39m

[A2mTraining trees...                   94%|███████████████ |  ETA: 0:00:05[39m

[A2mTraining trees...                   95%|███████████████▎|  ETA: 0:00:04[39m

[A2mTraining trees...                   96%|███████████████▍|  ETA: 0:00:03[39m

[A2mTraining trees...                   97%|███████████████▌|  ETA: 0:00:02[39m

[A2mTraining trees...                   98%|███████████████▋|  ETA: 0:00:02[39m

[A2mTraining trees...                   99%|███████████████▉|  ETA: 0:00:01[39m

[A2mTraining trees...                  100%|████████████████| Time: 0:01:22[39m
[32mParameter combination...            22%|███▌            |  ETA: 0:10:04[39m
[A4m  Parameters:  minbucket=>40 max_depth=>3[39m

[A2mTraining trees...                    0%|                |  ETA: N/A[39m

[A2mTraining trees...                    1%|▏               |  ETA: 0:02:30[39m

[A2mTraining trees...                  

[A2mTraining trees...                   89%|██████████████▎ |  ETA: 0:00:08[39m

[A2mTraining trees...                   90%|██████████████▍ |  ETA: 0:00:08[39m

[A2mTraining trees...                   91%|██████████████▌ |  ETA: 0:00:07[39m

[A2mTraining trees...                   92%|██████████████▊ |  ETA: 0:00:06[39m

[A2mTraining trees...                   93%|██████████████▉ |  ETA: 0:00:05[39m

[A2mTraining trees...                   94%|███████████████ |  ETA: 0:00:05[39m

[A2mTraining trees...                   95%|███████████████▎|  ETA: 0:00:04[39m

[A2mTraining trees...                   96%|███████████████▍|  ETA: 0:00:03[39m

[A2mTraining trees...                   97%|███████████████▌|  ETA: 0:00:02[39m

[A2mTraining trees...                   98%|███████████████▋|  ETA: 0:00:02[39m

[A2mTraining trees...                   99%|███████████████▉|  ETA: 0:00:01[39m

[A2mTraining trees...                  100%|████████████████| Time: 0:01:17[39m
[32m

[A2mTraining trees...                   85%|█████████████▋  |  ETA: 0:01:06[39m

[A2mTraining trees...                   86%|█████████████▊  |  ETA: 0:01:02[39m

[A2mTraining trees...                   87%|█████████████▉  |  ETA: 0:00:58[39m

[A2mTraining trees...                   88%|██████████████▏ |  ETA: 0:00:53[39m

[A2mTraining trees...                   89%|██████████████▎ |  ETA: 0:00:49[39m

[A2mTraining trees...                   90%|██████████████▍ |  ETA: 0:00:45[39m

[A2mTraining trees...                   91%|██████████████▌ |  ETA: 0:00:40[39m

[A2mTraining trees...                   92%|██████████████▊ |  ETA: 0:00:36[39m

[A2mTraining trees...                   93%|██████████████▉ |  ETA: 0:00:31[39m

[A2mTraining trees...                   94%|███████████████ |  ETA: 0:00:27[39m

[A2mTraining trees...                   95%|███████████████▎|  ETA: 0:00:22[39m

[A2mTraining trees...                   96%|███████████████▍|  ETA: 0:00:18[39m

[A2

[A2mTraining trees...                   81%|█████████████   |  ETA: 0:01:05[39m

[A2mTraining trees...                   82%|█████████████▏  |  ETA: 0:01:02[39m

[A2mTraining trees...                   83%|█████████████▎  |  ETA: 0:00:58[39m

[A2mTraining trees...                   84%|█████████████▌  |  ETA: 0:00:54[39m

[A2mTraining trees...                   85%|█████████████▋  |  ETA: 0:00:51[39m

[A2mTraining trees...                   86%|█████████████▊  |  ETA: 0:00:48[39m

[A2mTraining trees...                   87%|█████████████▉  |  ETA: 0:00:44[39m

[A2mTraining trees...                   88%|██████████████▏ |  ETA: 0:00:41[39m

[A2mTraining trees...                   89%|██████████████▎ |  ETA: 0:00:37[39m

[A2mTraining trees...                   90%|██████████████▍ |  ETA: 0:00:34[39m

[A2mTraining trees...                   91%|██████████████▌ |  ETA: 0:00:30[39m

[A2mTraining trees...                   92%|██████████████▊ |  ETA: 0:00:27[39m

[A2

[A2mTraining trees...                   77%|████████████▍   |  ETA: 0:01:01[39m

[A2mTraining trees...                   78%|████████████▌   |  ETA: 0:00:58[39m

[A2mTraining trees...                   79%|████████████▋   |  ETA: 0:00:56[39m

[A2mTraining trees...                   80%|████████████▊   |  ETA: 0:00:53[39m

[A2mTraining trees...                   81%|█████████████   |  ETA: 0:00:50[39m

[A2mTraining trees...                   82%|█████████████▏  |  ETA: 0:00:47[39m

[A2mTraining trees...                   83%|█████████████▎  |  ETA: 0:00:45[39m

[A2mTraining trees...                   84%|█████████████▌  |  ETA: 0:00:42[39m

[A2mTraining trees...                   85%|█████████████▋  |  ETA: 0:00:39[39m

[A2mTraining trees...                   86%|█████████████▊  |  ETA: 0:00:37[39m

[A2mTraining trees...                   87%|█████████████▉  |  ETA: 0:00:34[39m

[A2mTraining trees...                   88%|██████████████▏ |  ETA: 0:00:31[39m

[A2

[A2mTraining trees...                   73%|███████████▋    |  ETA: 0:01:32[39m

[A2mTraining trees...                   74%|███████████▉    |  ETA: 0:01:29[39m

[A2mTraining trees...                   75%|████████████    |  ETA: 0:01:25[39m

[A2mTraining trees...                   76%|████████████▏   |  ETA: 0:01:22[39m

[A2mTraining trees...                   77%|████████████▍   |  ETA: 0:01:19[39m

[A2mTraining trees...                   78%|████████████▌   |  ETA: 0:01:15[39m

[A2mTraining trees...                   79%|████████████▋   |  ETA: 0:01:12[39m

[A2mTraining trees...                   80%|████████████▊   |  ETA: 0:01:08[39m

[A2mTraining trees...                   81%|█████████████   |  ETA: 0:01:05[39m

[A2mTraining trees...                   82%|█████████████▏  |  ETA: 0:01:02[39m

[A2mTraining trees...                   83%|█████████████▎  |  ETA: 0:00:58[39m

[A2mTraining trees...                   84%|█████████████▌  |  ETA: 0:00:55[39m

[A2

[A2mTraining trees...                   69%|███████████     |  ETA: 0:01:20[39m

[A2mTraining trees...                   70%|███████████▎    |  ETA: 0:01:17[39m

[A2mTraining trees...                   71%|███████████▍    |  ETA: 0:01:15[39m

[A2mTraining trees...                   72%|███████████▌    |  ETA: 0:01:12[39m

[A2mTraining trees...                   73%|███████████▋    |  ETA: 0:01:09[39m

[A2mTraining trees...                   74%|███████████▉    |  ETA: 0:01:07[39m

[A2mTraining trees...                   75%|████████████    |  ETA: 0:01:04[39m

[A2mTraining trees...                   76%|████████████▏   |  ETA: 0:01:01[39m

[A2mTraining trees...                   77%|████████████▍   |  ETA: 0:00:59[39m

[A2mTraining trees...                   78%|████████████▌   |  ETA: 0:00:56[39m

[A2mTraining trees...                   79%|████████████▋   |  ETA: 0:00:54[39m

[A2mTraining trees...                   80%|████████████▊   |  ETA: 0:00:51[39m

[A2

[A2mTraining trees...                   65%|██████████▍     |  ETA: 0:01:06[39m

[A2mTraining trees...                   66%|██████████▌     |  ETA: 0:01:04[39m

[A2mTraining trees...                   67%|██████████▊     |  ETA: 0:01:02[39m

[A2mTraining trees...                   68%|██████████▉     |  ETA: 0:01:00[39m

[A2mTraining trees...                   69%|███████████     |  ETA: 0:00:58[39m

[A2mTraining trees...                   70%|███████████▎    |  ETA: 0:00:56[39m

[A2mTraining trees...                   71%|███████████▍    |  ETA: 0:00:54[39m

[A2mTraining trees...                   72%|███████████▌    |  ETA: 0:00:52[39m

[A2mTraining trees...                   73%|███████████▋    |  ETA: 0:00:50[39m

[A2mTraining trees...                   74%|███████████▉    |  ETA: 0:00:48[39m

[A2mTraining trees...                   75%|████████████    |  ETA: 0:00:46[39m

[A2mTraining trees...                   76%|████████████▏   |  ETA: 0:00:45[39m

[A2








[A2mTraining trees...                    0%|                |  ETA: N/A[39m

[A2mTraining trees...                    1%|▏               |  ETA: 0:02:44[39m

[A2mTraining trees...                    2%|▍               |  ETA: 0:02:36[39m

[A2mTraining trees...                    3%|▌               |  ETA: 0:02:08[39m

[A2mTraining trees...                    4%|▋               |  ETA: 0:02:23[39m

[A2mTraining trees...                    5%|▊               |  ETA: 0:02:15[39m

[A2mTraining trees...                    6%|█               |  ETA: 0:02:16[39m

[A2mTraining trees...                    7%|█▏              |  ETA: 0:02:06[39m

[A2mTraining trees...                    8%|█▎              |  ETA: 0:02:06[39m

[A2mTraining trees...                    9%|█▌              |  ETA: 0:02:06[39m

[A2mTraining trees...                   10%|█▋              |  ETA: 0:02:05[39m

[A2mTraining trees...                   11%|█▊              |  ETA: 0:02:00[39m

[A2mT

[A2mTraining trees...                   98%|███████████████▋|  ETA: 0:00:03[39m

[A2mTraining trees...                   99%|███████████████▉|  ETA: 0:00:01[39m

[A2mTraining trees...                  100%|████████████████| Time: 0:02:04[39m
[32mRefitting with best parameters...  100%|████████████████| Time: 0:02:05[39m
[A4m  Parameters:  minbucket=>30 cp=>0.01223 max_depth=>10[39m






All Grid Results:

│ Row │ minbucket │ max_depth │ cp        │ train_score │ valid_score │
│     │ [90mInt64[39m     │ [90mInt64[39m     │ [90mFloat64[39m   │ [90mFloat64[39m     │ [90mFloat64[39m     │
├─────┼───────────┼───────────┼───────────┼─────────────┼─────────────┤
│ 1   │ 20        │ 3         │ 0.0225639 │ 0.142259    │ 0.552504    │
│ 2   │ 20        │ 7         │ 0.0149407 │ 0.522391    │ 0.559303    │
│ 3   │ 20        │ 10        │ 0.0120575 │ 0.545111    │ 0.572057    │
│ 4   │ 30        │ 3         │ 0.0224873 │ 0.142259    │ 0.561091    │
│ 5   │ 30        │ 7         │ 0.0142513 │ 0.440542    │ 0.563934    │
│ 6   │ 30        │ 10        │ 0.012229  │ 0.453549    │ 0.574938    │
│ 7   │ 40        │ 3         │ 0.0211817 │ 0.148633    │ 0.569714    │
│ 8   │ 40        │ 7         │ 0.0157749 │ 0.377466    │ 0.558947    │
│ 9   │ 40        │ 10        │ 0.0135602 │ 0.386191    │ 0.563167    │

│ Row │ rank_valid_score │
│     │ [90mInt64[39m            │
├─

In [30]:
lnr_2019_gini_best_1 = IAI.get_learner(grid_2019_gini_1)

AUC_OCT_2019_gini_grid_1 = IAI.score(lnr_2019_gini_best_1,test_2019_X, test_2019_y,criterion=:auc)

0.4888559700828899

## CV 2

In [26]:
#### CROSS VALIDATION
lnr_2019_gini = IAI.OptimalTreeClassifier(random_seed=15095, 
    criterion=:gini,
    treat_unknown_level_missing=true, 
    missingdatamode=:separate_class)

grid_2019_gini_2 = IAI.GridSearch(lnr_2019_gini,
    max_depth=[6,7,8],
    minbucket = [15,20,25]
)
IAI.fit!(grid_2019_gini_2, train_2019_X, train_2019_y)

[32mParameter combination...             0%|                |  ETA: N/A[39m
[A4m  Parameters:  minbucket=>15 max_depth=>6[39m

[A2mTraining trees...                    0%|                |  ETA: N/A[39m

[A2mTraining trees...                    1%|▏               |  ETA: 0:10:32[39m

[A2mTraining trees...                    2%|▍               |  ETA: 0:08:41[39m

[A2mTraining trees...                    3%|▌               |  ETA: 0:09:11[39m

[A2mTraining trees...                    4%|▋               |  ETA: 0:08:07[39m

[A2mTraining trees...                    5%|▊               |  ETA: 0:07:24[39m

[A2mTraining trees...                    6%|█               |  ETA: 0:07:00[39m

[A2mTraining trees...                    7%|█▏              |  ETA: 0:06:59[39m

[A2mTraining trees...                    8%|█▎              |  ETA: 0:07:03[39m

[A2mTraining trees...                    9%|█▌              |  ETA: 0:06:45[39m

[A2mTraining trees...                   10

[A2mTraining trees...                   93%|██████████████▉ |  ETA: 0:00:21[39m

[A2mTraining trees...                   94%|███████████████ |  ETA: 0:00:18[39m

[A2mTraining trees...                   95%|███████████████▎|  ETA: 0:00:15[39m

[A2mTraining trees...                   96%|███████████████▍|  ETA: 0:00:12[39m

[A2mTraining trees...                   97%|███████████████▌|  ETA: 0:00:09[39m

[A2mTraining trees...                   98%|███████████████▋|  ETA: 0:00:06[39m

[A2mTraining trees...                   99%|███████████████▉|  ETA: 0:00:03[39m

[A2mTraining trees...                  100%|████████████████| Time: 0:05:02[39m
[32mParameter combination...            22%|███▌            |  ETA: 0:37:50[39m
[A4m  Parameters:  minbucket=>25 max_depth=>6[39m

[A2mTraining trees...                    0%|                |  ETA: N/A[39m

[A2mTraining trees...                    1%|▏               |  ETA: 0:04:52[39m

[A2mTraining trees...                  

[A2mTraining trees...                   85%|█████████████▋  |  ETA: 0:01:01[39m

[A2mTraining trees...                   86%|█████████████▊  |  ETA: 0:00:57[39m

[A2mTraining trees...                   87%|█████████████▉  |  ETA: 0:00:53[39m

[A2mTraining trees...                   88%|██████████████▏ |  ETA: 0:00:48[39m

[A2mTraining trees...                   89%|██████████████▎ |  ETA: 0:00:44[39m

[A2mTraining trees...                   90%|██████████████▍ |  ETA: 0:00:40[39m

[A2mTraining trees...                   91%|██████████████▌ |  ETA: 0:00:36[39m

[A2mTraining trees...                   92%|██████████████▊ |  ETA: 0:00:32[39m

[A2mTraining trees...                   93%|██████████████▉ |  ETA: 0:00:28[39m

[A2mTraining trees...                   94%|███████████████ |  ETA: 0:00:24[39m

[A2mTraining trees...                   95%|███████████████▎|  ETA: 0:00:20[39m

[A2mTraining trees...                   96%|███████████████▍|  ETA: 0:00:16[39m

[A2

[A2mTraining trees...                   77%|████████████▍   |  ETA: 0:01:17[39m

[A2mTraining trees...                   78%|████████████▌   |  ETA: 0:01:14[39m

[A2mTraining trees...                   79%|████████████▋   |  ETA: 0:01:11[39m

[A2mTraining trees...                   80%|████████████▊   |  ETA: 0:01:07[39m

[A2mTraining trees...                   81%|█████████████   |  ETA: 0:01:04[39m

[A2mTraining trees...                   82%|█████████████▏  |  ETA: 0:01:01[39m

[A2mTraining trees...                   83%|█████████████▎  |  ETA: 0:00:58[39m

[A2mTraining trees...                   84%|█████████████▌  |  ETA: 0:00:54[39m

[A2mTraining trees...                   85%|█████████████▋  |  ETA: 0:00:51[39m

[A2mTraining trees...                   86%|█████████████▊  |  ETA: 0:00:47[39m

[A2mTraining trees...                   87%|█████████████▉  |  ETA: 0:00:44[39m

[A2mTraining trees...                   88%|██████████████▏ |  ETA: 0:00:41[39m

[A2

[A2mTraining trees...                   69%|███████████     |  ETA: 0:02:04[39m

[A2mTraining trees...                   70%|███████████▎    |  ETA: 0:01:59[39m

[A2mTraining trees...                   71%|███████████▍    |  ETA: 0:01:55[39m

[A2mTraining trees...                   72%|███████████▌    |  ETA: 0:01:51[39m

[A2mTraining trees...                   73%|███████████▋    |  ETA: 0:01:47[39m

[A2mTraining trees...                   74%|███████████▉    |  ETA: 0:01:43[39m

[A2mTraining trees...                   75%|████████████    |  ETA: 0:01:39[39m

[A2mTraining trees...                   76%|████████████▏   |  ETA: 0:01:35[39m

[A2mTraining trees...                   77%|████████████▍   |  ETA: 0:01:31[39m

[A2mTraining trees...                   78%|████████████▌   |  ETA: 0:01:27[39m

[A2mTraining trees...                   79%|████████████▋   |  ETA: 0:01:23[39m

[A2mTraining trees...                   80%|████████████▊   |  ETA: 0:01:19[39m

[A2








[A2mTraining trees...                    0%|                |  ETA: N/A[39m

[A2mTraining trees...                    1%|▏               |  ETA: 0:09:01[39m

[A2mTraining trees...                    2%|▍               |  ETA: 0:10:28[39m

[A2mTraining trees...                    3%|▌               |  ETA: 0:09:50[39m

[A2mTraining trees...                    4%|▋               |  ETA: 0:09:33[39m

[A2mTraining trees...                    5%|▊               |  ETA: 0:09:11[39m

[A2mTraining trees...                    6%|█               |  ETA: 0:09:23[39m

[A2mTraining trees...                    7%|█▏              |  ETA: 0:09:17[39m

[A2mTraining trees...                    8%|█▎              |  ETA: 0:09:24[39m

[A2mTraining trees...                    9%|█▌              |  ETA: 0:09:12[39m

[A2mTraining trees...                   10%|█▋              |  ETA: 0:08:55[39m

[A2mTraining trees...                   11%|█▊              |  ETA: 0:09:12[39m

[A2mT






All Grid Results:

│ Row │ minbucket │ max_depth │ cp         │ train_score │ valid_score │
│     │ [90mInt64[39m     │ [90mInt64[39m     │ [90mFloat64[39m    │ [90mFloat64[39m     │ [90mFloat64[39m     │
├─────┼───────────┼───────────┼────────────┼─────────────┼─────────────┤
│ 1   │ 15        │ 6         │ 0.00255243 │ 0.490726    │ 0.0511394   │
│ 2   │ 15        │ 7         │ 0.00684737 │ 0.592601    │ 0.0637895   │
│ 3   │ 15        │ 8         │ 0.00282704 │ 0.606603    │ 0.0595231   │
│ 4   │ 20        │ 6         │ 0.00112146 │ 0.488444    │ 0.0550825   │
│ 5   │ 20        │ 7         │ 0.00517693 │ 0.522391    │ 0.064183    │
│ 6   │ 20        │ 8         │ 0.00609566 │ 0.551697    │ 0.052002    │
│ 7   │ 25        │ 6         │ 0.00559687 │ 0.431264    │ 0.0422422   │
│ 8   │ 25        │ 7         │ 0.00135158 │ 0.494741    │ 0.0615133   │
│ 9   │ 25        │ 8         │ 2.65591e-5 │ 0.486881    │ 0.0478368   │

│ Row │ rank_valid_score │
│     │ [90mInt64[39m    

In [27]:
lnr_2019_gini_best_2 = IAI.get_learner(grid_2019_gini_2)

AUC_OCT_2019_gini_grid_2 = IAI.score(lnr_2019_gini_best_2, test_2019_X, test_2019_y,criterion=:auc)

0.5045148298440959

In [28]:
 IAI.score(lnr_2019_gini_best_2,test_2019_X, test_2019_y,criterion=:accuracy, positive_label=1)

0.5045970795024337