# Optimal Feature Analysis

# Train 1st half, Test 2nd half (2019, 1 year imputation)

## Reading in Data

In [1]:
using CSV, DataFrames, Statistics, Random

In [2]:
# Read the imputed 2019 train and test splits with identical parser options
# ("NA" is the missing-value marker; categorical=true is passed to CSV.File).
train_2019, test_2019 = map((
    "../../data/Imputed_Final/train_0.5_2019_imputed_full.csv",
    "../../data/Imputed_Final/test_0.5_2019_imputed_full.csv",
)) do csv_path
    DataFrame(CSV.File(csv_path; header=true, missingstring="NA", categorical=true))
end

# Convert :result and :playoffs to categorical columns, first in the training
# set and then in the testing set.
for frame in (train_2019, test_2019), column in (:result, :playoffs)
    categorical!(frame, column)
end
;

In [3]:
# Split each data set into predictors (every column except :result) and the
# :result target.
train_2019_X, train_2019_y = train_2019[:, Not(:result)], train_2019[:, :result]
test_2019_X, test_2019_y = test_2019[:, Not(:result)], test_2019[:, :result]
;

## Initial try

In [6]:
# Baseline model: entropy criterion with a fixed sparsity of 10.
lnr_2019 = IAI.OptimalFeatureSelectionClassifier(;
    criterion=:entropy,
    relaxation=true,
    sparsity=10,
    random_seed=15095,
)
IAI.fit!(lnr_2019, train_2019_X, train_2019_y) # , validation_criterion=:auc

└ @ IAILicensing /Users/iai/builds/InterpretableAI/SysImgBuilder/.julia/packages/IAILicensing/x1CT6/src/precompile.jl:19
│ d78e32e2e088eca49ebcc2d647b7d343912d22a263e67eb78b4f2c7a6cdec772
└ @ IAILicensing /Users/iai/builds/InterpretableAI/SysImgBuilder/.julia/packages/IAILicensing/x1CT6/src/precompile.jl:29


Fitted OptimalFeatureSelectionClassifier:
  Constant: -2.09258
  Weights:
    avg_death_6mon_sup:           3.49792
    avg_death_6mon_top_enemy:    -3.12307
    avg_dpm_year_bot:             0.00141018
    avg_kda_6mon_sup:             0.105538
    avg_kda_year_sup:             0.123177
    avg_mk_enemy_year_mid:        0.128911
    avg_mk_enemy_year_mid_enemy: -0.195288
    champion_bot==Morgana:        2.49925
    player_wr_6mon_jng:          -0.0492662
    player_wr_year_jng:           0.621634

In [7]:
# Out-of-sample AUC of the baseline model.
IAI.score(
    lnr_2019,
    test_2019_X,
    test_2019_y;
    criterion=:auc,
)

0.5826014359463996

## Grid Search 1 (Entropy)

In [22]:
# Entropy-criterion learner whose sparsity is left to the grid search.
lnr_2019_entropy = IAI.OptimalFeatureSelectionClassifier(;
    criterion=:entropy, relaxation=true, random_seed=15095
)

# Search sparsity values 5 through 20, validating on AUC.
grid_2019_1 = IAI.GridSearch(lnr_2019_entropy; sparsity=5:20)
IAI.fit!(grid_2019_1, train_2019_X, train_2019_y; validation_criterion=:auc)

[32mParameter combination...             0%|                |  ETA: N/A[39m
[A4m  Parameters:  sparsity=>5[39m
[32mParameter combination...             6%|█               |  ETA: 0:00:19[39m
[A4m  Parameters:  sparsity=>6[39m
[32mParameter combination...            12%|██              |  ETA: 0:00:18[39m
[A4m  Parameters:  sparsity=>7[39m
[32mParameter combination...            19%|███             |  ETA: 0:00:17[39m
[A4m  Parameters:  sparsity=>8[39m
[32mParameter combination...            25%|████            |  ETA: 0:00:15[39m
[A4m  Parameters:  sparsity=>9[39m
[32mParameter combination...            31%|█████           |  ETA: 0:00:15[39m
[A4m  Parameters:  sparsity=>10[39m
[32mParameter combination...            38%|██████          |  ETA: 0:00:14[39m
[A4m  Parameters:  sparsity=>11[39m
[32mParameter combination...            44%|███████         |  ETA: 0:00:13[39m
[A4m  Parameters:  sparsity=>12[39m
[32mParameter combination...            50%|████






[K[A[32mRefitting with best parameters...  100%|████████████████| Time: 0:00:02[39m
[34m  Parameters:  sparsity=>10[39m[A





All Grid Results:

│ Row │ sparsity │ train_score │ valid_score │ rank_valid_score │
│     │ Int64    │ Float64     │ Float64     │ Int64            │
├─────┼──────────┼─────────────┼─────────────┼──────────────────┤
│ 1   │ 5        │ 0.0355006   │ 0.590634    │ 12               │
│ 2   │ 6        │ 0.0376854   │ 0.575817    │ 16               │
│ 3   │ 7        │ 0.0424893   │ 0.587805    │ 14               │
│ 4   │ 8        │ 0.0412618   │ 0.579957    │ 15               │
│ 5   │ 9        │ 0.0468596   │ 0.613118    │ 6                │
│ 6   │ 10       │ 0.0529921   │ 0.624312    │ 1                │
│ 7   │ 11       │ 0.0533606   │ 0.621439    │ 2                │
│ 8   │ 12       │ 0.054227    │ 0.61692     │ 3                │
│ 9   │ 13       │ 0.0522509   │ 0.615851    │ 4                │
│ 10  │ 14       │ 0.0530451   │ 0.613304    │ 5                │
│ 11  │ 15       │ 0.0532717   │ 0.61075     │ 7                │
│ 12  │ 16       

In [23]:
# Pull the best learner out of the entropy grid search and report its test AUC.
lnr_2019_best_1 = IAI.get_learner(grid_2019_1)

IAI.score(
    lnr_2019_best_1,
    test_2019_X,
    test_2019_y;
    criterion=:auc,
)

0.5826014359463996

In [24]:
# Test-set accuracy of the best entropy learner, with label 1 as the positive class.
IAI.score(
    lnr_2019_best_1,
    test_2019_X,
    test_2019_y;
    criterion=:accuracy,
    positive_label=1,
)

0.5700378583017848

### Entropy criterion with a hand-picked sparsity (12)

In [10]:
# Entropy-criterion model with sparsity fixed at 12 instead of the
# grid-selected value.
lnr_entr = IAI.OptimalFeatureSelectionClassifier(;
    criterion=:entropy,
    relaxation=true,
    sparsity=12,
    random_seed=15095,
)
IAI.fit!(lnr_entr, train_2019_X, train_2019_y)

Fitted OptimalFeatureSelectionClassifier:
  Constant: -0.820746
  Weights:
    avg_dpm_year_bot:              0.00173915
    avg_dpm_year_top_enemy:       -0.00194085
    avg_gddiff_10_6mon_top_enemy: -0.000478949
    avg_gddiff_10_year_top_enemy: -0.000227173
    avg_kda_6mon_sup:              0.108274
    avg_kda_year_sup:              0.139831
    avg_kda_year_sup_enemy:       -0.0507449
    avg_mk_enemy_year_mid:         0.145361
    avg_mk_enemy_year_mid_enemy:  -0.168502
    player_wr_year_sup_enemy:     -0.344643
    side==Blue:                    0.138522
    side==Red:                    -0.138522

In [11]:
# Out-of-sample AUC of the sparsity-12 entropy model.
IAI.score(
    lnr_entr,
    test_2019_X,
    test_2019_y;
    criterion=:auc,
)

0.6024182451312156

In [12]:
# Out-of-sample accuracy of the sparsity-12 entropy model (positive class = 1).
IAI.score(
    lnr_entr,
    test_2019_X,
    test_2019_y;
    criterion=:accuracy,
    positive_label=1,
)

0.5722011898323418

## Grid 2 (Hinge L1)

In [17]:
# L1-hinge-criterion learner whose sparsity is left to the grid search.
lnr_2019_hinge_L1 = IAI.OptimalFeatureSelectionClassifier(;
    criterion=:l1hinge, relaxation=true, random_seed=15095
)

# Search sparsity values 5 through 15, validating on AUC.
# NOTE(review): the recorded results table for this cell lists a sparsity of 16,
# so the saved output appears to come from a run with a wider range - re-run to
# refresh it.
grid_2019_2 = IAI.GridSearch(lnr_2019_hinge_L1; sparsity=5:15)
IAI.fit!(grid_2019_2, train_2019_X, train_2019_y; validation_criterion=:auc)

[32mParameter combination...             0%|                |  ETA: N/A[39m
[A4m  Parameters:  sparsity=>5[39m
[32mParameter combination...             6%|█               |  ETA: 0:02:39[39m
[A4m  Parameters:  sparsity=>6[39m
[32mParameter combination...            12%|██              |  ETA: 0:02:15[39m
[A4m  Parameters:  sparsity=>7[39m
[32mParameter combination...            19%|███             |  ETA: 0:02:10[39m
[A4m  Parameters:  sparsity=>8[39m
[32mParameter combination...            25%|████            |  ETA: 0:02:08[39m
[A4m  Parameters:  sparsity=>9[39m
[32mParameter combination...            31%|█████           |  ETA: 0:02:07[39m
[A4m  Parameters:  sparsity=>10[39m
[32mParameter combination...            38%|██████          |  ETA: 0:02:01[39m
[A4m  Parameters:  sparsity=>11[39m
[32mParameter combination...            44%|███████         |  ETA: 0:01:59[39m
[A4m  Parameters:  sparsity=>12[39m
[32mParameter combination...            50%|████






[K[A[32mRefitting with best parameters...  100%|████████████████| Time: 0:00:22[39m
[34m  Parameters:  sparsity=>10[39m[A





All Grid Results:

│ Row │ sparsity │ train_score │ valid_score │ rank_valid_score │
│     │ Int64    │ Float64     │ Float64     │ Int64            │
├─────┼──────────┼─────────────┼─────────────┼──────────────────┤
│ 1   │ 5        │ 0.105487    │ 0.57523     │ 11               │
│ 2   │ 6        │ 0.108297    │ 0.605623    │ 3                │
│ 3   │ 7        │ 0.117287    │ 0.577339    │ 10               │
│ 4   │ 8        │ 0.117336    │ 0.572479    │ 12               │
│ 5   │ 9        │ 0.120446    │ 0.585734    │ 6                │
│ 6   │ 10       │ 0.129084    │ 0.608489    │ 1                │
│ 7   │ 11       │ 0.12124     │ 0.598398    │ 4                │
│ 8   │ 12       │ 0.122669    │ 0.607056    │ 2                │
│ 9   │ 13       │ 0.129651    │ 0.56717     │ 13               │
│ 10  │ 14       │ 0.134682    │ 0.563257    │ 15               │
│ 11  │ 15       │ 0.134595    │ 0.57895     │ 8                │
│ 12  │ 16       

In [18]:
# Pull the best learner out of the L1-hinge grid search and report its test AUC.
lnr_2019_best_2 = IAI.get_learner(grid_2019_2)

# Score the extracted learner (as the Grid 1 cell does) rather than the grid
# object, so the variable assigned above is actually the model being evaluated.
IAI.score(lnr_2019_best_2, test_2019_X, test_2019_y, criterion=:auc)

0.5458070645941862

In [19]:
# Test-set accuracy for the L1-hinge grid search, with label 1 as the positive class.
IAI.score(
    grid_2019_2,
    test_2019_X,
    test_2019_y;
    criterion=:accuracy,
    positive_label=1,
)

0.5462412114656571

## Grid 3 (Hinge L2)

In [7]:
# L2-hinge-criterion learner whose sparsity is left to the grid search.
lnr_2019_hinge_L2 = IAI.OptimalFeatureSelectionClassifier(;
    criterion=:l2hinge, relaxation=true, random_seed=15095
)

# Search sparsity values 14 through 17, validating on AUC.
grid_2019_3 = IAI.GridSearch(lnr_2019_hinge_L2; sparsity=14:17)
IAI.fit!(grid_2019_3, train_2019_X, train_2019_y; validation_criterion=:auc)

[32mParameter combination...             0%|                |  ETA: N/A[39m
[A4m  Parameters:  sparsity=>14[39m
[32mParameter combination...            25%|████            |  ETA: 0:02:11[39m
[A4m  Parameters:  sparsity=>15[39m
[32mParameter combination...            50%|████████        |  ETA: 0:01:29[39m
[A4m  Parameters:  sparsity=>16[39m
[32mParameter combination...            75%|████████████    |  ETA: 0:00:42[39m
[A4m  Parameters:  sparsity=>17[39m
[32mParameter combination...           100%|████████████████| Time: 0:02:46[39m
[32mRefitting with best parameters...    0%|                |  ETA: N/A[39m
[A4m  Parameters:  sparsity=>14[39m






[K[A[32mRefitting with best parameters...  100%|████████████████| Time: 0:00:54[39m
[34m  Parameters:  sparsity=>14[39m[A





All Grid Results:

│ Row │ sparsity │ train_score │ valid_score │ rank_valid_score │
│     │ Int64    │ Float64     │ Float64     │ Int64            │
├─────┼──────────┼─────────────┼─────────────┼──────────────────┤
│ 1   │ 14       │ 0.0725445   │ 0.611351    │ 1                │
│ 2   │ 15       │ 0.0762672   │ 0.610412    │ 3                │
│ 3   │ 16       │ 0.0762746   │ 0.61105     │ 2                │
│ 4   │ 17       │ 0.0765108   │ 0.610286    │ 4                │

Best Params:
  sparsity => 14

Best Model - Fitted OptimalFeatureSelectionClassifier:
  Constant: -0.155127
  Weights:
    avg_dpm_6mon_top_enemy:            0.000252236
    avg_dpm_year_top_enemy:           -0.00104451
    avg_gddiff_10_6mon_top_enemy:     -0.000136349
    avg_gddiff_10_year_top_enemy:     -0.00011516
    avg_kda_6mon_sup:                  0.0427854
    avg_kda_year_sup:                  0.0969796
    avg_kda_year_sup_enemy:           -0.0250985
    avg_mk

In [8]:
# Pull the best learner out of the L2-hinge grid search and report its test AUC.
lnr_2019_best_3 = IAI.get_learner(grid_2019_3)

# Score the extracted learner (as the Grid 1 cell does) rather than the grid
# object, so the variable assigned above is actually the model being evaluated.
IAI.score(lnr_2019_best_3, test_2019_X, test_2019_y, criterion=:auc)

0.5955860327534309

In [9]:
# Test-set accuracy for the L2-hinge grid search, with label 1 as the positive class.
IAI.score(
    grid_2019_3,
    test_2019_X,
    test_2019_y;
    criterion=:accuracy,
    positive_label=1,
)

0.5700378583017848

# Passing the selected features to OCT (Optimal Classification Trees)

In [None]:
# Importance table for the best entropy learner from Grid Search 1.
vars_importance = IAI.variable_importance(lnr_2019_best_1)

# Per the IAI documentation, `variable_importance` returns a table with
# `Feature` and `Importance` columns, so the threshold must be applied to the
# `Importance` column (broadcasting `.> 0` over the whole table would also
# compare the feature names with 0). Features are kept by name: row positions in
# this table are not column positions in `train_2019_X`.
selected_features = Symbol.(vars_importance.Feature[vars_importance.Importance .> 0])

# NOTE(review): for categorical predictors IAI may report per-level names
# (the fitted weights above show e.g. `champion_bot==Morgana`); such names have
# no matching column and are reported here instead of silently disappearing.
# Confirm how these should be mapped back to their source column.
unmatched_features = setdiff(selected_features, Symbol.(names(train_2019_X)))
isempty(unmatched_features) ||
    @warn "Selected features without a matching column were skipped" unmatched_features

# Column names of `train_2019_X` with positive importance, in column order.
imp_vars_2019 = [col for col in Symbol.(names(train_2019_X)) if col in selected_features]

In [None]:
# Training predictors restricted to the columns picked out by imp_vars_2019.
train_2019_X_imp = train_2019_X[:, imp_vars_2019]

In [None]:
#### CROSS VALIDATION
# Gini-criterion Optimal Classification Tree; depth and minimum bucket size are
# tuned by the grid search below.
lnr_2019_gini = IAI.OptimalTreeClassifier(;
    criterion=:gini,
    missingdatamode=:separate_class,
    treat_unknown_level_missing=true,
    random_seed=15095,
)

grid_2019_gini_1 = IAI.GridSearch(
    lnr_2019_gini;
    max_depth=[3, 7, 10],
    minbucket=[20, 30, 40],
)

# NOTE(review): this fits on the full `train_2019_X`, not on the reduced
# `train_2019_X_imp` built in the previous cell, so the feature selection is not
# actually passed to the tree - confirm which matrix is intended (the test-set
# scoring cells would need the matching columns as well).
IAI.fit!(grid_2019_gini_1, train_2019_X, train_2019_y)

In [None]:
# Pull the best tree out of the gini grid search and report its test AUC.
lnr_2019_gini_best_1 = IAI.get_learner(grid_2019_gini_1)

IAI.score(
    lnr_2019_gini_best_1,
    test_2019_X,
    test_2019_y;
    criterion=:auc,
)

In [None]:
# Test-set accuracy of the best gini tree, with label 1 as the positive class.
IAI.score(
    lnr_2019_gini_best_1,
    test_2019_X,
    test_2019_y;
    criterion=:accuracy,
    positive_label=1,
)