# Optimal Feature Selection (Exact Method)

# Train 1st half, Test 2nd half (2019, 1 year imputation)

## Reading in Data

In [3]:
using CSV, DataFrames, Statistics, Random, Gurobi, JuMP

In [13]:
# Load the imputed 2019 train and test splits with the same CSV options
# (header row present, "NA" parsed as missing).
train_2019, test_2019 = map((
    "../../data/Imputed_Final/train_0.5_2019_imputed_full.csv",
    "../../data/Imputed_Final/test_0.5_2019_imputed_full.csv",
)) do path
    DataFrame(CSV.File(path, header=true, missingstring="NA", categorical=true))
end

# Convert :result and :playoffs to categorical columns, in place, first in the
# training set and then in the testing set.
for df in (train_2019, test_2019), col in (:result, :playoffs)
    categorical!(df, col)
end
;

In [14]:
# Split each set into features (every column except :result) and the
# :result target column.
(train_2019_X, train_2019_y), (test_2019_X, test_2019_y) =
    map((train_2019, test_2019)) do df
        (df[:, Not(:result)], df[:, :result])
    end
;

## Grid 1

In [9]:
# Optimal Feature Selection classifier with the relaxation switched off,
# solved through Gurobi with its "TimeLimit" attribute set to 60.
lnr_2019_grid = IAI.OptimalFeatureSelectionClassifier(;
    random_seed=15095,
    criterion=:entropy,
    relaxation=false,
    solver=optimizer_with_attributes(Gurobi.Optimizer, "TimeLimit" => 60),
)

# Grid-search the `sparsity` parameter over 8:12, validating on AUC.
# NOTE(review): the saved cell output below lists sparsity values starting at 5
# and ranks up to 21, so it appears to come from a run with a wider range than
# 8:12 -- re-run this cell to refresh the output, or confirm the intended range.
grid_2019 = IAI.GridSearch(lnr_2019_grid; sparsity=8:12)
IAI.fit!(grid_2019, train_2019_X, train_2019_y; validation_criterion=:auc)

[32mParameter combination...             0%|                |  ETA: N/A[39m
[A4m  Parameters:  sparsity=>5[39m
[32mParameter combination...             5%|▊               |  ETA: 0:00:44[39m
[A4m  Parameters:  sparsity=>6[39m
[32mParameter combination...            10%|█▌              |  ETA: 0:00:40[39m
[A4m  Parameters:  sparsity=>7[39m
[32mParameter combination...            14%|██▎             |  ETA: 0:00:36[39m
[A4m  Parameters:  sparsity=>8[39m
[32mParameter combination...            19%|███             |  ETA: 0:00:32[39m
[A4m  Parameters:  sparsity=>9[39m
[32mParameter combination...            24%|███▊            |  ETA: 0:00:31[39m
[A4m  Parameters:  sparsity=>10[39m
[32mParameter combination...            29%|████▋           |  ETA: 0:00:28[39m
[A4m  Parameters:  sparsity=>11[39m
[32mParameter combination...            33%|█████▍          |  ETA: 0:00:25[39m
[A4m  Parameters:  sparsity=>12[39m
[32mParameter combination...            38%|████






[K[A[32mRefitting with best parameters...  100%|████████████████| Time: 0:00:02[39m
[34m  Parameters:  sparsity=>10[39m[A





All Grid Results:

│ Row │ sparsity │ train_score │ valid_score │ rank_valid_score │
│     │ [90mInt64[39m    │ [90mFloat64[39m     │ [90mFloat64[39m     │ [90mInt64[39m            │
├─────┼──────────┼─────────────┼─────────────┼──────────────────┤
│ 1   │ 5        │ 0.0355006   │ 0.590634    │ 17               │
│ 2   │ 6        │ 0.0376854   │ 0.575817    │ 21               │
│ 3   │ 7        │ 0.0424893   │ 0.587805    │ 19               │
│ 4   │ 8        │ 0.0412618   │ 0.579957    │ 20               │
│ 5   │ 9        │ 0.0468596   │ 0.613118    │ 6                │
│ 6   │ 10       │ 0.0529921   │ 0.624312    │ 1                │
│ 7   │ 11       │ 0.0533606   │ 0.621439    │ 2                │
│ 8   │ 12       │ 0.054227    │ 0.61692     │ 3                │
│ 9   │ 13       │ 0.0522509   │ 0.615851    │ 4                │
│ 10  │ 14       │ 0.0530451   │ 0.613304    │ 5                │
│ 11  │ 15       │ 0.0532717   │ 0.61075     │ 7                │
│ 12  │ 16       

In [12]:
# Keep a handle on the learner selected by the grid search.
lnr_2019_best_1 = IAI.get_learner(grid_2019)

# AUC of the fitted grid on the test set.
IAI.score(grid_2019, test_2019_X, test_2019_y; criterion=:auc)

0.5826014359463996

In [13]:
# Accuracy of the fitted grid on the test set, treating label 1 as the positive class.
IAI.score(grid_2019, test_2019_X, test_2019_y; criterion=:accuracy, positive_label=1)

0.5700378583017848