In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pprint import pprint

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from auxiliar_func import *
from plot_func import *

In [14]:
df = pd.read_csv('Census-Income-KDD.csv')
target = 'income_50k'
df_tr, df_te = train_test_split(df, test_size=0.3, random_state=42)

In [15]:
prep_params_grid = {
    'scaling': [None, 'minmax', 'std'],
    'imputation': ['mode', 'nancat'],
    'cat_age': [False, True],
    'target_freq': [0.8, 0.9],
    'generate_dummies': [True]
}

mod_par_grid = {
    'solver': ['svd', 'lsqr', 'eigen'],
    'shrinkage': [None, 'auto'],
    'store_covariance': [True, False],
    'tol': [1e-4, 1e-3, 1e-2],
}

lda = LDA(n_components=1)

results = search_best_combination(lda, mod_par_grid, prep_params_grid, df_tr, target_metric='f1_macro', cv=5, verbose=1)
results.to_csv('results_lda.csv', index=False)

===Iteration 1===
Searching preprocessing parameters...


In [None]:
results.columns
# keep only 'prep_param', 'mod_param', 'f1_macro'
pprint(results[['prep_param', 'model_param', 'f1_macro']].head().to_dict('records'))

[{'f1_macro': 0.7543843078854054,
  'model_param': {'shrinkage': None,
                  'solver': 'svd',
                  'store_covariance': True,
                  'tol': 0.0001},
  'prep_param': {'cat_age': True,
                 'generate_dummies': True,
                 'imputation': 'mode',
                 'remove_duplicates': True,
                 'scaling': None,
                 'target_freq': 0.8}},
 {'f1_macro': 0.7543843078854054,
  'model_param': {'shrinkage': None,
                  'solver': 'svd',
                  'store_covariance': False,
                  'tol': 0.0001},
  'prep_param': {'cat_age': True,
                 'generate_dummies': True,
                 'imputation': 'mode',
                 'remove_duplicates': True,
                 'scaling': None,
                 'target_freq': 0.8}},
 {'f1_macro': 0.7543843078854054,
  'model_param': {'shrinkage': None,
                  'solver': 'svd',
                  'store_covariance': False,
              