In [1]:
import itertools
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

from auxiliar_func import *
from plot_func import *

In [2]:
# Column name kept for later use -- presumably the label column; it is not
# referenced by the split below (TODO confirm where it is consumed).
target = 'income_50k'

# Read the full dataset, then hold out 30% of the rows as a test split.
# The fixed random_state makes the partition reproducible across runs.
df = pd.read_csv('Census-Income-KDD.csv')
df_tr, df_te = train_test_split(
    df,
    random_state=42,
    test_size=0.3,
)

In [7]:
def test_preprocess_params(
    model: object,
    params: dict,
    df: pd.DataFrame,
    metrics: list = ['accuracy', 'f1_macro',
                     'precision_macro', 'recall_macro'],
    cv: int = 4,
    verbose: int = 1,
    col_prefix: str = 'prep_'
) -> pd.DataFrame:
    c_names = ['prep_param'] + metrics
    results = pd.DataFrame(columns=c_names, dtype=object)

    for combination in list(itertools.product(*params.values())):
        if verbose > 0: print(f"Adjusting for {combination}")
        try:
            par_tr = {k: v for k, v in zip(params.keys(), combination)}
            par_tr['remove_duplicates'] = True

            cross_val_results = cross_validation(model, df, par_tr, cv=cv,
                                                scoring=metrics)
            results = pd.concat([results, pd.DataFrame({'prep_param': [par_tr]} | cross_val_results)])
        except Exception as e:
            if verbose > 0: print(f"Error in {combination}")
            if verbose > 1: print(e) 

    return results

# Preprocessing options to sweep: each key maps to the list of values to try.
# Only `target_freq` has more than one candidate, so two combinations are run.
prep_params = dict(
    scaling=[None],
    imputation=['mode'],
    cat_age=[False],
    target_freq=[0.7, 0.8],
    generate_dummies=[True],
)

# Linear Discriminant Analysis configured with n_components=1.
lda = LDA(n_components=1)

# Candidate LDA hyper-parameter grid, currently disabled (not used below):
# params = {
#     'solver': ['svd', 'lsqr'],
#     'tol': [1e-4, 1e-3],
# }

# Evaluate every preprocessing combination on the training split with cv=2;
# verbose=2 also prints the exception for any combination that fails.
results = test_preprocess_params(
    model=lda,
    params=prep_params,
    df=df_tr,
    cv=2,
    verbose=2,
)
results.head()

Adjusting for (None, 'mode', False, 0.7, True)
Adjusting for (None, 'mode', False, 0.8, True)


Unnamed: 0,prep_param,accuracy,f1_macro,precision_macro,recall_macro
0,"{'scaling': None, 'imputation': 'mode', 'cat_a...",0.916415,0.73673,0.693563,0.823139
0,"{'scaling': None, 'imputation': 'mode', 'cat_a...",0.933885,0.753257,0.727472,0.788094
