In [1]:
from sklearn.model_selection import GridSearchCV
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score
import pandas as pd

In [2]:
dataset = pd.read_csv('processedData.csv')
target = dataset['target']
data = dataset.drop(columns='target')

In [3]:
gridParameters = [{
    'solver': ['svd', 'lsqr', 'eigen'],
    'tol': [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
}]

In [4]:
scoring = {
    'Accuracy': make_scorer(accuracy_score), 
    'Precision': make_scorer(precision_score, average='macro'), 
    'Recall': make_scorer(recall_score, average='macro')
    }
gs = GridSearchCV(
    LinearDiscriminantAnalysis(),
    gridParameters,
    verbose=10,
    cv=5,
    n_jobs=-1,
    scoring = scoring,
    refit = 'Recall'
)
gs.fit(X=data, y=target)

Fitting 5 folds for each of 21 candidates, totalling 105 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    2.3s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    2.5s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    2.6s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1877s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Batch computation too fast (0.0948s.) Setting batch_size=8.
[Parallel(n_jobs=-1)]: Done  35 tasks      | elapsed:    2.7s
[Parallel(n_jobs=-1)]: Done  87 out of 105 | elapsed:    3.2s remaining:    0.7s
[Parallel(n_jobs=-1)]: Done 105 out of 105 | elapsed:    3.3s finished


GridSearchCV(cv=5, estimator=LinearDiscriminantAnalysis(), n_jobs=-1,
             param_grid=[{'solver': ['svd', 'lsqr', 'eigen'],
                          'tol': [1e-07, 1e-06, 1e-05, 0.0001, 0.001, 0.01,
                                  0.1]}],
             refit='Recall',
             scoring={'Accuracy': make_scorer(accuracy_score),
                      'Precision': make_scorer(precision_score, average=macro),
                      'Recall': make_scorer(recall_score, average=macro)},
             verbose=10)

In [5]:
bestEstimators = gs.best_estimator_
finalAcuraccy = gs.cv_results_["mean_test_Accuracy"][gs.best_index_]
finalPrecision = gs.cv_results_["mean_test_Precision"][gs.best_index_]
finalRecall = gs.cv_results_["mean_test_Recall"][gs.best_index_]
stdDeviAcuraccy = gs.cv_results_["std_test_Accuracy"][gs.best_index_] 
stdDeviPrecision = gs.cv_results_["std_test_Precision"][gs.best_index_] 
stdDeviRecall = gs.cv_results_["std_test_Recall"][gs.best_index_] 

In [6]:
print(f'Accuracy: {round(finalAcuraccy, 3)}')
print(f'std Deviation Accuracy: {round(stdDeviAcuraccy, 2)}')
print(f'Recall: {round(finalRecall, 3)}')
print(f'std Deviation Accuracy: {round(stdDeviRecall, 2)}')
print(f'Accuracy: {round(finalPrecision, 3)}')
print(f'std Deviation Accuracy: {round(stdDeviPrecision, 2)}')

Accuracy: 0.873
std Deviation Accuracy: 0.02
Recall: 0.651
std Deviation Accuracy: 0.09
Accuracy: 0.738
std Deviation Accuracy: 0.18


In [7]:
arquivoResultados = "LDAResults.txt"
objFile = open(arquivoResultados, "a")
objFile.write("LDA Results \n")
objFile.write(str(bestEstimators).replace("\n", "").replace(" ", "") + "\n")
objFile.write(str(finalAcuraccy)+"\n")
objFile.write(str(stdDeviAcuraccy)+"\n")
objFile.write(str(finalRecall)+"\n")
objFile.write(str(stdDeviRecall)+"\n")
objFile.write(str(finalPrecision)+"\n")
objFile.write(str(stdDeviPrecision)+"\n")
objFile.close()