In [None]:
# Build a synthetic classification dataset to experiment with
from sklearn.datasets import make_classification

# 1000 samples with 10 features, all informative and none redundant;
# random_state pins the generated data so reruns produce the same arrays.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=10,
    n_redundant=0,
    random_state=1,
)

# Report the dimensions of the feature matrix and the target vector
print(X.shape, y.shape)

(1000, 10) (1000,)


In [None]:
# evaluate a lda model on the dataset

from numpy import mean

from numpy import std

from sklearn.datasets import make_classification

from sklearn.model_selection import cross_val_score

from sklearn.model_selection import RepeatedStratifiedKFold

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# define dataset

In [None]:
from sklearn.model_selection import train_test_split

# Regenerate the same seeded synthetic dataset used in the first cell.
X, y = make_classification(n_samples=1000, n_features=10, n_informative=10, n_redundant=0, random_state=1)

# Hold out 20% of the rows as a test set. random_state is fixed so that the
# split -- and therefore every metric computed on X_test further down -- is
# reproducible on a fresh "Restart & Run All", like the other seeded steps.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# define model

model = LinearDiscriminantAnalysis()

# define model evaluation method: 10-fold stratified CV repeated 3 times (seeded)

cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)

# evaluate model
# NOTE: this baseline is cross-validated on the full X, y (including the rows
# held out as X_test), whereas the grid search below is fit on X_train only,
# so the two CV scores are not computed on the same data.

scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)

# summarize result: mean and standard deviation of the per-fold accuracies

print('Mean Accuracy: %.3f (%.3f)' % (mean(scores), std(scores)))

Mean Accuracy: 0.893 (0.033)


In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate hyper-parameters: a single solver combined with three tolerances
lda_param_grid = {
    "solver": ["svd"],
    "tol": [0.0001, 0.0002, 0.0003],
}

# Exhaustive search over the grid, scored on accuracy with the repeated
# stratified CV defined earlier; a candidate whose fit errors gets score 0.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=lda_param_grid,
    scoring='accuracy',
    cv=cv,
    n_jobs=-1,
    error_score=0,
)

# Tune on the training portion only, leaving X_test untouched
grid_result = grid_search.fit(X_train, y_train)

In [None]:
# Show the best CV score and its parameters, then keep the per-candidate
# mean scores, score standard deviations and parameter dicts for inspection.
print("Best: %f using %s\n" % (grid_result.best_score_, grid_result.best_params_))
means, stds, params = (
    grid_result.cv_results_[field]
    for field in ('mean_test_score', 'std_test_score', 'params')
)

Best: 0.899167 using {'solver': 'svd', 'tol': 0.0001}



In [None]:
# Take the estimator selected by the grid search and predict labels for the
# held-out test rows.
final_model = grid_result.best_estimator_
predictions = final_model.predict(X_test)

In [None]:
from sklearn import metrics
from sklearn.metrics import classification_report,roc_curve,confusion_matrix

In [None]:
import pandas as pd

# Side-by-side table of the true test labels and the model's predictions
c = pd.DataFrame(dict(Actual=y_test, Predictions=predictions))
print(c)

     Actual  Predictions
0         0            0
1         1            1
2         1            1
3         1            1
4         1            1
..      ...          ...
195       0            1
196       0            0
197       1            1
198       0            0
199       0            0

[200 rows x 2 columns]


In [None]:

# Per-class precision/recall/F1, the raw confusion matrix and overall accuracy
# of the tuned model on the held-out test set.
print('Classification Report\n',classification_report(y_test, predictions))
print('Confusion Matrix\n',confusion_matrix(y_test, predictions))
print("Accuracy:",metrics.accuracy_score(y_test, predictions))
# pos_label is intentionally not passed: scikit-learn ignores it (with a
# UserWarning) unless average='binary', and 'positive' is not one of the
# labels in this data (classes are 0 and 1).
# NOTE: micro-averaging pools all decisions across classes, so for this
# single-label problem both scores come out equal to the accuracy above; see
# the classification report for per-class precision and recall.
print("Precision Score:",metrics.precision_score(y_test, predictions,
                                           average='micro'))
print("Recall Score:",metrics.recall_score(y_test, predictions,
                                           average='micro'))

Classification Report
               precision    recall  f1-score   support

           0       0.95      0.80      0.87        93
           1       0.84      0.96      0.90       107

    accuracy                           0.89       200
   macro avg       0.90      0.88      0.88       200
weighted avg       0.89      0.89      0.88       200

Confusion Matrix
 [[ 74  19]
 [  4 103]]
Accuracy: 0.885
Precision Score: 0.885
Recall Score: 0.885


