In [1]:
# Mount Google Drive at /content/drive so later cells can read and write the
# project files stored there.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [2]:
cd '/content/drive/MyDrive/Ali Sobhani Thesis/DWT'

/content/drive/.shortcut-targets-by-id/10ul3ZK_MC4nZuEgYotLTtUO0eEZB3gue/Ali Sobhani Thesis/DWT


In [3]:
from functools import partial

import numpy as np
from sklearn.experimental import enable_halving_search_cv  # noqa
from sklearn.model_selection import HalvingGridSearchCV, StratifiedKFold
from sklearn.feature_selection import SelectKBest, chi2, f_classif, mutual_info_classif
from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, f1_score, fbeta_score


# Nested cross-validation of a SelectKBest(mutual information) -> LDA pipeline.
#   * inner loop: successive-halving grid search over the number of kept
#     features (k) and the LDA solver / shrinkage, scored by accuracy;
#   * outer loop: 5 stratified folds; the refitted best pipeline of each fold
#     is scored on that fold's held-out part (accuracy, F1, F2).
# Names consumed by later cells: average_best_scores, best_params,
# all_selected_names.

# One seed for every stochastic component so Restart & Run All reproduces the
# same scores.
RANDOM_STATE = 13

# mutual_info_classif draws random numbers internally (see its `random_state`
# parameter); without a fixed seed the feature ranking can change between runs.
feature_selection = SelectKBest(
    score_func=partial(mutual_info_classif, random_state=RANDOM_STATE)
)
feature_names = ['IP1_E1', 'IP1_E2', 'IN1_E1', 'IN1_E2', 'IP2_E1', 'IP2_E2', 'IN2_E1', 'IN2_E2',
                 'VP1_E1', 'VP1_E2', 'VN1_E1', 'VN1_E2', 'VP2_E1', 'VP2_E2', 'VN2_E1', 'VN2_E2']

X = np.load('X.npy')
Y = np.load('Y.npy')

# Selected column indices are translated to names through feature_names, so a
# shape mismatch would silently mislabel (or crash on) the selected features.
assert X.ndim == 2 and X.shape[1] == len(feature_names), (
    f"X has shape {X.shape}, expected {len(feature_names)} feature columns"
)
assert X.shape[0] == Y.shape[0], (
    f"X has {X.shape[0]} rows but Y has {Y.shape[0]} labels"
)

lda = LinearDiscriminantAnalysis()

pipeline = Pipeline([
    ('feature_selection', feature_selection),
    ('lda', lda)
])

# Every possible number of retained features: 1 .. n_features.
k_values = list(range(1, X.shape[1] + 1))

# The 'svd' solver is searched without shrinkage, hence its own grid.
param_grid_1 = {
    'feature_selection__k': k_values,
    'lda__solver': ['svd']
}

# 'lsqr' and 'eigen' are searched with automatic, no, and fixed shrinkage.
# The fixed values 0.1 .. 0.9 are built as plain Python floats so the pickled
# best_params hold exact, numpy-free numbers.
param_grid_2 = {
    'feature_selection__k': k_values,
    'lda__solver': ['lsqr', 'eigen'],
    'lda__shrinkage': ['auto', None] + [round(0.1 * step, 1) for step in range(1, 10)]
}

param_grid = [param_grid_1, param_grid_2]

outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=RANDOM_STATE)

# random_state fixes the sample sub-sampling done at each halving iteration.
# NOTE: the recorded run reports some candidate fits failing inside LDA's
# eigen solver; with the default error_score those candidates are scored NaN
# and the search carries on.
grid_search = HalvingGridSearchCV(
    pipeline,
    param_grid,
    cv=inner_cv,
    scoring='accuracy',
    random_state=RANDOM_STATE,
)

# Per-outer-fold test scores of the best pipeline found by the inner search.
best_scores = {'accuracy': [], 'f2': [], 'f1': []}
best_params = []
all_selected_names = []

for train_idx, test_idx in outer_cv.split(X, Y):
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = Y[train_idx], Y[test_idx]

    # Tune on the training part only; the search refits the best candidate.
    grid_search.fit(X_train, y_train)
    best_params.append(grid_search.best_params_)

    # Score the refitted pipeline on the untouched outer test fold.
    y_pred = grid_search.predict(X_test)
    best_scores['f2'].append(fbeta_score(y_test, y_pred, beta=2))
    best_scores['f1'].append(f1_score(y_test, y_pred))
    best_scores['accuracy'].append(accuracy_score(y_test, y_pred))

    # Names of the features kept by this fold's best selector.
    selected_features = grid_search.best_estimator_.named_steps['feature_selection']
    selected_names = [feature_names[i] for i in selected_features.get_support(indices=True)]
    all_selected_names.append(selected_names)

# Plain Python floats print identically across numpy versions and are safe to
# hand to other libraries.
average_best_scores = {scoring: float(np.mean(scores)) for scoring, scores in best_scores.items()}
print(f"Average best scores: {average_best_scores}")

Streaming output truncated to the last 5000 lines.
27 fits failed out of a total of 736.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
9 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/pipeline.py", line 405, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/discriminant_analysis.py", line 631, in fit
    self._solve_eigen(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/discriminant_analysis.py", 

Average best scores: {'accuracy': 0.970408238594419, 'f2': 0.9302281459028118, 'f1': 0.9520934941166063}


 0.875      0.875      0.625      0.75       0.75       0.75
 0.75       0.625      0.625      0.625      0.875      0.875
 0.875      0.5        0.875      0.875      0.875      0.875
 0.875      0.875      0.875      0.875      0.5        0.5
 0.875      0.5        0.5        0.875      0.875      0.875
 0.875      0.875      0.875      0.875      1.         1.
 0.875      0.875      0.875      0.875      0.875      0.875
 0.875      0.875      0.875      0.875      0.875      0.875
 0.875      0.875      0.875      0.875      0.875      0.875
 0.625             nan 0.875             nan 0.625      0.875
 0.875      0.75       0.5        0.5        0.75       0.75
 0.75       0.75       0.875      0.75       0.875      0.875
 0.875      0.875      0.875      0.875      0.375      0.75
 0.625             nan 0.75       0.75       0.625      0.75
 0.75       0.75       0.75       0.75       0.875      0.75
 0.75       0.625      0.875      0.75       0.75       0.75
 0.75       1.     

In [4]:
import pandas as pd
import openpyxl
# Record this experiment's averaged scores in the existing results workbook.
# One sheet per metric; the target cell is addressed by classifier (row) and
# feature-selection method (column).
model = ['LR', 'LDA', 'SVM', 'KNN', 'XGBoost', 'RF']
FS = ['ANOVA', 'MI', 'Pearson', 'Chi2']

book = openpyxl.load_workbook('DWT_Results.xlsx')

# Excel coordinates are 1-based and row 1 / column 1 hold the headers, so a
# list position maps to position + 2.
row = model.index('LDA') + 2
col = FS.index('MI') + 2

# Sheet name -> key of the matching value in average_best_scores.
for sheet_name, metric in (('ACC', 'accuracy'), ('F2', 'f2'), ('F1', 'f1')):
    book[sheet_name].cell(row=row, column=col, value=average_best_scores[metric])

# Write the modified workbook back over the original file.
book.save('DWT_Results.xlsx')

In [5]:
import os
import pickle

# Folder that receives the per-fold artefacts of the MI + LDA experiment.
dir_path = '/content/drive/MyDrive/Ali Sobhani Thesis/DWT/MI/LDA'

# open(..., 'wb') does not create missing folders, so make sure the target
# (and any missing parents) exists; a no-op when it is already there.
os.makedirs(dir_path, exist_ok=True)

# Pickle the best hyper-parameters and the selected feature names of every
# outer fold, one file each.
for file_name, payload in (
    ('best_params.pkl', best_params),
    ('all_selected_names.pkl', all_selected_names),
):
    with open(os.path.join(dir_path, file_name), 'wb') as f:
        pickle.dump(payload, f)