In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd '/content/drive/MyDrive/Ali Sobhani Thesis/Noisy/FFT'

/content/drive/.shortcut-targets-by-id/10ul3ZK_MC4nZuEgYotLTtUO0eEZB3gue/Ali Sobhani Thesis/Noisy/FFT


In [3]:
import numpy as np
from sklearn.experimental import enable_halving_search_cv  # noqa
from sklearn.model_selection import HalvingGridSearchCV, StratifiedKFold
from sklearn.feature_selection import SelectKBest, chi2, f_classif, mutual_info_classif
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import fbeta_score
from scipy.stats import pearsonr

feature_selection = SelectKBest(score_func=mutual_info_classif)
feature_names = ['IP1_C1', 'IP1_C2', 'IP1_C3','IP1_C4','IN1_C1', 'IN1_C2', 'IN1_C3', 'IN1_C4',
                    'IP2_C1', 'IP2_C2', 'IP2_C3', 'IP2_C4', 'IN2_C1', 'IN2_C2', 'IN2_C3', 'IN2_C4',
                    'VP1_C1', 'VP1_C2', 'VP1_C3', 'VP1_C4', 'VN1_C1', 'VN1_C2', 'VN1_C3', 'VN1_C4',
                    'VP2_C1', 'VP2_C2', 'VP2_C3', 'VP2_C4', 'VN2_C1', 'VN2_C2', 'VN2_C3', 'VN2_C4']

############################
X = np.load('X.npy')
Y = np.load('Y.npy')
X_20 = np.load('X_FFT_SNR_20.npy')
Y_20 = np.load('Y_FFT_SNR_20.npy')
X_25 = np.load('X_FFT_SNR_25.npy')
Y_25 = np.load('Y_FFT_SNR_25.npy')
X_30 = np.load('X_FFT_SNR_30.npy')
Y_30 = np.load('Y_FFT_SNR_30.npy')
X_35 = np.load('X_FFT_SNR_35.npy')
Y_35 = np.load('Y_FFT_SNR_35.npy')
X_40 = np.load('X_FFT_SNR_40.npy')
Y_40 = np.load('Y_FFT_SNR_40.npy')
X_45 = np.load('X_FFT_SNR_45.npy')
Y_45 = np.load('Y_FFT_SNR_45.npy')
###########################


knn = KNeighborsClassifier()

pipeline = Pipeline([
    ('feature_selection', feature_selection),
    ('knn', knn)
])

param_grid = {
    'feature_selection__k': list(range(1, X.shape[1] + 1)),
    'knn__n_neighbors': list(range(1, 21, 2)),
    'knn__weights': ['uniform', 'distance'],
    'knn__metric': ['euclidean', 'manhattan', 'minkowski']
}

outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=13)
inner_cv = StratifiedKFold(n_splits=2, shuffle=True, random_state=13)

grid_search = HalvingGridSearchCV(pipeline, param_grid, cv=inner_cv, scoring='accuracy')

from sklearn.metrics import f1_score, accuracy_score
best_scores = {'accuracy': [], 'f2': [], 'f1':[]}
best_scores_20 = {'accuracy': [], 'f2': [], 'f1':[]}
best_scores_25 = {'accuracy': [], 'f2': [], 'f1':[]}
best_scores_30 = {'accuracy': [], 'f2': [], 'f1':[]}
best_scores_35 = {'accuracy': [], 'f2': [], 'f1':[]}
best_scores_40 = {'accuracy': [], 'f2': [], 'f1':[]}
best_scores_45 = {'accuracy': [], 'f2': [], 'f1':[]}

for train_idx, test_idx in outer_cv.split(X, Y):
    X_train, X_test, X_test_20, X_test_25, X_test_30, X_test_35, X_test_40, X_test_45 = X[train_idx], X[test_idx], X_20[test_idx], X_25[test_idx], X_30[test_idx], X_35[test_idx], X_40[test_idx], X_45[test_idx]
    y_train, y_test, y_test_20, y_test_25, y_test_30, y_test_35, y_test_40, y_test_45 = Y[train_idx], Y[test_idx], Y_20[test_idx], Y_25[test_idx], Y_30[test_idx], Y_35[test_idx], Y_40[test_idx], Y_45[test_idx]

    grid_search.fit(X_train, y_train)

    y_pred = grid_search.predict(X_test)
    y_pred_20 = grid_search.predict(X_test_20)
    y_pred_25 = grid_search.predict(X_test_25)
    y_pred_30 = grid_search.predict(X_test_30)
    y_pred_35 = grid_search.predict(X_test_35)
    y_pred_40 = grid_search.predict(X_test_40)
    y_pred_45 = grid_search.predict(X_test_45)

    f2 = fbeta_score(y_test, y_pred, beta=2)
    f2_20 = fbeta_score(y_test_20, y_pred_20, beta=2)
    f2_25 = fbeta_score(y_test_25, y_pred_25, beta=2)
    f2_30 = fbeta_score(y_test_30, y_pred_30, beta=2)
    f2_35 = fbeta_score(y_test_35, y_pred_35, beta=2)
    f2_40 = fbeta_score(y_test_40, y_pred_40, beta=2)
    f2_45 = fbeta_score(y_test_45, y_pred_45, beta=2)

    accuracy = accuracy_score(y_test, y_pred)
    accuracy_20 = accuracy_score(y_test_20, y_pred_20)
    accuracy_25 = accuracy_score(y_test_25, y_pred_25)
    accuracy_30 = accuracy_score(y_test_30, y_pred_30)
    accuracy_35 = accuracy_score(y_test_35, y_pred_35)
    accuracy_40 = accuracy_score(y_test_40, y_pred_40)
    accuracy_45 = accuracy_score(y_test_45, y_pred_45)

    f1 = f1_score(y_test, y_pred)
    f1_20 = f1_score(y_test_20, y_pred_20)
    f1_25 = f1_score(y_test_25, y_pred_25)
    f1_30 = f1_score(y_test_30, y_pred_30)
    f1_35 = f1_score(y_test_35, y_pred_35)
    f1_40 = f1_score(y_test_40, y_pred_40)
    f1_45 = f1_score(y_test_45, y_pred_45)

    best_scores['f2'].append(f2)
    best_scores_20['f2'].append(f2_20)
    best_scores_25['f2'].append(f2_25)
    best_scores_30['f2'].append(f2_30)
    best_scores_35['f2'].append(f2_35)
    best_scores_40['f2'].append(f2_40)
    best_scores_45['f2'].append(f2_45)

    best_scores['f1'].append(f1)
    best_scores_20['f1'].append(f1_20)
    best_scores_25['f1'].append(f1_25)
    best_scores_30['f1'].append(f1_30)
    best_scores_35['f1'].append(f1_35)
    best_scores_40['f1'].append(f1_40)
    best_scores_45['f1'].append(f1_45)

    best_scores['accuracy'].append(accuracy)
    best_scores_20['accuracy'].append(accuracy_20)
    best_scores_25['accuracy'].append(accuracy_25)
    best_scores_30['accuracy'].append(accuracy_30)
    best_scores_35['accuracy'].append(accuracy_35)
    best_scores_40['accuracy'].append(accuracy_40)
    best_scores_45['accuracy'].append(accuracy_45)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_scorer.py", line 276, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_scorer.py", line 73, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/pipeline.py", line 481, in predict
    return self.steps[-1][1].predict(Xt, **predict_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/neighbors/_classification.py", line 234, in predict
    neigh_ind = self.kneighbors(X, return_distance=False

In [4]:
average_best_scores = {scoring: np.mean(scores) for scoring, scores in best_scores.items()}
average_best_scores_20 = {scoring: np.mean(scores) for scoring, scores in best_scores_20.items()}
average_best_scores_25 = {scoring: np.mean(scores) for scoring, scores in best_scores_25.items()}
average_best_scores_30 = {scoring: np.mean(scores) for scoring, scores in best_scores_30.items()}
average_best_scores_35 = {scoring: np.mean(scores) for scoring, scores in best_scores_35.items()}
average_best_scores_40 = {scoring: np.mean(scores) for scoring, scores in best_scores_40.items()}
average_best_scores_45 = {scoring: np.mean(scores) for scoring, scores in best_scores_45.items()}

print(f"Average best scores: {average_best_scores}")
print(f"Average best scores_20: {average_best_scores_20}")
print(f"Average best scores_25: {average_best_scores_25}")
print(f"Average best scores_30: {average_best_scores_30}")
print(f"Average best scores_35: {average_best_scores_35}")
print(f"Average best scores_40: {average_best_scores_40}")
print(f"Average best scores_45: {average_best_scores_45}")

Average best scores: {'accuracy': 1.0, 'f2': 1.0, 'f1': 1.0}
Average best scores_20: {'accuracy': 0.9984622766868447, 'f2': 0.999043062200957, 'f1': 0.9976119402985073}
Average best scores_25: {'accuracy': 1.0, 'f2': 1.0, 'f1': 1.0}
Average best scores_30: {'accuracy': 1.0, 'f2': 1.0, 'f1': 1.0}
Average best scores_35: {'accuracy': 1.0, 'f2': 1.0, 'f1': 1.0}
Average best scores_40: {'accuracy': 1.0, 'f2': 1.0, 'f1': 1.0}
Average best scores_45: {'accuracy': 1.0, 'f2': 1.0, 'f1': 1.0}


In [5]:
import pandas as pd
import openpyxl
model = ['LR', 'LDA', 'SVM', 'KNN', 'XGBoost', 'RF']
FS = ['ANOVA', 'MI', 'Pearson', 'Chi2']
# Load the existing file
book = openpyxl.load_workbook('FFT_Results.xlsx')
book_20 = openpyxl.load_workbook('FFT_Results_20.xlsx')
book_25 = openpyxl.load_workbook('FFT_Results_25.xlsx')
book_30 = openpyxl.load_workbook('FFT_Results_30.xlsx')
book_35 = openpyxl.load_workbook('FFT_Results_35.xlsx')
book_40 = openpyxl.load_workbook('FFT_Results_40.xlsx')
book_45 = openpyxl.load_workbook('FFT_Results_45.xlsx')

# Prepare the data to be written
data_acc = average_best_scores['accuracy']
data_acc_20 = average_best_scores_20['accuracy']
data_acc_25 = average_best_scores_25['accuracy']
data_acc_30 = average_best_scores_30['accuracy']
data_acc_35 = average_best_scores_35['accuracy']
data_acc_40 = average_best_scores_40['accuracy']
data_acc_45 = average_best_scores_45['accuracy']

data_f2 = average_best_scores['f2']
data_f2_20 = average_best_scores_20['f2']
data_f2_25 = average_best_scores_25['f2']
data_f2_30 = average_best_scores_30['f2']
data_f2_35 = average_best_scores_35['f2']
data_f2_40 = average_best_scores_40['f2']
data_f2_45 = average_best_scores_45['f2']

data_f1 = average_best_scores['f1']
data_f1_20 = average_best_scores_20['f1']
data_f1_25 = average_best_scores_25['f1']
data_f1_30 = average_best_scores_30['f1']
data_f1_35 = average_best_scores_35['f1']
data_f1_40 = average_best_scores_40['f1']
data_f1_45 = average_best_scores_45['f1']
# Get the existing sheets
sheet_acc = book['ACC']
sheet_acc_20 = book_20['ACC']
sheet_acc_25 = book_25['ACC']
sheet_acc_30 = book_30['ACC']
sheet_acc_35 = book_35['ACC']
sheet_acc_40 = book_40['ACC']
sheet_acc_45 = book_45['ACC']

sheet_f2 = book['F2']
sheet_f2_20 = book_20['F2']
sheet_f2_25 = book_25['F2']
sheet_f2_30 = book_30['F2']
sheet_f2_35 = book_35['F2']
sheet_f2_40 = book_40['F2']
sheet_f2_45 = book_45['F2']

sheet_f1 = book['F1']
sheet_f1_20 = book_20['F1']
sheet_f1_25 = book_25['F1']
sheet_f1_30 = book_30['F1']
sheet_f1_35 = book_35['F1']
sheet_f1_40 = book_40['F1']
sheet_f1_45 = book_45['F1']

# Calculate the correct row and column numbers
row = model.index('KNN') + 2  # +2 because Excel index starts from 1 and row 1 contains headers
col = FS.index('MI') + 2  # +2 because Excel index starts from 1 and column 1 contains headers

# Write to the ACC sheet
sheet_acc.cell(row=row, column=col, value=data_acc)
sheet_acc_20.cell(row=row, column=col, value=data_acc_20)
sheet_acc_25.cell(row=row, column=col, value=data_acc_25)
sheet_acc_30.cell(row=row, column=col, value=data_acc_30)
sheet_acc_35.cell(row=row, column=col, value=data_acc_35)
sheet_acc_40.cell(row=row, column=col, value=data_acc_40)
sheet_acc_45.cell(row=row, column=col, value=data_acc_45)

# Write to the F2 sheet
sheet_f2.cell(row=row, column=col, value=data_f2)
sheet_f2_20.cell(row=row, column=col, value=data_f2_20)
sheet_f2_25.cell(row=row, column=col, value=data_f2_25)
sheet_f2_30.cell(row=row, column=col, value=data_f2_30)
sheet_f2_35.cell(row=row, column=col, value=data_f2_35)
sheet_f2_40.cell(row=row, column=col, value=data_f2_40)
sheet_f2_45.cell(row=row, column=col, value=data_f2_45)

# Write to the F1 sheet
sheet_f1.cell(row=row, column=col, value=data_f1)
sheet_f1_20.cell(row=row, column=col, value=data_f1_20)
sheet_f1_25.cell(row=row, column=col, value=data_f1_25)
sheet_f1_30.cell(row=row, column=col, value=data_f1_30)
sheet_f1_35.cell(row=row, column=col, value=data_f1_35)
sheet_f1_40.cell(row=row, column=col, value=data_f1_40)
sheet_f1_45.cell(row=row, column=col, value=data_f1_45)
# Save and close the Excel file
book.save('FFT_Results.xlsx')
book_20.save('FFT_Results_20.xlsx')
book_25.save('FFT_Results_25.xlsx')
book_30.save('FFT_Results_30.xlsx')
book_35.save('FFT_Results_35.xlsx')
book_40.save('FFT_Results_40.xlsx')
book_45.save('FFT_Results_45.xlsx')