In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from keras.models import Model
from keras.layers import Input, Conv1D, Flatten, Dense, Reshape, Conv1DTranspose
from keras.optimizers import Adam
import os
import re
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning

# Install an "ignore" filter for scikit-learn's ConvergenceWarning so that
# fits which stop before converging do not flood the output below.
simplefilter("ignore", category=ConvergenceWarning)

# Excel workbooks to process. Each name follows the layout
# "CAE<digit>_<7 integers>.xlsx"; extract_cae_hyperparameters() parses the
# seven integers and they are forwarded to build_cae() as s1..s7.
# The names carry no directory part, so they are resolved relative to the
# current working directory when opened.
file_names = [
    "CAE1_12_24_10_256_6_100_256.xlsx",
    "CAE1_16_32_14_256_6_100_128.xlsx",
    "CAE2_16_32_14_256_6_100_256.xlsx",
    "CAE2_32_64_16_256_6_75_256.xlsx",
]

def load_data(file_name):
    """Read an Excel workbook and split it into features and class labels.

    Args:
        file_name: Path of the workbook; it is read with a two-row header.

    Returns:
        A tuple ``(X, y, feature_names)`` where ``X`` holds every column but
        the last, ``y`` holds the last column, and ``feature_names`` is the
        flattened two-level column index of the whole sheet (the last column
        is included in it).
    """
    # header=[0, 1] makes pandas build a two-level column index.
    frame = pd.read_excel(file_name, header=[0, 1])
    table = frame.values
    # The final column is treated as the class label, the rest as features.
    features, labels = table[:, :-1], table[:, -1]
    return features, labels, frame.columns.to_flat_index()

def preprocess_data(X):
    """Return ``X`` transformed by a ``StandardScaler`` fitted on ``X`` itself.

    A new scaler is created on every call and is not returned, so the fitted
    scaling parameters are not available to the caller afterwards.
    """
    return StandardScaler().fit_transform(X)

def extract_cae_hyperparameters(file_name):
    """Parse the seven CAE hyperparameters encoded in a workbook file name.

    The base name must have the exact form
    ``CAE<digit>_<s1>_<s2>_<s3>_<s4>_<s5>_<s6>_<s7>.xlsx`` where each ``s``
    field is a run of decimal digits. Any leading directory components are
    ignored, so the same path that is passed to ``load_data`` can be used
    here.

    Args:
        file_name: File name or path of the workbook.

    Returns:
        A dict with the keys ``"s1"`` .. ``"s7"`` mapped to ``int`` values,
        or ``None`` when the base name does not match the expected layout.
    """
    # Only the seven numeric fields are captured; the "CAE<digit>" prefix is
    # required but its value is not part of the result.
    pattern = r"CAE\d_(\d+)_(\d+)_(\d+)_(\d+)_(\d+)_(\d+)_(\d+)\.xlsx"

    # fullmatch (rather than match) rejects trailing text such as ".xlsx.bak";
    # basename lets callers pass a path with directories.
    match = re.fullmatch(pattern, os.path.basename(file_name))
    if match is None:
        return None

    return {
        f"s{position}": int(value)
        for position, value in enumerate(match.groups(), start=1)
    }

def build_cae(input_shape, num_original_features, s1, s2, s3, s4, s5, s6, s7):
    """Assemble a 1-D convolutional autoencoder and its encoder sub-model.

    Args:
        input_shape: Shape passed to the Keras ``Input`` layer.
        num_original_features: Width of the last dense decoder layer and
            length of the sequence the decoder is reshaped to.
        s1: Filter count of the first encoder convolution.
        s2: Filter count of the second encoder convolution and of the second
            transposed convolution in the decoder.
        s3: Filter count of the first transposed convolution in the decoder.
        s4: Units of the first dense decoder layer.
        s5: Units of the dense bottleneck (the encoder output).
        s6: Accepted but not used by this function.
        s7: Accepted but not used by this function.

    Returns:
        ``(autoencoder, encoder)``. The autoencoder is compiled with Adam and
        mean squared error; the encoder shares its input and bottleneck
        layers and is returned uncompiled.
    """
    cae_input = Input(shape=input_shape)

    # Encoder: two stacked convolutions, flattened into a dense bottleneck.
    hidden = cae_input
    for filters in (s1, s2):
        hidden = Conv1D(filters, 3, activation='relu', padding='same')(hidden)
    hidden = Flatten()(hidden)
    bottleneck = Dense(s5, activation='relu')(hidden)

    # Decoder: dense expansion back to one value per original feature,
    # reshaped to a single-channel sequence for the transposed convolutions.
    hidden = Dense(s4, activation='relu')(bottleneck)
    hidden = Dense(num_original_features, activation='relu')(hidden)
    hidden = Reshape((num_original_features, 1))(hidden)
    for filters in (s3, s2):
        hidden = Conv1DTranspose(filters, 3, activation='relu', padding='same')(hidden)
    reconstruction = Conv1DTranspose(1, 3, activation='linear', padding='same')(hidden)

    autoencoder = Model(cae_input, reconstruction)
    autoencoder.compile(optimizer=Adam(), loss='mean_squared_error')

    return autoencoder, Model(cae_input, bottleneck)

def feature_extraction(X, encoder):
    """Return whatever ``encoder.predict(X)`` produces (the deep features)."""
    return encoder.predict(X)

def evaluate_classification(X, y, clf):
    """Return the mean 5-fold cross-validation score of ``clf`` on ``(X, y)``.

    The folds are shuffled with a fixed ``random_state`` of 42. No ``scoring``
    argument is passed to ``cross_val_score``.
    """
    folds = KFold(n_splits=5, shuffle=True, random_state=42)
    return cross_val_score(clf, X, y, cv=folds).mean()

def apply_ffs(X, y, clf):
    """Run forward sequential feature selection with ``clf`` on ``(X, y)``.

    Returns:
        ``(k_feature_names_, k_score_)`` as exposed by the fitted selector.
    """
    selector_options = {
        "k_features": 'best',
        "forward": True,      # add features one at a time
        "floating": False,
        "verbose": 0,
        "scoring": 'accuracy',
        "cv": 5,              # lower cv for testing
    }
    selector = SFS(clf, **selector_options)
    selector.fit(X, y)
    return selector.k_feature_names_, selector.k_score_

# Per-file pipeline: load and scale the workbook, train a CAE configured from
# the file name, append the encoder output to the scaled features, then score
# each classifier on the combined matrix and run forward feature selection.
for file_name in file_names:
    print("Processing file:", file_name)

    # Load data
    X, y, feature_names = load_data(file_name)

    # Preprocess data
    X_scaled = preprocess_data(X)

    print(X_scaled)

    # Extract CAE hyperparameters
    cae_hyperparameters = extract_cae_hyperparameters(file_name)
    print("Extracted CAE Hyperparameters:")
    print(cae_hyperparameters)

    # A name that does not follow the expected layout yields None; unpacking
    # None with ** would raise an opaque TypeError, so report it and move on
    # to the next file instead.
    if cae_hyperparameters is None:
        print("Skipping file: name does not encode CAE hyperparameters")
        print("-" * 80)
        continue

    # Build and train CAE
    num_original_features = X_scaled.shape[1]
    input_shape = (num_original_features, 1)
    cae, encoder = build_cae(input_shape, num_original_features, **cae_hyperparameters)
    # The autoencoder is trained to reconstruct its own input.
    cae.fit(X_scaled, X_scaled, epochs=50, batch_size=32, verbose=0) #lower epochs for testing

    # Extract deep features
    deep_features = feature_extraction(X_scaled, encoder)

    # Keep at most as many deep features as there are original features
    deep_features = deep_features[:, :num_original_features]

    # Combine Standard and Deep Features (original columns first)
    X_combined = np.hstack((X_scaled, deep_features))

    # Initialize and evaluate classifiers
    classifiers = [
        LDA(solver='lsqr'),
        #MLPClassifier(hidden_layer_sizes=(10, 5)), tune this and maybe adjust cv= in apply_ffs
        ExtraTreesClassifier(n_estimators=100)
    ]

    for clf in classifiers:
        # A failure in one classifier is reported and does not stop the
        # remaining classifiers or files from being processed.
        try:
            print("Classifier:", clf.__class__.__name__)

            # Evaluate classification performance
            accuracy = evaluate_classification(X_combined, y, clf)
            print("Accuracy:", accuracy)

            # Apply Forward Feature Selection
            selected_features, feature_score = apply_ffs(X_combined, y, clf)
            print("Selected Features:", selected_features)
            print("Feature Score:", feature_score)
        except Exception as e:
            print("Error processing classifier", clf.__class__.__name__)
            print(e)

    print("-" * 80)

Processing file: CAE1_12_24_10_256_6_100_256.xlsx
[[-0.4490005  -0.26289159 -0.37953266 ...  0.37821412 -0.58518424
  -0.29320558]
 [-0.52169056 -0.03284657 -0.24281687 ...  0.21467911  1.31858802
  -0.10334295]
 [-0.45785978 -0.33491602 -0.39202644 ...  1.56691554 -1.09125992
   0.90747382]
 ...
 [-0.44020941 -0.17266458 -0.3711579  ... -0.65444145  0.45361158
   0.81141229]
 [-0.42287775 -0.27477486 -0.34188762 ...  0.06280107  2.09563878
   1.39112906]
 [ 1.04425631 -0.32367347  0.1217961  ... -0.1034195  -0.30326432
  -0.36036706]]
Extracted CAE Hyperparameters:
{'s1': 12, 's2': 24, 's3': 10, 's4': 256, 's5': 6, 's6': 100, 's7': 256}
Classifier: LinearDiscriminantAnalysis
Accuracy: 0.758858234295416
Selected Features: ('1', '2', '3', '4', '5', '6', '8', '9', '11', '12', '15', '16', '17', '20', '22')
Feature Score: 0.7481953926798235
Classifier: ExtraTreesClassifier
Accuracy: 0.8295483991378584
