In [1]:
import os
import numpy as np
import pandas as pd

ELECTRODE_INFO = {
    'C3': 'C3 - Central Left (Motor Control)',
    'C4': 'C4 - Central Right (Motor Control)',
    'Cz': 'Cz - Central Midline (Motor Coordination)',
    'F1': 'F1 - Frontal Left (Decision Making)',
    'F2': 'F2 - Frontal Right (Decision Making)',
    'F3': 'F3 - Frontal Left (Cognitive Function)',
    'F4': 'F4 - Frontal Right (Cognitive Function)',
    'F7': 'F7 - Frontal Left (Emotional Control)',
    'F8': 'F8 - Frontal Right (Emotional Control)',
    'Fp1': 'Fp1 - Frontal Pole Left (Attention)',
    'Fp2': 'Fp2 - Frontal Pole Right (Attention)',
    'Fz': 'Fz - Frontal Midline (Executive Function)',
    'O1': 'O1 - Occipital Left (Visual Processing)',
    'O2': 'O2 - Occipital Right (Visual Processing)',
    'P3': 'P3 - Parietal Left (Sensory Integration)',
    'P4': 'P4 - Parietal Right (Sensory Integration)',
    'Pz': 'Pz - Parietal Midline (Spatial Awareness)',
    'T3': 'T3 - Temporal Left (Language Comprehension)',
    'T4': 'T4 - Temporal Right (Language Comprehension)',
    'T5': 'T5 - Temporal Posterior Left (Memory)',
    'T6': 'T6 - Temporal Posterior Right (Memory)'
}

data_folder = 'C:/Users/ASUS/Desktop/EEG_data'
all_data = []  # Initialize all_data as an empty list
labels = []    # Initialize labels as an empty list

# Specify the extension you want to load (e.g., '.txt')
file_extension = '.txt'

# Load signals and label them with condition, state, and electrode information
for condition in ['AD', 'Healthy']:
    for state in ['Eyes_closed', 'Eyes_open']:
        state_path = os.path.join(data_folder, condition, state)

        # Dynamically handle any number of Paciente folders
        for patient_folder in os.listdir(state_path):
            patient_path = os.path.join(state_path, patient_folder)
            
            # Iterate through each electrode file within the patient's folder
            for electrode in ELECTRODE_INFO.keys():
                # Look for files named like 'C3.txt', 'C4.txt', etc.
                electrode_file = f"{electrode}{file_extension}"
                file_path = os.path.join(patient_path, electrode_file)
                
                if os.path.exists(file_path):
                    try:
                        signal = np.loadtxt(file_path)
                        all_data.append(signal)
                        # Append the condition, state, patient, and electrode information as labels
                        labels.append([condition, state, patient_folder, ELECTRODE_INFO[electrode]])
                    except Exception as e:
                        print(f"Error loading {file_path}: {e}")

# Check if any data was loaded
if all_data:
    df = pd.DataFrame(all_data)

    # Add condition, state, patient, and electrode as columns
    df['condition'] = [label[0] for label in labels]
    df['state'] = [label[1] for label in labels]
    df['patient'] = [label[2] for label in labels]
    df['electrode'] = [label[3] for label in labels]

    # Display the first few rows of the data
    print(df.head())
else:
    print("No data loaded. Check folder paths and file contents.")


         0        1        2        3        4        5       6        7  \
0 -2.90680 -6.69830 -5.75040 -2.90680 -1.95890 -0.06319 -1.0111 -4.80250   
1 -0.88889 -2.79370 -5.65080 -4.69840 -1.84130  1.01590  1.0159 -2.79370   
2 -6.82930 -8.78050 -6.82930 -2.92680  0.00000  0.00000 -2.9268 -5.85370   
3  1.92310  0.96154  0.96154  0.96154  0.96154  2.88460  2.8846 -0.96154   
4  0.00000 -2.94120 -1.96080 -0.98039  1.96080  2.94120  5.8824  1.96080   

         8        9  ...     1018     1019     1020    1021     1022    1023  \
0 -3.85470 -1.01110  ... -0.06319 -0.06319 -0.06319 -1.0111 -1.95890 -2.9068   
1 -2.79370 -0.88889  ... -1.84130 -3.74600 -3.74600 -1.8413  0.06349  1.9683   
2 -2.92680  0.97561  ...  0.00000  0.97561  0.97561  0.0000 -1.95120 -1.9512   
3  0.00000  0.00000  ...  1.92310 -1.92310 -2.88460 -2.8846 -3.84620 -4.8077   
4  0.98039  1.96080  ...  7.84310  3.92160  3.92160  2.9412  3.92160  3.9216   

   condition        state    patient  \
0         AD  Eyes_clo

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Flatten the EEG signal data (each 1024-value list becomes a single feature vector)
X = np.array([np.array(signal).flatten() for signal in all_data])  # Features (EEG signals)
y = np.array([label[0] for label in labels])  # Labels (AD or Healthy)

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Decision Tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = clf.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Display classification report for precision, recall, F1-score
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Display confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Accuracy: 0.93

Classification Report:
              precision    recall  f1-score   support

          AD       0.96      0.96      0.96       672
     Healthy       0.73      0.72      0.73        97

    accuracy                           0.93       769
   macro avg       0.84      0.84      0.84       769
weighted avg       0.93      0.93      0.93       769


Confusion Matrix:
[[646  26]
 [ 27  70]]


In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Random Forest classifier
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
rf_clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred_rf = rf_clf.predict(X_test)

# Calculate accuracy
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f"Random Forest Accuracy: {accuracy_rf:.2f}")

# Display classification report for precision, recall, F1-score
print("\nClassification Report (Random Forest):")
print(classification_report(y_test, y_pred_rf))

# Display confusion matrix
print("\nConfusion Matrix (Random Forest):")
print(confusion_matrix(y_test, y_pred_rf))

Random Forest Accuracy: 0.97

Classification Report (Random Forest):
              precision    recall  f1-score   support

          AD       0.96      1.00      0.98       672
     Healthy       1.00      0.73      0.85        97

    accuracy                           0.97       769
   macro avg       0.98      0.87      0.91       769
weighted avg       0.97      0.97      0.96       769


Confusion Matrix (Random Forest):
[[672   0]
 [ 26  71]]


In [7]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

# Assuming X and y are already prepared as in the previous code

# It's a good practice to scale the data for SVM
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Initialize and train the SVM classifier
svm_clf = SVC(kernel='linear', random_state=42)  # You can also try other kernels like 'rbf' or 'poly'
svm_clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred_svm = svm_clf.predict(X_test)

# Calculate accuracy
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print(f"SVM Accuracy: {accuracy_svm:.2f}")

# Display classification report for precision, recall, F1-score
print("\nClassification Report (SVM):")
print(classification_report(y_test, y_pred_svm))

# Display confusion matrix
print("\nConfusion Matrix (SVM):")
print(confusion_matrix(y_test, y_pred_svm))

SVM Accuracy: 0.98

Classification Report (SVM):
              precision    recall  f1-score   support

          AD       0.99      0.99      0.99       672
     Healthy       0.93      0.94      0.93        97

    accuracy                           0.98       769
   macro avg       0.96      0.96      0.96       769
weighted avg       0.98      0.98      0.98       769


Confusion Matrix (SVM):
[[665   7]
 [  6  91]]


In [9]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler


# It's good practice to scale the data for k-NN
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Initialize and train the k-NN classifier
knn_clf = KNeighborsClassifier(n_neighbors=5)  # You can change the number of neighbors (k)
knn_clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred_knn = knn_clf.predict(X_test)

# Calculate accuracy
accuracy_knn = accuracy_score(y_test, y_pred_knn)
print(f"k-NN Accuracy: {accuracy_knn:.2f}")

# Display classification report for precision, recall, F1-score
print("\nClassification Report (k-NN):")
print(classification_report(y_test, y_pred_knn))

# Display confusion matrix
print("\nConfusion Matrix (k-NN):")
print(confusion_matrix(y_test, y_pred_knn))

k-NN Accuracy: 1.00

Classification Report (k-NN):
              precision    recall  f1-score   support

          AD       1.00      1.00      1.00       672
     Healthy       1.00      0.99      0.99        97

    accuracy                           1.00       769
   macro avg       1.00      0.99      1.00       769
weighted avg       1.00      1.00      1.00       769


Confusion Matrix (k-NN):
[[672   0]
 [  1  96]]


In [11]:
import numpy as np
import pandas as pd
from scipy.stats import entropy, skew, kurtosis

# Function to extract features from a single signal
def extract_features(signal):
    features = {}
    features['mean'] = np.mean(signal)
    features['std'] = np.std(signal)
    features['min'] = np.min(signal)
    features['max'] = np.max(signal)
    features['skewness'] = skew(signal)
    features['kurtosis'] = kurtosis(signal)
    # Calculate entropy using the probability density of the signal
    # Normalize the signal values to sum to 1
    signal_prob = np.histogram(signal, bins=50, density=True)[0]
    features['entropy'] = entropy(signal_prob + 1e-8)  # Adding small value to avoid log(0)
    
    return features

# Apply the feature extraction to all EEG signals
extracted_features = []
for signal in all_data:
    features = extract_features(signal)
    extracted_features.append(features)

# Convert the extracted features into a DataFrame
df_features = pd.DataFrame(extracted_features)

# Add the labels (condition, state, patient, electrode)
df_features['condition'] = [label[0] for label in labels]
df_features['state'] = [label[1] for label in labels]
df_features['patient'] = [label[2] for label in labels]
df_features['electrode'] = [label[3] for label in labels]

# Display the first few rows of the features DataFrame
print(df_features.head())

       mean       std     min        max  skewness  kurtosis   entropy  \
0  0.226521  5.872357 -19.021  16.051001  0.152169 -0.058698  3.218837   
1 -0.024865  6.907847 -19.937  29.587000  0.099320  0.295053  3.301411   
2  0.154342  7.221072 -29.268  24.389999 -0.024903  0.382880  3.277793   
3  0.479828  9.444859 -25.000  30.768999  0.596013  0.354002  3.461565   
4  0.481580  9.809538 -21.569  34.313999  0.625237  0.290154  3.487736   

  condition        state    patient                                  electrode  
0        AD  Eyes_closed  Paciente1          C3 - Central Left (Motor Control)  
1        AD  Eyes_closed  Paciente1         C4 - Central Right (Motor Control)  
2        AD  Eyes_closed  Paciente1  Cz - Central Midline (Motor Coordination)  
3        AD  Eyes_closed  Paciente1        F1 - Frontal Left (Decision Making)  
4        AD  Eyes_closed  Paciente1       F2 - Frontal Right (Decision Making)  


In [13]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Assuming df_features contains the extracted features and labels

# Separate features (X) and labels (y)
X = df_features[['mean', 'std', 'min', 'max', 'skewness', 'kurtosis', 'entropy']]  # Features
y = df_features['condition']  # Labels (AD or Healthy)

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Decision Tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = clf.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Decision Tree Accuracy: {accuracy:.2f}")

# Display classification report for precision, recall, F1-score
print("\nClassification Report (Decision Tree):")
print(classification_report(y_test, y_pred))

# Display confusion matrix
print("\nConfusion Matrix (Decision Tree):")
print(confusion_matrix(y_test, y_pred))

Decision Tree Accuracy: 0.95

Classification Report (Decision Tree):
              precision    recall  f1-score   support

          AD       0.96      0.98      0.97       672
     Healthy       0.84      0.72      0.78        97

    accuracy                           0.95       769
   macro avg       0.90      0.85      0.87       769
weighted avg       0.95      0.95      0.95       769


Confusion Matrix (Decision Tree):
[[659  13]
 [ 27  70]]


In [15]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Assuming df_features contains the extracted features and labels

# Separate features (X) and labels (y)
X = df_features[['mean', 'std', 'min', 'max', 'skewness', 'kurtosis', 'entropy']]  # Features
y = df_features['condition']  # Labels (AD or Healthy)

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Random Forest classifier
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
rf_clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred_rf = rf_clf.predict(X_test)

# Calculate accuracy
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f"Random Forest Accuracy: {accuracy_rf:.2f}")

# Display classification report for precision, recall, F1-score
print("\nClassification Report (Random Forest):")
print(classification_report(y_test, y_pred_rf))

# Display confusion matrix
print("\nConfusion Matrix (Random Forest):")
print(confusion_matrix(y_test, y_pred_rf))

Random Forest Accuracy: 0.97

Classification Report (Random Forest):
              precision    recall  f1-score   support

          AD       0.97      1.00      0.98       672
     Healthy       0.96      0.79      0.87        97

    accuracy                           0.97       769
   macro avg       0.97      0.89      0.93       769
weighted avg       0.97      0.97      0.97       769


Confusion Matrix (Random Forest):
[[669   3]
 [ 20  77]]


In [17]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Assuming df_features contains the extracted features and labels

# Separate features (X) and labels (y)
X = df_features[['mean', 'std', 'min', 'max', 'skewness', 'kurtosis', 'entropy']]  # Features
y = df_features['condition']  # Labels (AD or Healthy)

# Scale the features since SVM is sensitive to feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Initialize and train the SVM classifier
svm_clf = SVC(kernel='linear', random_state=42)  # Using a linear kernel for simplicity
svm_clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred_svm = svm_clf.predict(X_test)

# Calculate accuracy
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print(f"SVM Accuracy: {accuracy_svm:.2f}")

# Display classification report for precision, recall, F1-score
print("\nClassification Report (SVM):")
print(classification_report(y_test, y_pred_svm))

# Display confusion matrix
print("\nConfusion Matrix (SVM):")
print(confusion_matrix(y_test, y_pred_svm))

SVM Accuracy: 0.93

Classification Report (SVM):
              precision    recall  f1-score   support

          AD       0.93      1.00      0.96       672
     Healthy       0.98      0.49      0.66        97

    accuracy                           0.93       769
   macro avg       0.96      0.75      0.81       769
weighted avg       0.94      0.93      0.93       769


Confusion Matrix (SVM):
[[671   1]
 [ 49  48]]


In [19]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Assuming df_features contains the extracted features and labels

# Separate features (X) and labels (y)
X = df_features[['mean', 'std', 'min', 'max', 'skewness', 'kurtosis', 'entropy']]  # Features
y = df_features['condition']  # Labels (AD or Healthy)

# Scale the features since k-NN relies on distance metrics
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Initialize and train the k-NN classifier
knn_clf = KNeighborsClassifier(n_neighbors=5)  # Using k=5, but you can experiment with different k values
knn_clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred_knn = knn_clf.predict(X_test)

# Calculate accuracy
accuracy_knn = accuracy_score(y_test, y_pred_knn)
print(f"k-NN Accuracy: {accuracy_knn:.2f}")

# Display classification report for precision, recall, F1-score
print("\nClassification Report (k-NN):")
print(classification_report(y_test, y_pred_knn))

# Display confusion matrix
print("\nConfusion Matrix (k-NN):")
print(confusion_matrix(y_test, y_pred_knn))

k-NN Accuracy: 0.96

Classification Report (k-NN):
              precision    recall  f1-score   support

          AD       0.97      0.99      0.97       672
     Healthy       0.88      0.75      0.81        97

    accuracy                           0.96       769
   macro avg       0.92      0.87      0.89       769
weighted avg       0.95      0.96      0.95       769


Confusion Matrix (k-NN):
[[662  10]
 [ 24  73]]


In [30]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from sklearn.model_selection import train_test_split

# Assuming X is your original EEG data (e.g., 1024 values per signal)
input_dim = X.shape[1]  # Number of input features (e.g., 1024)

# Define the autoencoder architecture
input_layer = Input(shape=(input_dim,))
encoded = Dense(128, activation='relu')(input_layer)  # Compress to 128 dimensions
encoded = Dense(64, activation='relu')(encoded)       # Further compress to 64 dimensions (bottleneck)

decoded = Dense(128, activation='relu')(encoded)      # Expand back to 128 dimensions
decoded = Dense(input_dim, activation='sigmoid')(decoded)  # Output layer (reconstructed input)

# Define the autoencoder model
autoencoder = Model(input_layer, decoded)

# Compile the autoencoder
autoencoder.compile(optimizer='adam', loss='mse')

# Train the autoencoder
autoencoder.fit(X, X, epochs=50, batch_size=256, shuffle=True, validation_split=0.2)

# Define the encoder model to extract the compressed features
encoder = Model(input_layer, encoded)

# Extract the compressed features (latent representation)
X_compressed = encoder.predict(X)

# Print the shape of the compressed data
print(f"Shape of compressed data: {X_compressed.shape}")

Epoch 1/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 334.4225 - val_loss: 387.7471
Epoch 2/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 336.6559 - val_loss: 387.6814
Epoch 3/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 328.2199 - val_loss: 387.6299
Epoch 4/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 337.7356 - val_loss: 387.6255
Epoch 5/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 329.0984 - val_loss: 387.6143
Epoch 6/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 331.0448 - val_loss: 387.6109
Epoch 7/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 333.3795 - val_loss: 387.6089
Epoch 8/50
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 312.9789 - val_loss: 387.6057
Epoch 9/50
[1m12/12[0

In [24]:
!pip install tensorflow




In [31]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Assuming y contains the labels (AD or Healthy)

# Split the compressed data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X_compressed, y, test_size=0.2, random_state=42)

# Initialize and train the Decision Tree classifier
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = clf.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Decision Tree Accuracy on Compressed Data: {accuracy:.2f}")

# Display classification report for precision, recall, F1-score
print("\nClassification Report (Decision Tree):")
print(classification_report(y_test, y_pred))

# Display confusion matrix
print("\nConfusion Matrix (Decision Tree):")
print(confusion_matrix(y_test, y_pred))


Decision Tree Accuracy on Compressed Data: 0.94

Classification Report (Decision Tree):
              precision    recall  f1-score   support

          AD       0.96      0.97      0.97       672
     Healthy       0.77      0.74      0.76        97

    accuracy                           0.94       769
   macro avg       0.87      0.86      0.86       769
weighted avg       0.94      0.94      0.94       769


Confusion Matrix (Decision Tree):
[[651  21]
 [ 25  72]]


In [34]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# Assuming X_compressed is the compressed data from the autoencoder
# and y contains the labels (AD or Healthy)

# Split the compressed data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X_compressed, y, test_size=0.2, random_state=42)

# Initialize and train the Random Forest classifier
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
rf_clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred_rf = rf_clf.predict(X_test)

# Calculate accuracy
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f"Random Forest Accuracy on Compressed Data: {accuracy_rf:.2f}")

# Display classification report for precision, recall, F1-score
print("\nClassification Report (Random Forest):")
print(classification_report(y_test, y_pred_rf))

# Display confusion matrix
print("\nConfusion Matrix (Random Forest):")
print(confusion_matrix(y_test, y_pred_rf))


Random Forest Accuracy on Compressed Data: 0.96

Classification Report (Random Forest):
              precision    recall  f1-score   support

          AD       0.96      1.00      0.98       672
     Healthy       0.96      0.68      0.80        97

    accuracy                           0.96       769
   macro avg       0.96      0.84      0.89       769
weighted avg       0.96      0.96      0.95       769


Confusion Matrix (Random Forest):
[[669   3]
 [ 31  66]]


In [36]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# Assuming X_compressed is the compressed data from the autoencoder
# and y contains the labels (AD or Healthy)

# Scale the compressed features as SVM is sensitive to feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_compressed)

# Split the scaled data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Initialize and train the SVM classifier
svm_clf = SVC(kernel='linear', random_state=42)  # Using a linear kernel for simplicity
svm_clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred_svm = svm_clf.predict(X_test)

# Calculate accuracy
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print(f"SVM Accuracy on Compressed Data: {accuracy_svm:.2f}")

# Display classification report for precision, recall, F1-score
print("\nClassification Report (SVM):")
print(classification_report(y_test, y_pred_svm))

# Display confusion matrix
print("\nConfusion Matrix (SVM):")
print(confusion_matrix(y_test, y_pred_svm))


SVM Accuracy on Compressed Data: 0.94

Classification Report (SVM):
              precision    recall  f1-score   support

          AD       0.93      1.00      0.96       672
     Healthy       0.96      0.52      0.67        97

    accuracy                           0.94       769
   macro avg       0.95      0.76      0.82       769
weighted avg       0.94      0.94      0.93       769


Confusion Matrix (SVM):
[[670   2]
 [ 47  50]]


In [38]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# Assuming X_compressed is the compressed data from the autoencoder
# and y contains the labels (AD or Healthy)

# Scale the compressed features since k-NN relies on distance metrics
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_compressed)

# Split the scaled data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Initialize and train the k-NN classifier
knn_clf = KNeighborsClassifier(n_neighbors=5)  # Using k=5, but you can experiment with different values of k
knn_clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred_knn = knn_clf.predict(X_test)

# Calculate accuracy
accuracy_knn = accuracy_score(y_test, y_pred_knn)
print(f"k-NN Accuracy on Compressed Data: {accuracy_knn:.2f}")

# Display classification report for precision, recall, F1-score
print("\nClassification Report (k-NN):")
print(classification_report(y_test, y_pred_knn))

# Display confusion matrix
print("\nConfusion Matrix (k-NN):")
print(confusion_matrix(y_test, y_pred_knn))


k-NN Accuracy on Compressed Data: 0.95

Classification Report (k-NN):
              precision    recall  f1-score   support

          AD       0.96      0.99      0.97       672
     Healthy       0.88      0.70      0.78        97

    accuracy                           0.95       769
   macro avg       0.92      0.84      0.88       769
weighted avg       0.95      0.95      0.95       769


Confusion Matrix (k-NN):
[[663   9]
 [ 29  68]]
