In [None]:
# Mount Google Drive (forcing a remount) so the shared project folder is reachable
# from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Change this to the folder that contains your `Data/` directory.
%cd "/content/drive/Shareddrives/AI Health Project/Project Code"

Mounted at /content/drive
/content/drive/Shareddrives/AI Health Project/Project Code


# Task
Load the 'A', 'D', and 'E' EEG datasets from the directory '/content/drive/Shareddrives/AI Health Project/Project Code/BONN EEG Dataset'. For each of these datasets, read all the individual text files (e.g., 'Z001.txt' to 'Z100.txt' for 'A') and concatenate them to form complete EEG signals for each category.

## Load BONN EEG Data

### Subtask:
Load the 'A', 'D', and 'E' EEG datasets from the specified path, read all individual text files for each dataset, and concatenate them to form complete EEG signals for each category.


In [None]:
import os
import numpy as np

dataset_prefixes = ['A', 'D', 'E']
bonn_eeg_dir = '/content/drive/Shareddrives/AI Health Project/Project Code/BONN EEG Dataset'

# category -> one long 1-D array holding every recording of that category, joined end to end
eeg_data_dict = {}
# categories that were actually loaded, in load order
labels = []

for prefix in dataset_prefixes:
    prefix_dir = os.path.join(bonn_eeg_dir, prefix)

    # Sorted listing keeps the concatenation order stable between runs.
    try:
        text_files = sorted(name for name in os.listdir(prefix_dir) if name.endswith('.txt'))
    except FileNotFoundError:
        print(f"Directory not found: {prefix_dir}. Skipping prefix {prefix}.")
        continue

    # Nothing to concatenate: report it and move on to the next category.
    if not text_files:
        print(f"No .txt files found in {prefix_dir}. Skipping prefix {prefix}.")
        continue

    recordings = [np.loadtxt(os.path.join(prefix_dir, name)) for name in text_files]
    eeg_data_dict[prefix] = np.concatenate(recordings)
    labels.append(prefix)

print("EEG data loaded and concatenated.")
print(f"Categories loaded: {list(eeg_data_dict)}")
print(f"Labels created: {labels}")

# Show one shape as a quick sanity check of the concatenation.
if 'A' in eeg_data_dict:
    print(f"Shape of 'A' dataset: {eeg_data_dict['A'].shape}")


EEG data loaded and concatenated.
Categories loaded: ['A', 'D', 'E']
Labels created: ['A', 'D', 'E']
Shape of 'A' dataset: (409700,)


In [None]:
import os
import numpy as np

# Where the per-category folders live, and which categories to read.
base_dir = '/content/drive/Shareddrives/AI Health Project/Project Code/BONN EEG Dataset'
categories = ['A', 'D', 'E']

# One entry per successfully loaded file: the signal array and its category letter.
X_raw = []
y_raw = []

for category in categories:
    folder_path = os.path.join(base_dir, category)

    # A missing category folder is reported and skipped, not fatal.
    try:
        file_names = sorted(f for f in os.listdir(folder_path) if f.endswith('.txt'))
    except FileNotFoundError:
        print(f"Directory not found: {folder_path}")
        continue

    for file_name in file_names:
        file_path = os.path.join(folder_path, file_name)
        # A file that cannot be parsed is reported and left out; signal and label
        # are only appended together so the two lists stay aligned.
        try:
            segment = np.loadtxt(file_path)
        except Exception as e:
            print(f"Error loading {file_path}: {e}")
        else:
            X_raw.append(segment)
            y_raw.append(category)

# Summary of what was loaded.
print(f"Total samples loaded: {len(X_raw)}")
if X_raw:
    print(f"Shape of first sample: {X_raw[0].shape}")
    print(f"Labels distribution: {np.unique(y_raw, return_counts=True)}")

Total samples loaded: 300
Shape of first sample: (4097,)
Labels distribution: (array(['A', 'D', 'E'], dtype='<U1'), array([100, 100, 100]))


## Preprocessing and Trimming

### Subtask:
Preprocess the 300 raw EEG segments by trimming them to 4096 data points, applying a 40Hz low-pass filter, and normalizing the values to the range [-1, 1].


**Reasoning**:
The user wants to preprocess the raw EEG data by trimming, filtering, and normalizing each of the 300 segments individually. I will implement the specified signal processing steps: designing a Butterworth low-pass filter, iterating through the raw data list, applying the filter and normalization to each trimmed segment, and storing the results in a new array.



In [None]:
from scipy.signal import butter, filtfilt
import numpy as np

# Filter settings: sampling frequency and cutoff (both in Hz) and the Butterworth order.
sfreq = 173.61
cutoff_freq = 40.0
order = 5
nyquist = sfreq / 2

# butter() takes the cutoff as a fraction of the Nyquist frequency.
b, a = butter(order, cutoff_freq / nyquist, btype='low')

X_processed = []
for raw_segment in X_raw:
    # Keep the first 4096 samples, then low-pass with a forward-backward pass.
    smoothed = filtfilt(b, a, raw_segment[:4096])

    # Scale by the peak magnitude so the result lies in [-1, 1];
    # an all-zero segment is kept unchanged to avoid dividing by zero.
    peak = np.max(np.abs(smoothed))
    X_processed.append(smoothed / peak if peak > 0 else smoothed)

# Stack the per-segment results into one array.
X_processed = np.array(X_processed)

print("Preprocessing complete.")
print(f"Shape of X_processed: {X_processed.shape}")

Preprocessing complete.
Shape of X_processed: (300, 4096)


## SWT Decomposition (Level 4)

### Subtask:
Apply Stationary Wavelet Transform (SWT) using the 'db4' wavelet at level 4 to each of the 300 preprocessed EEG signals.


**Reasoning**:
Perform Stationary Wavelet Transform (SWT) on the preprocessed EEG signals. Since the signal length is 4096, a 4-level decomposition is feasible (4096 is divisible by 2^4=16). I will iterate through the `X_processed` array, apply `pywt.swt` with 'db4' wavelet at level 4, and store the coefficients.



In [None]:
import pywt

# Wavelet family and decomposition depth applied to every segment.
wavelet = 'db4'
level = 4

# Each entry is the pywt.swt output for one preprocessed segment.
# The 4096-sample segments are a multiple of 2**4 = 16, so level 4 is valid.
X_decomposed = [pywt.swt(segment, wavelet, level=level) for segment in X_processed]

print("SWT Decomposition complete.")
if X_decomposed:
    print(f"Number of decomposed samples: {len(X_decomposed)}")
    print(f"Decomposition levels per sample: {len(X_decomposed[0])}")

    # pywt.swt returns (approximation, detail) pairs starting at the deepest level:
    # [(cA_n, cD_n), ..., (cA_1, cD_1)], so list position 0 corresponds to level `level`.
    for offset, (cA, cD) in enumerate(X_decomposed[0]):
        print(f"Level {level-offset}: cA shape={cA.shape}, cD shape={cD.shape}")

SWT Decomposition complete.
Number of decomposed samples: 300
Decomposition levels per sample: 4
Level 4: cA shape=(4096,), cD shape=(4096,)
Level 3: cA shape=(4096,), cD shape=(4096,)
Level 2: cA shape=(4096,), cD shape=(4096,)
Level 1: cA shape=(4096,), cD shape=(4096,)


## Feature Extraction

### Subtask:
Extract statistical features and Hjorth parameters from all SWT coefficients and calculate ratios between adjacent detail coefficients for each EEG segment.


**Reasoning**:
I will define a function to calculate statistical features and Hjorth parameters for each signal coefficient. Then, I will iterate through the decomposed signals, extracting these features for all levels (A and D) and computing the specified ratios between detail coefficients. Finally, I will aggregate all features into a pandas DataFrame.



In [None]:
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis

def calculate_features(signal):
    """Compute statistical and Hjorth descriptors of a 1-D signal.

    Parameters
    ----------
    signal : array-like
        Sequence of numeric samples. Lists and integer arrays are accepted; the
        values are converted to a float array before any arithmetic.

    Returns
    -------
    dict
        Keys, in this order: 'mean_abs', 'std', 'skew', 'kurtosis', 'rms',
        'activity', 'mobility', 'complexity'.

    Notes
    -----
    Degenerate inputs fall back to 0 instead of NaN/inf: mobility and complexity
    when the relevant variance is not positive, and skew/kurtosis when the signal
    has zero variance (SciPy's result for constant input differs between versions).
    """
    values = np.asarray(signal, dtype=float)

    mav = np.mean(np.abs(values))
    std = np.std(values)
    rms = np.sqrt(np.mean(values**2))

    # Hjorth parameters: activity is the signal variance; mobility and complexity
    # compare the variance of successive differences against it.
    activity = np.var(values)
    gradient = np.diff(values)
    gradient_var = np.var(gradient)

    mobility = np.sqrt(gradient_var / activity) if activity > 0 else 0
    grad_mobility = np.sqrt(np.var(np.diff(gradient)) / gradient_var) if gradient_var > 0 else 0
    complexity = grad_mobility / mobility if mobility > 0 else 0

    # Higher moments are undefined for a flat signal; use 0.0 so downstream
    # scaling and distance computations never receive NaN.
    if activity > 0:
        skw = skew(values)
        kurt = kurtosis(values)
    else:
        skw = 0.0
        kurt = 0.0

    return {
        'mean_abs': mav,
        'std': std,
        'skew': skw,
        'kurtosis': kurt,
        'rms': rms,
        'activity': activity,
        'mobility': mobility,
        'complexity': complexity
    }

features_list = []

for coeffs in X_decomposed:
    # coeffs holds (approximation, detail) pairs ordered from level 4 down to level 1:
    # [(cA4, cD4), (cA3, cD3), (cA2, cD2), (cA1, cD1)]
    sample_features = {}

    for position, depth in enumerate((4, 3, 2, 1)):
        approx, detail = coeffs[position]
        # Column names are "<band>_<statistic>", e.g. "A4_std" or "D2_mobility".
        for band_name, band in ((f"A{depth}", approx), (f"D{depth}", detail)):
            for stat_name, stat_value in calculate_features(band).items():
                sample_features[f"{band_name}_{stat_name}"] = stat_value

    # Mean-absolute-value ratios of adjacent detail bands: D1/D2, D2/D3, D3/D4.
    # A zero denominator yields 0 rather than a division error.
    for finer, coarser in ((1, 2), (2, 3), (3, 4)):
        numerator = sample_features[f"D{finer}_mean_abs"]
        denominator = sample_features[f"D{coarser}_mean_abs"]
        sample_features[f"ratio_D{finer}_D{coarser}"] = numerator / denominator if denominator != 0 else 0

    features_list.append(sample_features)

# One row per segment, one column per feature.
features_df = pd.DataFrame(features_list)

print("Feature extraction complete.")
print(f"Shape of features_df: {features_df.shape}")
print(features_df.head())

Feature extraction complete.
Shape of features_df: (300, 67)
   A4_mean_abs    A4_std   A4_skew  A4_kurtosis    A4_rms  A4_activity  \
0     0.514273  0.630681 -0.312607     0.580392  0.646412     0.397759   
1     0.839970  0.480984 -0.200557     0.159825  0.957043     0.231345   
2     0.629943  0.735013  0.046509     0.012972  0.783670     0.540244   
3     0.488936  0.606446  0.045974     0.337587  0.613040     0.367777   
4     0.637755  0.665370  0.289895    -0.305466  0.765576     0.442717   

   A4_mobility  A4_complexity  D4_mean_abs    D4_std  ...    D1_std   D1_skew  \
0     0.096125       1.805612     0.350261  0.442862  ...  0.013104  0.002062   
1     0.099726       1.992681     0.308872  0.394569  ...  0.013773  0.498374   
2     0.085663       1.925613     0.348887  0.449802  ...  0.014312 -0.099478   
3     0.105121       1.929247     0.415642  0.532707  ...  0.027106 -0.001890   
4     0.077112       2.335658     0.330770  0.428103  ...  0.015439  0.003122   

   D1_k

**Reasoning**:
Before performing feature selection with the Binary Dragonfly Algorithm, the categorical labels must be encoded as integers and the features standardized. Standardization matters here because the wrapper's fitness function uses a distance-based K-Nearest Neighbors classifier, whose distances would otherwise be dominated by the features with the largest numeric range.



In [None]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Map the category letters in y_raw to integer class ids.
le = LabelEncoder().fit(y_raw)
y = le.transform(y_raw)

# Standardise every feature column.
# NOTE(review): the scaler is fitted on all rows, before any train/test split.
scaler = StandardScaler().fit(features_df)
X_scaled = scaler.transform(features_df)

print("Labels encoded and features scaled.")
print(f"Classes: {le.classes_}")
print(f"Shape of X_scaled: {X_scaled.shape}")

Labels encoded and features scaled.
Classes: ['A' 'D' 'E']
Shape of X_scaled: (300, 67)


**Reasoning**:
I will implement the Binary Dragonfly Algorithm (BDFA) to select the optimal subset of features from the processed dataset. This involves defining a `BDFA` class that optimizes a binary feature mask by maximizing the cross-validated accuracy of a K-Nearest Neighbors classifier (the fitness function). Finally, I will run the algorithm to identify the most relevant features and create a new dataset `X_selected` containing only those features.



In [None]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score

class BDFA:
    """Simplified Binary Dragonfly Algorithm for wrapper-based feature selection.

    Each agent is a 0/1 mask over the feature columns. Only the food (best mask seen)
    and enemy (worst mask seen) terms drive the swarm in this variant; fitness is the
    mean 3-fold cross-validated accuracy of a 5-nearest-neighbours classifier.
    """

    def __init__(self, n_agents, max_iter, n_features, X, y):
        """Store the problem and create a random swarm with zeroed step vectors."""
        self.n_agents, self.max_iter, self.n_features = n_agents, max_iter, n_features
        self.X, self.y = X, y

        swarm_shape = (n_agents, n_features)
        self.positions = np.random.randint(2, size=swarm_shape)
        self.steps = np.zeros(swarm_shape)
        self.fitness = np.full(n_agents, -np.inf)

        # Best and worst masks seen so far, with sentinel scores any real score replaces.
        self.food_pos, self.food_fitness = np.zeros(n_features), -np.inf
        self.enemy_pos, self.enemy_fitness = np.zeros(n_features), np.inf

    def get_fitness(self, mask):
        """Return the mean CV accuracy for the columns enabled in `mask` (-inf if none)."""
        if np.sum(mask) == 0:
            return -np.inf
        chosen = mask.astype(bool)
        knn = KNeighborsClassifier(n_neighbors=5)
        # 3 folds keep each evaluation cheap.
        return cross_val_score(knn, self.X[:, chosen], self.y, cv=3, scoring='accuracy').mean()

    def step(self, t):
        """Run iteration `t`: score every agent, refresh food/enemy, then move the swarm."""
        # Inertia decays linearly from 0.9 towards 0.4 over the run.
        inertia = 0.9 - t * ((0.9 - 0.4) / self.max_iter)
        food_weight = enemy_weight = 1.0

        for agent, candidate in enumerate(self.positions):
            score = self.get_fitness(candidate)
            self.fitness[agent] = score
            if score > self.food_fitness:
                self.food_fitness, self.food_pos = score, candidate.copy()
            if score < self.enemy_fitness:
                self.enemy_fitness, self.enemy_pos = score, candidate.copy()

        # Food and enemy are fixed while agents move, so the whole swarm is updated at once.
        attraction = self.food_pos - self.positions
        distraction = self.enemy_pos + self.positions
        self.steps = (food_weight * attraction + enemy_weight * distraction) + inertia * self.steps

        # Sigmoid turns each step component into the probability of that bit being 1.
        prob_on = 1 / (1 + np.exp(-self.steps))
        draws = np.random.rand(self.n_agents, self.n_features)
        self.positions = (draws < prob_on).astype(int)

    def run(self):
        """Iterate `max_iter` times and return `(best_mask, best_fitness)`."""
        for t in range(self.max_iter):
            self.step(t)
            if (t+1) % 5 == 0:
                print(f"Iteration {t+1}/{self.max_iter}, Best Fitness: {self.food_fitness:.4f}, Num Features: {np.sum(self.food_pos)}")

        return self.food_pos, self.food_fitness

# Small swarm and few iterations keep this demonstration run short.
print("Starting BDFA Feature Selection...")
n_candidate_features = X_scaled.shape[1]
bdfa = BDFA(n_agents=10, max_iter=20, n_features=n_candidate_features, X=X_scaled, y=y)
best_mask_binary, best_score = bdfa.run()

# The search returns a 0/1 vector; turn it into a boolean column selector.
best_mask = best_mask_binary.astype(bool)

# Edge case: an empty selection falls back to using every feature.
if not best_mask.any():
    print("Warning: No features selected. Reverting to all features.")
    best_mask = np.ones(n_candidate_features, dtype=bool)

X_selected = X_scaled[:, best_mask]
selected_feature_names = features_df.columns[best_mask]

print("\nOptimization Complete.")
print(f"Best Accuracy (KNN Wrapper): {best_score:.4f}")
print(f"Selected Features ({sum(best_mask)}): {list(selected_feature_names)}")
print(f"Shape of X_selected: {X_selected.shape}")

Starting BDFA Feature Selection...
Iteration 5/20, Best Fitness: 0.9467, Num Features: 39
Iteration 10/20, Best Fitness: 0.9467, Num Features: 39
Iteration 15/20, Best Fitness: 0.9467, Num Features: 39
Iteration 20/20, Best Fitness: 0.9500, Num Features: 56

Optimization Complete.
Best Accuracy (KNN Wrapper): 0.9500
Selected Features (56): ['A4_mean_abs', 'A4_std', 'A4_kurtosis', 'A4_rms', 'A4_complexity', 'D4_mean_abs', 'D4_std', 'D4_skew', 'D4_kurtosis', 'D4_rms', 'D4_activity', 'D4_mobility', 'D4_complexity', 'A3_mean_abs', 'A3_std', 'A3_skew', 'A3_kurtosis', 'A3_rms', 'A3_activity', 'A3_mobility', 'A3_complexity', 'D3_mean_abs', 'D3_std', 'D3_skew', 'D3_kurtosis', 'D3_activity', 'D3_mobility', 'D3_complexity', 'A2_std', 'A2_skew', 'A2_kurtosis', 'A2_rms', 'A2_mobility', 'A2_complexity', 'D2_mean_abs', 'D2_std', 'D2_kurtosis', 'D2_rms', 'D2_activity', 'D2_mobility', 'A1_mean_abs', 'A1_skew', 'A1_kurtosis', 'A1_rms', 'A1_activity', 'A1_mobility', 'A1_complexity', 'D1_mean_abs', 'D1_s

## Feature Selection (BDFA)

### Subtask:
Encode labels, standardize features, and perform feature selection using the Binary Dragonfly Algorithm (BDFA).


**Reasoning**:
I will start by importing the necessary libraries, encoding the categorical labels into numerical values, and standardizing the features. This prepares the data for the feature selection process.



In [None]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
import numpy as np

# Integer-encode the category letters held in y_raw.
le = LabelEncoder().fit(y_raw)
y = le.transform(y_raw)
print(f"Classes: {le.classes_}")

# Standardise each feature column of features_df.
# NOTE(review): fitting on every row means held-out rows influence the scaling statistics.
scaler = StandardScaler().fit(features_df)
X_scaled = scaler.transform(features_df)
print(f"Shape of X_scaled: {X_scaled.shape}")

Classes: ['A' 'D' 'E']
Shape of X_scaled: (300, 67)


In [None]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
import numpy as np

# Rebuild the encoded labels and the standardised feature matrix so this cell
# can be run without relying on the earlier encoding cells.
le = LabelEncoder().fit(y_raw)
y = le.transform(y_raw)
print(f"Classes: {le.classes_}")

# NOTE(review): the scaler sees all rows here, i.e. before the later train/test split.
scaler = StandardScaler().fit(features_df)
X_scaled = scaler.transform(features_df)
print(f"Shape of X_scaled: {X_scaled.shape}")

# 4. Define BDFA Class
class BDFA:
    """Binary Dragonfly Algorithm (BDFA) for wrapper-based feature selection.

    Every agent is a 0/1 mask over the feature columns of `X`. On each iteration
    the agents are scored, the best (food) and worst (enemy) masks seen so far are
    updated, and each agent's step vector is recomputed from separation, alignment,
    cohesion, food and enemy terms. A sigmoid of the step gives the per-bit
    probability of the new position being 1.

    Parameters
    ----------
    n_agents : int
        Number of candidate masks in the swarm.
    max_iter : int
        Number of iterations performed by `run`.
    n_features : int
        Length of each mask (number of columns in `X`).
    X : array
        Feature matrix indexed as `X[:, mask]`.
    y : array-like
        Class labels passed to cross-validation.
    random_state : int or None, optional
        Seed for a private `numpy.random.RandomState`. With the default `None`
        the global NumPy RNG is used, exactly as before this parameter existed,
        so `np.random.seed(...)` still controls the run.
    """

    def __init__(self, n_agents, max_iter, n_features, X, y, random_state=None):
        self.n_agents = n_agents
        self.max_iter = max_iter
        self.n_features = n_features
        self.X = X
        self.y = y
        self.random_state = random_state

        # `np.random` (module) and `RandomState` both expose randint() and rand(),
        # so the rest of the class does not care which one is in use.
        self._rng = np.random if random_state is None else np.random.RandomState(random_state)

        # Random binary starting positions and zero step vectors.
        self.positions = self._rng.randint(2, size=(n_agents, n_features))
        self.steps = np.zeros((n_agents, n_features))
        self.fitness = np.full(n_agents, -np.inf)

        # Food: best mask found so far.
        self.food_pos = np.zeros(n_features)
        self.food_fitness = -np.inf

        # Enemy: worst mask found so far.
        self.enemy_pos = np.zeros(n_features)
        self.enemy_fitness = np.inf

    def get_fitness(self, mask):
        """Return the mean 3-fold CV accuracy of 5-NN on the masked columns.

        An all-zero mask selects nothing and scores `-inf`.
        """
        if np.sum(mask) == 0:
            return -np.inf

        X_subset = self.X[:, mask.astype(bool)]
        clf = KNeighborsClassifier(n_neighbors=5)
        scores = cross_val_score(clf, X_subset, self.y, cv=3, scoring='accuracy')
        return scores.mean()

    def step(self, t):
        """Perform iteration `t`: evaluate all agents, then move each of them."""
        # Inertia decays linearly from 0.9 towards 0.4 over the run.
        w = 0.9 - t * ((0.9 - 0.4) / self.max_iter)

        # Behaviour weights: separation, alignment, cohesion, food, enemy.
        s_w, a_w, c_w = 0.1, 0.1, 0.1
        f_w, e_w = 1.0, 1.0

        for i in range(self.n_agents):
            fit = self.get_fitness(self.positions[i])
            self.fitness[i] = fit

            if fit > self.food_fitness:
                self.food_fitness = fit
                self.food_pos = self.positions[i].copy()

            if fit < self.enemy_fitness:
                self.enemy_fitness = fit
                self.enemy_pos = self.positions[i].copy()

        # Swarm-wide sums taken once, before any agent moves. Agent i is still at
        # its old position when it is processed, so "sum - own value" is the sum
        # over the other agents in this snapshot.
        total_pos = np.sum(self.positions, axis=0)
        total_step = np.sum(self.steps, axis=0)
        # Every other agent is treated as a neighbour; a lone agent divides by 1.
        n_others = max(self.n_agents - 1, 1)

        for i in range(self.n_agents):
            sum_pos_others = total_pos - self.positions[i]
            sum_step_others = total_step - self.steps[i]

            S = -(n_others * self.positions[i] - sum_pos_others)   # separation
            A = sum_step_others / n_others                         # alignment
            C = (sum_pos_others / n_others) - self.positions[i]    # cohesion
            F = self.food_pos - self.positions[i]                  # food attraction
            # NOTE(review): with 0/1 positions this term is never negative, so it
            # always pushes towards selecting features; confirm that is intended.
            E = self.enemy_pos + self.positions[i]                 # enemy distraction

            dX = (s_w * S + a_w * A + c_w * C + f_w * F + e_w * E) + w * self.steps[i]

            # Bound the step so the exponential below cannot overflow.
            dX = np.clip(dX, -10, 10)
            self.steps[i] = dX

            # Sigmoid transfer function: probability of each bit being set.
            T = 1 / (1 + np.exp(-dX))
            r = self._rng.rand(self.n_features)
            self.positions[i] = (r < T).astype(int)

    def run(self):
        """Run `max_iter` iterations and return `(best_mask, best_fitness)`."""
        print("Starting BDFA optimization...")
        for t in range(self.max_iter):
            self.step(t)
            if (t+1) % 5 == 0:
                print(f"Iteration {t+1}/{self.max_iter}, Best Fitness: {self.food_fitness:.4f}, Features: {int(np.sum(self.food_pos))}")
        return self.food_pos, self.food_fitness

# The search draws from NumPy's global RNG (initial masks and every bit flip).
# Seed it first so the selected subset, and everything trained on it, is the
# same on every run of this cell.
BDFA_SEED = 42
np.random.seed(BDFA_SEED)

# Instantiate and run the search over all scaled feature columns.
bdfa = BDFA(n_agents=10, max_iter=20, n_features=X_scaled.shape[1], X=X_scaled, y=y)
best_mask_binary, best_score = bdfa.run()

# The search returns a 0/1 vector; convert it to a boolean column selector.
best_mask = best_mask_binary.astype(bool)

# Safety net: an empty selection falls back to every feature.
if np.sum(best_mask) == 0:
    print("Warning: No features selected by BDFA. Reverting to all features.")
    best_mask[:] = True

X_selected = X_scaled[:, best_mask]
selected_feature_names = features_df.columns[best_mask].tolist()

# Report the outcome of the search.
print("\nOptimization Results:")
print(f"Best Accuracy: {best_score:.4f}")
print(f"Selected {len(selected_feature_names)} features.")
print(f"Feature names: {selected_feature_names}")
print(f"Shape of X_selected: {X_selected.shape}")

Classes: ['A' 'D' 'E']
Shape of X_scaled: (300, 67)
Starting BDFA optimization...
Iteration 5/20, Best Fitness: 0.9400, Features: 59
Iteration 10/20, Best Fitness: 0.9400, Features: 59
Iteration 15/20, Best Fitness: 0.9433, Features: 62
Iteration 20/20, Best Fitness: 0.9433, Features: 62

Optimization Results:
Best Accuracy: 0.9433
Selected 62 features.
Feature names: ['A4_mean_abs', 'A4_std', 'A4_skew', 'A4_kurtosis', 'A4_rms', 'A4_activity', 'A4_mobility', 'A4_complexity', 'D4_mean_abs', 'D4_std', 'D4_skew', 'D4_kurtosis', 'D4_rms', 'D4_activity', 'D4_mobility', 'D4_complexity', 'A3_mean_abs', 'A3_std', 'A3_skew', 'A3_kurtosis', 'A3_rms', 'A3_activity', 'A3_mobility', 'A3_complexity', 'D3_mean_abs', 'D3_std', 'D3_kurtosis', 'D3_rms', 'D3_activity', 'D3_mobility', 'D3_complexity', 'A2_mean_abs', 'A2_std', 'A2_skew', 'A2_kurtosis', 'A2_rms', 'A2_activity', 'A2_mobility', 'A2_complexity', 'D2_mean_abs', 'D2_std', 'D2_rms', 'D2_activity', 'D2_mobility', 'D2_complexity', 'A1_std', 'A1_kur

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, BatchNormalization, Dropout
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
import numpy as np

# Seed Python, NumPy and TensorFlow together so weight initialisation, dropout
# and batch shuffling repeat from run to run.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Stratified 80/20 split on the BDFA-selected features.
# NOTE(review): X_selected comes from a scaler and a feature search that were both
# fitted on all samples, including the rows that end up in X_test. The test score
# below is therefore optimistic; split first and fit scaling/selection on the
# training rows only to get an unbiased estimate.
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=SEED, stratify=y)

print(f"Training shape: {X_train.shape}, Test shape: {X_test.shape}")

# 1. Small fully connected network: two hidden blocks (Dense -> BatchNorm -> Dropout)
#    followed by a 3-way softmax, one output per class.
model_simple = Sequential([
    Input(shape=(X_train.shape[1],)),

    # Hidden block 1
    Dense(64, activation='relu', kernel_initializer='he_normal'),
    BatchNormalization(),
    Dropout(0.3),

    # Hidden block 2
    Dense(32, activation='relu', kernel_initializer='he_normal'),
    BatchNormalization(),
    Dropout(0.3),

    # Output layer
    Dense(3, activation='softmax')
])

# 2. Integer class ids are used directly, hence the sparse cross-entropy loss.
model_simple.compile(optimizer='adam',
                     loss='sparse_categorical_crossentropy',
                     metrics=['accuracy'])

# 3. Halve the learning rate after 10 stagnant epochs; stop after 20 and restore
#    the weights from the best validation epoch.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.00001, verbose=1)
early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=1)

# 4. Train, holding out 10% of the training rows for validation.
history_simple = model_simple.fit(
    X_train, y_train,
    epochs=100,
    batch_size=16,
    validation_split=0.1,
    callbacks=[reduce_lr, early_stop],
    verbose=1
)

# 5. Evaluate on the held-out test rows.
loss, acc = model_simple.evaluate(X_test, y_test, verbose=0)
y_pred_prob = model_simple.predict(X_test)
y_pred = y_pred_prob.argmax(axis=1)

print("\n--- Simplified Model Evaluation (2 Hidden Layers) ---")
print(f"Test Accuracy: {acc:.4f}")

# Take the report labels from the fitted encoder so they always follow the same
# order as the integer ids in y (here: ['A', 'D', 'E']).
target_names = [str(name) for name in le.classes_]

print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=target_names))

Training shape: (240, 62), Test shape: (60, 62)
Epoch 1/100
[1m14/14[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m3s[0m 35ms/step - accuracy: 0.3449 - loss: 1.6237 - val_accuracy: 0.4583 - val_loss: 0.9344 - learning_rate: 0.0010
Epoch 2/100
[1m14/14[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.4778 - loss: 1.1889 - val_accuracy: 0.8333 - val_loss: 0.6733 - learning_rate: 0.0010
Epoch 3/100
[1m14/14[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.6606 - loss: 0.8889 - val_accuracy: 1.0000 - val_loss: 0.5353 - learning_rate: 0.0010
Epoch 4/100
[1m14/14[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.7091 - loss: 0.6699 - val_accuracy: 0.9583 - val_loss: 0.4334 - learning_rate: 0.0010
Epoch 5/100
[1m14/14[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î



[1m1/2[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [1m0s[0m 83ms/step



[1m2/2[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 86ms/step

--- Simplified Model Evaluation (2 Hidden Layers) ---
Test Accuracy: 1.0000

Classification Report:
              precision    recall  f1-score   support

           A       1.00      1.00      1.00        20
           D       1.00      1.00      1.00        20
           E       1.00      1.00      1.00        20

    accuracy                           1.00        60
   macro avg       1.00      1.00      1.00        60
weighted avg       1.00      1.00      1.00        60



In [None]:
import joblib
import os

# Directory that receives the model and every artifact needed to reuse it.
save_dir = '/content/drive/Shareddrives/AI Health Project/Project Code/Models'
os.makedirs(save_dir, exist_ok=True)

# 1. Weights only (the .weights.h5 suffix is the Keras 3+ naming convention).
weights_path = os.path.join(save_dir, 'dnn_model_weights.weights.h5')
model_simple.save_weights(weights_path)
print(f"Model weights saved to: {weights_path}")

# 2. Full model: architecture, weights and optimizer state in one file.
model_path = os.path.join(save_dir, 'dnn_model_full.keras')
model_simple.save(model_path)
print(f"Full model saved to: {model_path}")

# 3. Preprocessing objects required to prepare new data for prediction.
joblib.dump(scaler, os.path.join(save_dir, 'scaler.pkl'))
joblib.dump(le, os.path.join(save_dir, 'label_encoder.pkl'))
print("Preprocessing objects (scaler, label_encoder) saved successfully.")

# 4. Feature selection. The scaler works on the full feature table, but the model
#    was trained only on the columns kept by BDFA. Without this mask the saved
#    model cannot be fed correctly shaped input later.
feature_selection_path = os.path.join(save_dir, 'selected_features.pkl')
joblib.dump(
    {'mask': best_mask, 'names': list(selected_feature_names)},
    feature_selection_path,
)
print(f"Feature selection (mask, names) saved to: {feature_selection_path}")

Model weights saved to: /content/drive/Shareddrives/AI Health Project/Project Code/Models/dnn_model_weights.weights.h5
Full model saved to: /content/drive/Shareddrives/AI Health Project/Project Code/Models/dnn_model_full.keras
Preprocessing objects (scaler, label_encoder) saved successfully.
