# Baseline Experiments & GA Feature Selection Test

This notebook:
1. Loads the cleaned diabetes dataset
2. Runs baseline classification with all features
3. Tests the GA feature selection implementation
4. Compares results

In [8]:
# Import required libraries
import numpy as np
import sys
import os
import importlib

# Add project root to path
sys.path.insert(0, os.path.abspath('..'))

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

# Import and reload modules to pick up any changes
from src.models import classifier
from src import ga

# Reload to pick up changes
importlib.reload(classifier)
importlib.reload(ga.fitness)
importlib.reload(ga.operators)
importlib.reload(ga.genetic_algorithm)
importlib.reload(ga)

from src.models.classifier import get_decision_tree
from src.ga import GeneticAlgorithm

print("All imports successful!")

All imports successful!


## 1. Load Cleaned Diabetes Dataset

In [2]:
# Load the cleaned diabetes dataset
data_path = '../data/cleaned/cleaned_diabetes_data.csv'

# Load data using numpy
data = np.genfromtxt(data_path, delimiter=',', skip_header=1)

print(f"Dataset shape: {data.shape}")
print(f"Number of samples: {data.shape[0]}")
print(f"Number of columns: {data.shape[1]}")

# Separate features and target
# Target is 'Diagnosis' column (index 43, 0-based)
target_col_idx = 43
X = np.delete(data, target_col_idx, axis=1)
y = data[:, target_col_idx].astype(int)

print(f"\nFeatures shape: {X.shape}")
print(f"Target shape: {y.shape}")
print(f"Unique classes: {np.unique(y)}")
print(f"Class distribution: {np.bincount(y.astype(int))}")

Dataset shape: (1879, 52)
Number of samples: 1879
Number of columns: 52

Features shape: (1879, 51)
Target shape: (1879,)
Unique classes: [0 1 2 3 4 5 6 7 8 9]
Class distribution: [193 198 188 171 185 180 189 185 197 193]


In [3]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"Training set: {X_train.shape[0]} samples")
print(f"Test set: {X_test.shape[0]} samples")
print(f"Train class distribution: {np.bincount(y_train.astype(int))}")
print(f"Test class distribution: {np.bincount(y_test.astype(int))}")

Training set: 1503 samples
Test set: 376 samples
Train class distribution: [154 158 151 137 148 144 151 148 158 154]
Test class distribution: [39 40 37 34 37 36 38 37 39 39]


## 2. Baseline: Decision Tree with All Features

In [5]:
# Train baseline classifier with all features
baseline_clf = get_decision_tree(max_depth=10, min_samples_split=5, random_state=42)
baseline_clf.fit(X_train, y_train)

# Predictions
y_pred_baseline = baseline_clf.predict(X_test)

# Metrics
baseline_accuracy = accuracy_score(y_test, y_pred_baseline)
baseline_f1 = f1_score(y_test, y_pred_baseline, average='weighted')  # weighted for multiclass

print("=" * 60)
print("BASELINE: All Features")
print("=" * 60)
print(f"Number of features used: {X_train.shape[1]}")
print(f"Test Accuracy: {baseline_accuracy:.4f}")
print(f"Test F1-Score (weighted): {baseline_f1:.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_baseline))

BASELINE: All Features
Number of features used: 51
Test Accuracy: 0.0824
Test F1-Score (weighted): 0.0739

Classification Report:
              precision    recall  f1-score   support

           0       0.02      0.03      0.02        39
           1       0.12      0.17      0.15        40
           2       0.03      0.03      0.03        37
           3       0.05      0.03      0.04        34
           4       0.17      0.05      0.08        37
           5       0.10      0.25      0.15        36
           6       0.04      0.03      0.03        38
           7       0.08      0.03      0.04        37
           8       0.08      0.08      0.08        39
           9       0.11      0.13      0.12        39

    accuracy                           0.08       376
   macro avg       0.08      0.08      0.07       376
weighted avg       0.08      0.08      0.07       376



## 3. GA Feature Selection (Small Test Run)

In [9]:
# Create classifier factory for GA
classifier_factory = lambda: get_decision_tree(max_depth=10, min_samples_split=5, random_state=42)

# Configure GA (small test run)
ga = GeneticAlgorithm(
    n_features=X_train.shape[1],
    population_size=20,  # Small for quick test
    n_generations=10,    # Short test run
    classifier_factory=classifier_factory,
    selection_method='tournament',
    tournament_size=3,
    crossover_method='uniform',
    crossover_prob=0.8,
    mutation_method='bit_flip',
    mutation_prob=0.01,
    adaptive_mutation=False,
    elitism_rate=0.1,
    fitness_cfg={
        'accuracy_weight': 0.7,
        'feature_reduction_weight': 0.3,
        'penalty_threshold': 0.95
    },
    eval_cfg={
        'cv_folds': 3,  # Reduced for speed
        'random_state': 42,
        'metrics': ['accuracy', 'f1_score']
    },
    random_state=42
)

print("GA Configuration:")
print(f"  Population size: {ga.population_size}")
print(f"  Generations: {ga.n_generations}")
print(f"  Selection: {ga.selection_method}")
print(f"  Crossover: {ga.crossover_method}")
print(f"  Mutation: {ga.mutation_method}")
print(f"  Elitism rate: {ga.elitism_rate}")
print(f"  CV folds: {ga.eval_cfg['cv_folds']}")

GA Configuration:
  Population size: 20
  Generations: 10
  Selection: tournament
  Crossover: uniform
  Mutation: bit_flip
  Elitism rate: 0.1
  CV folds: 3


In [10]:
# Run GA feature selection
print("\nRunning GA Feature Selection...")
print("=" * 60)

best_mask, history = ga.evolve(X_train, y_train)

print("=" * 60)
print("\nGA Evolution Complete!")
print(f"Best feature subset: {np.sum(best_mask)} features selected out of {len(best_mask)}")
print(f"Feature reduction: {(1 - np.sum(best_mask)/len(best_mask)) * 100:.1f}%")
print(f"Final best fitness: {history['best_fitness'][-1]:.4f}")


Running GA Feature Selection...
Generation 1/10: Best Fitness = 0.2634, Avg Fitness = 0.2176, Features = 20/51
Generation 10/10: Best Fitness = 0.3275, Avg Fitness = 0.3116, Features = 8/51

GA Evolution Complete!
Best feature subset: 8 features selected out of 51
Feature reduction: 84.3%
Final best fitness: 0.3275


## 4. Train Final Model with GA-Selected Features

In [11]:
# Apply feature mask to data
X_train_selected = X_train[:, best_mask == 1]
X_test_selected = X_test[:, best_mask == 1]

print(f"Selected features shape: {X_train_selected.shape}")

# Train classifier on selected features
ga_clf = get_decision_tree(max_depth=10, min_samples_split=5, random_state=42)
ga_clf.fit(X_train_selected, y_train)

# Predictions
y_pred_ga = ga_clf.predict(X_test_selected)

# Metrics
ga_accuracy = accuracy_score(y_test, y_pred_ga)
ga_f1 = f1_score(y_test, y_pred_ga, average='weighted')  # weighted for multiclass

print("\n" + "=" * 60)
print("GA-SELECTED FEATURES")
print("=" * 60)
print(f"Number of features used: {X_train_selected.shape[1]} (vs {X_train.shape[1]} baseline)")
print(f"Feature reduction: {(1 - X_train_selected.shape[1]/X_train.shape[1]) * 100:.1f}%")
print(f"Test Accuracy: {ga_accuracy:.4f} (vs {baseline_accuracy:.4f} baseline)")
print(f"Test F1-Score (weighted): {ga_f1:.4f} (vs {baseline_f1:.4f} baseline)")
print("\nClassification Report:")
print(classification_report(y_test, y_pred_ga))

Selected features shape: (1503, 8)

GA-SELECTED FEATURES
Number of features used: 8 (vs 51 baseline)
Feature reduction: 84.3%
Test Accuracy: 0.1117 (vs 0.0824 baseline)
Test F1-Score (weighted): 0.0874 (vs 0.0739 baseline)

Classification Report:
              precision    recall  f1-score   support

           0       0.11      0.08      0.09        39
           1       0.12      0.05      0.07        40
           2       0.12      0.30      0.17        37
           3       0.00      0.00      0.00        34
           4       0.13      0.05      0.08        37
           5       0.12      0.42      0.18        36
           6       0.05      0.03      0.04        38
           7       0.20      0.08      0.12        37
           8       0.13      0.13      0.13        39
           9       0.00      0.00      0.00        39

    accuracy                           0.11       376
   macro avg       0.10      0.11      0.09       376
weighted avg       0.10      0.11      0.09      

## 5. Summary Comparison

In [12]:
print("\n" + "=" * 70)
print("FINAL COMPARISON: BASELINE vs GA FEATURE SELECTION")
print("=" * 70)
print(f"{'Metric':<30} {'Baseline':<20} {'GA-Selected':<20}")
print("-" * 70)
print(f"{'Number of Features':<30} {X_train.shape[1]:<20} {X_train_selected.shape[1]:<20}")
print(f"{'Feature Reduction (%)':<30} {0:<20} {(1 - X_train_selected.shape[1]/X_train.shape[1]) * 100:<20.1f}")
print(f"{'Test Accuracy':<30} {baseline_accuracy:<20.4f} {ga_accuracy:<20.4f}")
print(f"{'Test F1-Score':<30} {baseline_f1:<20.4f} {ga_f1:<20.4f}")
print(f"{'Accuracy Change':<30} {'':<20} {(ga_accuracy - baseline_accuracy):<+20.4f}")
print("=" * 70)

print("\n✓ GA Implementation Test Complete!")
print(f"✓ Successfully selected {np.sum(best_mask)} features")
print(f"✓ Achieved {(1 - np.sum(best_mask)/len(best_mask)) * 100:.1f}% feature reduction")
print(f"✓ Final fitness: {history['best_fitness'][-1]:.4f}")


FINAL COMPARISON: BASELINE vs GA FEATURE SELECTION
Metric                         Baseline             GA-Selected         
----------------------------------------------------------------------
Number of Features             51                   8                   
Feature Reduction (%)          0                    84.3                
Test Accuracy                  0.0824               0.1117              
Test F1-Score                  0.0739               0.0874              
Accuracy Change                                     +0.0293             

✓ GA Implementation Test Complete!
✓ Successfully selected 8 features
✓ Achieved 84.3% feature reduction
✓ Final fitness: 0.3275
