In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Load the dataset (ensure your dataset is preprocessed and cleaned)
data = pd.read_csv('dataset.csv')

In [3]:
# Separate features (X) and target (y)
X = data.drop(columns=['target'])  # Drop the target column 'stroke'
y = data['target']  # Target variable

In [4]:
# Split the dataset into training and test sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
# Standardize the features (neural networks perform better with standardized data)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
# Convert scaled data back into a DataFrame for easier feature selection
X_train_scaled = pd.DataFrame(X_train_scaled, columns=X.columns)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X.columns)

In [8]:
# Function to create ANN model
def create_ann(input_shape):
    model = Sequential()
    model.add(Dense(16, activation='relu', input_shape=(input_shape,)))  # Input layer and first hidden layer
    model.add(Dense(8, activation='relu'))  # Second hidden layer
    model.add(Dense(1, activation='sigmoid'))  # Output layer (binary classification)
    
    # Compile the model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [9]:
# Initialize empty lists for selected features and remaining features
selected_features = []
remaining_features = list(X_train_scaled.columns)

In [10]:
# Best score tracker
best_score = 0

In [14]:
# Forward feature selection loop
while remaining_features:
    best_feature = None
    for feature in remaining_features:
        # Create temporary list of selected features + current feature
        temp_features = selected_features + [feature]
        
        # Prepare training and test sets with the selected features
        X_train_temp = X_train_scaled[temp_features]
        X_test_temp = X_test_scaled[temp_features]
        
        # Create and train ANN model
        model = create_ann(input_dim=X_train_temp.shape[1])
        model.fit(X_train_temp, y_train, epochs=10, batch_size=32, verbose=0)  # Train for 10 epochs
        
        # Predict on test set
        y_pred = (model.predict(X_test_temp) > 0.5).astype("int32")  # Convert probabilities to 0/1
        
        # Calculate accuracy score
        score = accuracy_score(y_test, y_pred)
        
        # If this feature improves the score, update best_feature and best_score
        if score > best_score:
            best_score = score
            best_feature = feature
    
    # If a feature improved the score, add it to selected features and remove from remaining features
    if best_feature:
        selected_features.append(best_feature)
        remaining_features.remove(best_feature)
        print(f'ADDED FEATURE: {best_feature}, SCORE: {best_score:.4f}')
    else:
        # Stop if no feature improves the score
        break

TypeError: create_ann() got an unexpected keyword argument 'input_dim'