<H1>Filter Method</H1>

<H2>Variance Threshold</H2>

In [1]:
import pandas as pd
from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv1D, Flatten, Dense
from sklearn.metrics import accuracy_score

# Load dataset
file_path = "Downloads/preprocessed_covid500.csv"  # Update with the correct path to your dataset
df = pd.read_csv(file_path)

# Drop the 'source' feature if it exists
#df = df.drop(columns=['data_source'], errors='ignore')

# Separate features and target variable
X = df.drop(columns=['COVID-19'])
y = df['COVID-19']

# Variance Threshold Function
def variance_threshold(X, threshold=0.1):
    selector = VarianceThreshold(threshold)
    selector.fit(X)
    return X.columns[selector.get_support()]

# Apply Variance Threshold
selected_features = variance_threshold(X)

# Print the features selected by Variance Threshold
print("Features Selected by Variance Threshold:")
for feature in selected_features:
    print(feature)

# Filter the dataset to keep only the selected features
X_selected = X[selected_features]

# Prepare Data for 1D CNN
X_selected = X_selected.values.reshape(X_selected.shape[0], X_selected.shape[1], 1)  # Reshape for CNN
y = y.values  # Convert target variable to numpy array

# Split the Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

# Build and Train the 1D CNN Model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_selected.shape[1], 1)))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))  # Adjust output layer based on your problem

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, batch_size=32)  # Adjust epochs and batch size as needed

# Evaluate the Model
y_pred = model.predict(X_test)
y_pred_classes = (y_pred > 0.5).astype(int)  # Convert probabilities to binary class predictions

# Calculate Accuracy
accuracy = accuracy_score(y_test, y_pred_classes)
print(f'Accuracy: {accuracy:.4f}')

Features Selected by Variance Threshold:
Running Nose
Asthma
Chronic Lung Disease
Headache
Heart Disease
Diabetes
Hyper Tension
Abroad travel
Contact with COVID Patient
Attended Large Gathering
Visited Public Exposed Places
Family working in Public Exposed Places
Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1157/1157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.8751 - loss: 0.3497
Epoch 2/10
[1m1157/1157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9366 - loss: 0.1878
Epoch 3/10
[1m1157/1157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9407 - loss: 0.1798
Epoch 4/10
[1m1157/1157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9459 - loss: 0.1736
Epoch 5/10
[1m1157/1157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9440 - loss: 0.1748
Epoch 6/10
[1m1157/1157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9472 - loss: 0.1713
Epoch 7/10
[1m1157/1157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9463 - loss: 0.1703
Epoch 8/10
[1m1157/1157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9475 - loss: 0.1704
Epoch 9/10
[1m1157/1157[0m [32m━

<H2>Chi Squared Test</H2>

In [8]:
import pandas as pd
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv1D, Flatten, Dense
from keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score

# Load dataset
file_path = "Downloads/preprocessed_covid500.csv"  # Update with the correct path to your dataset
#df = pd.read_csv(file_path)

# Drop the 'source' feature if it exists
df = df.drop(columns=['source'], errors='ignore')

# Separate features and target variable
X = df.drop(columns=['COVID-19'])
y = df['COVID-19']

# Chi-squared Test Function
def chi_squared(X, y, k=10):  # Set k to the number of features you want to select
    chi2_selector = SelectKBest(chi2, k=k)
    chi2_selector.fit(X, y)
    return X.columns[chi2_selector.get_support()]

# Apply Chi-squared Test
selected_features = chi_squared(X, y, k=10)  # Change k to the desired number of features

# Print the features selected by Chi-squared Test
print("Features Selected by Chi-squared Test:")
for feature in selected_features:
    print(feature)

# Filter the dataset to keep only the selected features
X_selected = X[selected_features]

# Prepare Data for 1D CNN
X_selected = X_selected.values.reshape(X_selected.shape[0], X_selected.shape[1], 1)  # Reshape for CNN
y = y.values  # Convert target variable to numpy array

# Split the Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

# Build and Train the 1D CNN Model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_selected.shape[1], 1)))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))  # Adjust output layer based on your problem

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# Fit the model with early stopping
model.fit(X_train, y_train, epochs=10000, batch_size=32, validation_split=0.2, callbacks=[early_stopping])  # Adjust epochs and batch size as needed

# Evaluate the Model
y_pred = model.predict(X_test)
y_pred_classes = (y_pred > 0.5).astype(int)  # Convert probabilities to binary class predictions

# Calculate Accuracy
accuracy = accuracy_score(y_test, y_pred_classes)
print(f'Accuracy: {accuracy:.4f}')

Features Selected by Chi-squared Test:
Asthma
Chronic Lung Disease
Headache
Heart Disease
Diabetes
Abroad travel
Contact with COVID Patient
Attended Large Gathering
Visited Public Exposed Places
Family working in Public Exposed Places
Epoch 1/10000


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m926/926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8670 - loss: 0.3655 - val_accuracy: 0.9246 - val_loss: 0.2056
Epoch 2/10000
[1m926/926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9340 - loss: 0.1900 - val_accuracy: 0.9226 - val_loss: 0.1991
Epoch 3/10000
[1m926/926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9352 - loss: 0.1921 - val_accuracy: 0.9399 - val_loss: 0.1930
Epoch 4/10000
[1m926/926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9417 - loss: 0.1851 - val_accuracy: 0.9338 - val_loss: 0.1936
Epoch 5/10000
[1m926/926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9401 - loss: 0.1900 - val_accuracy: 0.9398 - val_loss: 0.1917
Epoch 6/10000
[1m926/926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9429 - loss: 0.1831 - val_accuracy: 0.9414 - val_loss: 0.1896
Epoch 7/10000
[1m926/92

In [10]:
import pandas as pd
from sklearn.feature_selection import mutual_info_classif
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv1D, Flatten, Dense
from sklearn.metrics import accuracy_score

# Load dataset
file_path = "Downloads/preprocessed_covid500.csv"  # Update with the correct path to your dataset
df = pd.read_csv(file_path)

# Drop the 'source' feature if it exists
#df = df.drop(columns=['source'], errors='ignore')

# Separate features and target variable
X = df.drop(columns=['COVID-19'])
y = df['COVID-19']

# Mutual Information Function
def mutual_information(X, y, num_features=10):  # Set num_features to the number of features you want to select
    mi = mutual_info_classif(X, y, discrete_features='auto')
    mi_series = pd.Series(mi, index=X.columns)
    return mi_series.nlargest(num_features).index  # Select the top num_features

# Apply Mutual Information
selected_features = mutual_information(X, y, num_features=10)  # Change num_features to the desired number of features

# Print the features selected by Mutual Information
print("Features Selected by Mutual Information:")
for feature in selected_features:
    print(feature)

# Filter the dataset to keep only the selected features
X_selected = X[selected_features]

# Prepare Data for 1D CNN
X_selected = X_selected.values.reshape(X_selected.shape[0], X_selected.shape[1], 1)  # Reshape for CNN
y = y.values  # Convert target variable to numpy array

# Split the Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

# Build and Train the 1D CNN Model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_selected.shape[1], 1)))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))  # Adjust output layer based on your problem

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, batch_size=32)  # Adjust epochs and batch size as needed
# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# Fit the model with early stopping
model.fit(X_train, y_train, epochs=10000, batch_size=32, validation_split=0.2, callbacks=[early_stopping])  # Adjust epochs and batch size as needed

# Evaluate the Model
y_pred = model.predict(X_test)
y_pred_classes = (y_pred > 0.5).astype(int)  # Convert probabilities to binary class predictions

# Calculate Accuracy
accuracy = accuracy_score(y_test, y_pred_classes)
print(f'Accuracy: {accuracy:.4f}')

Features Selected by Mutual Information:
Abroad travel
Attended Large Gathering
Diabetes
Family working in Public Exposed Places
Visited Public Exposed Places
Asthma
Headache
Heart Disease
Chronic Lung Disease
Contact with COVID Patient
Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1157/1157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.8347 - loss: 0.3884
Epoch 2/10
[1m1157/1157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9350 - loss: 0.2040
Epoch 3/10
[1m1157/1157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9410 - loss: 0.1933
Epoch 4/10
[1m1157/1157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9436 - loss: 0.1876
Epoch 5/10
[1m1157/1157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9410 - loss: 0.1924
Epoch 6/10
[1m1157/1157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9452 - loss: 0.1826
Epoch 7/10
[1m1157/1157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9447 - loss: 0.1856
Epoch 8/10
[1m1157/1157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9436 - loss: 0.1856
Epoch 9/10
[1m1157/1157[0m [32m━

<H1>Mutual Information</H1>

In [12]:
import pandas as pd
from sklearn.feature_selection import mutual_info_classif
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv1D, Flatten, Dense
from keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score

# Load dataset
file_path = "Downloads/preprocessed_covid500.csv"  # Update with the correct path to your dataset
df = pd.read_csv(file_path)

# Drop the 'source' feature if it exists
# df = df.drop(columns=['source'], errors='ignore')

# Separate features and target variable
X = df.drop(columns=['COVID-19'])
y = df['COVID-19']

# Mutual Information Function
def mutual_information(X, y, num_features=10):
    """
    Calculate mutual information between features and target variable,
    and return the top num_features based on mutual information scores.
    
    Parameters:
    - X: DataFrame of features
    - y: Series of target variable
    - num_features: Number of top features to select based on mutual information
    
    Returns:
    - List of selected feature names
    """
    mi = mutual_info_classif(X, y, discrete_features='auto')
    mi_series = pd.Series(mi, index=X.columns)
    return mi_series.nlargest(num_features).index.tolist()  # Return as a list

# Apply Mutual Information
num_features_to_select = 10  # Change this to the desired number of features
selected_features = mutual_information(X, y, num_features=num_features_to_select)

# Print the features selected by Mutual Information
print("Features Selected by Mutual Information:")
for feature in selected_features:
    print(feature)

# Filter the dataset to keep only the selected features
X_selected = X[selected_features]

# Prepare Data for 1D CNN
X_selected = X_selected.values.reshape(X_selected.shape[0], X_selected.shape[1], 1)  # Reshape for CNN
y = y.values  # Convert target variable to numpy array

# Split the Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

# Build and Train the 1D CNN Model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_selected.shape[1], 1)))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))  # Adjust output layer based on your problem

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# Fit the model with early stopping
model.fit(X_train, y_train, epochs=10000, batch_size=32, validation_split=0.2, callbacks=[early_stopping])  # Adjust epochs and batch size as needed

# Evaluate the Model
y_pred = model.predict(X_test)
y_pred_classes = (y_pred > 0.5).astype(int)  # Convert probabilities to binary class predictions

# Calculate Accuracy
accuracy = accuracy_score(y_test, y_pred_classes)
print(f'Accuracy: {accuracy:.4f}')

Features Selected by Mutual Information:
Abroad travel
Attended Large Gathering
Diabetes
Family working in Public Exposed Places
Visited Public Exposed Places
Asthma
Headache
Heart Disease
Chronic Lung Disease
Breathing Problem
Epoch 1/10000


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m926/926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8030 - loss: 0.4288 - val_accuracy: 0.9142 - val_loss: 0.2374
Epoch 2/10000
[1m926/926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9109 - loss: 0.2281 - val_accuracy: 0.9236 - val_loss: 0.2214
Epoch 3/10000
[1m926/926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9282 - loss: 0.2147 - val_accuracy: 0.9267 - val_loss: 0.2177
Epoch 4/10000
[1m926/926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9317 - loss: 0.2066 - val_accuracy: 0.9271 - val_loss: 0.2161
Epoch 5/10000
[1m926/926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9305 - loss: 0.2072 - val_accuracy: 0.9179 - val_loss: 0.2159
Epoch 6/10000
[1m926/926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9325 - loss: 0.2088 - val_accuracy: 0.9367 - val_loss: 0.2132
Epoch 7/10000
[1m926/92

In [14]:
import pandas as pd
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv1D, Flatten, Dense
from keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score

# Load dataset
file_path = "Downloads/preprocessed_covid500.csv"  # Update with the correct path to your dataset
df = pd.read_csv(file_path)

# Drop the 'source' feature if it exists
# df = df.drop(columns=['source'], errors='ignore')

# Separate features and target variable
X = df.drop(columns=['COVID-19'])
y = df['COVID-19']

# Information Gain Function using SelectKBest
def information_gain(X, y, num_features=10):
    """
    Calculate Information Gain (using mutual information) between features and target variable,
    and return the top num_features based on Information Gain scores.
    
    Parameters:
    - X: DataFrame of features
    - y: Series of target variable
    - num_features: Number of top features to select based on Information Gain
    
    Returns:
    - List of selected feature names
    """
    selector = SelectKBest(score_func=mutual_info_classif, k=num_features)
    selector.fit(X, y)
    selected_indices = selector.get_support(indices=True)
    return X.columns[selected_indices].tolist()  # Return as a list

# Apply Information Gain
num_features_to_select = 10  # Change this to the desired number of features
selected_features = information_gain(X, y, num_features=num_features_to_select)

# Print the features selected by Information Gain
print("Features Selected by Information Gain:")
for feature in selected_features:
    print(feature)

# Filter the dataset to keep only the selected features
X_selected = X[selected_features]

# Prepare Data for 1D CNN
X_selected = X_selected.values.reshape(X_selected.shape[0], X_selected.shape[1], 1)  # Reshape for CNN
y = y.values  # Convert target variable to numpy array

# Split the Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

# Build and Train the 1D CNN Model
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_selected.shape[1], 1)))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))  # Adjust output layer based on your problem

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# Fit the model with early stopping
model.fit(X_train, y_train, epochs=10000, batch_size=32, validation_split=0.2, callbacks=[early_stopping])  # Adjust epochs and batch size as needed

# Evaluate the Model
y_pred = model.predict(X_test)
y_pred_classes = (y_pred > 0.5).astype(int)  # Convert probabilities to binary class predictions

# Calculate Accuracy
accuracy = accuracy_score(y_test, y_pred_classes)
print(f'Accuracy: {accuracy:.4f}')

Features Selected by Information Gain:
Asthma
Chronic Lung Disease
Headache
Heart Disease
Diabetes
Abroad travel
Contact with COVID Patient
Attended Large Gathering
Visited Public Exposed Places
Family working in Public Exposed Places
Epoch 1/10000


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m926/926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8779 - loss: 0.3765 - val_accuracy: 0.9215 - val_loss: 0.2042
Epoch 2/10000
[1m926/926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9311 - loss: 0.1938 - val_accuracy: 0.9267 - val_loss: 0.2037
Epoch 3/10000
[1m926/926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9348 - loss: 0.1941 - val_accuracy: 0.9384 - val_loss: 0.1938
Epoch 4/10000
[1m926/926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9384 - loss: 0.1910 - val_accuracy: 0.9399 - val_loss: 0.1915
Epoch 5/10000
[1m926/926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9420 - loss: 0.1836 - val_accuracy: 0.9407 - val_loss: 0.1915
Epoch 6/10000
[1m926/926[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9426 - loss: 0.1854 - val_accuracy: 0.9404 - val_loss: 0.1889
Epoch 7/10000
[1m926/92

In [30]:
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv1D, Flatten, Dense, Input
from keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score

# Load dataset
file_path = "Downloads/preprocessed_covid500.csv"  # Update with the correct path to your dataset
df = pd.read_csv(file_path)

# Drop the 'source' feature if it exists
# df = df.drop(columns=['source'], errors='ignore')

# Separate features and target variable
X = df.drop(columns=['COVID-19'])
y = df['COVID-19']

# Cumulative Feature Selection Function
def cumulative_feature_selection(X, y, max_features=10):
    selected_features = []
    feature_indices = list(range(X.shape[1]))
    
    for _ in range(max_features):
        best_accuracy = 0
        best_feature = None
        
        for i in feature_indices:
            # Temporarily add the feature
            temp_features = selected_features + [X.columns[i]]
            X_temp = X[temp_features].values.reshape(X.shape[0], len(temp_features), 1)
            
            # Split the data
            X_train, X_test, y_train, y_test = train_test_split(X_temp, y, test_size=0.2, random_state=42)
            
            # Build and train the model
            model = Sequential()
            model.add(Input(shape=(len(temp_features), 1)))  # Use Input layer to define input shape
            model.add(Conv1D(filters=64, kernel_size=1, activation='relu'))  # Use kernel_size=1 for single feature
            model.add(Flatten())
            model.add(Dense(1, activation='sigmoid'))  # Adjust output layer based on your problem
            
            model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
            
            # Early stopping callback
            early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
            
            # Fit the model
            model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2, callbacks=[early_stopping], verbose=0)
            
            # Evaluate the model
            y_pred = model.predict(X_test)
            y_pred_classes = (y_pred > 0.5).astype(int)
            accuracy = accuracy_score(y_test, y_pred_classes)
            
            # Check if this feature improves accuracy
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_feature = X.columns[i]
        
        # Add the best feature found in this iteration
        if best_feature is not None:
            selected_features.append(best_feature)
            feature_indices.remove(X.columns.get_loc(best_feature))  # Remove the selected feature from the pool
            print(f"Selected feature: {best_feature} with accuracy: {best_accuracy:.4f}")
    
    return selected_features

# Apply Cumulative Feature Selection
max_features_to_select = 10  # Change this to the desired maximum number of features
selected_features = cumulative_feature_selection(X, y, max_features=max_features_to_select)

# Print the features selected by Cumulative Feature Selection
print("Features Selected by Cumulative Feature Selection:")
for feature in selected_features:
    print(feature)

# Filter the dataset to keep only the selected features
X_selected = X[selected_features]

# Prepare Data for 1D CNN
X_selected = X_selected.values.reshape(X_selected.shape[0], X_selected.shape[1], 1)  # Reshape for CNN
y = y.values  # Convert target variable to numpy array

# Split the Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

# Build and Train the 1D CNN Model
model = Sequential()
model.add(Input(shape=(X_selected.shape[1], 1)))  # Use Input layer to define input shape
model.add(Conv1D(filters=64, kernel_size=1, activation='relu'))  # Use kernel_size=1 for single feature
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))  # Adjust output layer based on your problem

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# Fit the model
model.fit(X_train, y_train, epochs=10000, batch_size=32, validation_split=0.2, callbacks=[early_stopping])

# Evaluate the Model
y_pred = model.predict(X_test)
y_pred_classes = (y_pred > 0.5).astype(int)  # Convert probabilities to binary class predictions

# Calculate Accuracy
accuracy = accuracy_score(y_test, y_pred_classes)
print(f'Accuracy: {accuracy:.4f}')

[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m290/290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


KeyboardInterrupt: 