In [1]:
from sklearn_genetic import GAFeatureSelectionCV, ExponentialAdapter
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

In [2]:
data = pd.read_csv('emotions.csv')
X = data.drop('label', axis=1)
y = data['label']
noise = np.random.uniform(5, 10, size=(X.shape[0], 5))
X = np.hstack((X, noise))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

In [3]:
clf = RandomForestClassifier(n_estimators=100, random_state=0)

In [4]:
# Set up mutation and crossover rate schedulers
mutation_scheduler = ExponentialAdapter(0.5, 0.2, 0.01)
crossover_scheduler = ExponentialAdapter(0.5, 0.8, 0.01)

In [5]:
# Set up the Genetic Algorithm Feature Selection
evolved_estimator = GAFeatureSelectionCV(
    estimator=clf,
    scoring="accuracy",
    population_size=50,  # Adjusted population size
    generations=30,  # Adjusted number of generations
    mutation_probability=mutation_scheduler,
    crossover_probability=crossover_scheduler,
    n_jobs=-1
)

In [6]:
# Flatten y_train if needed
y_train = y_train.values.ravel()

In [7]:
# Fit the model
evolved_estimator.fit(X_train, y_train)

gen	nevals	fitness 	fitness_std	fitness_max	fitness_min
0  	50    	0.979622	0.00234463 	0.984594   	0.97479    
1  	100   	0.981653	0.00182073 	0.984594   	0.977591   
2  	100   	0.982423	0.00162881 	0.985294   	0.979692   
3  	100   	0.982633	0.0016152  	0.985294   	0.979692   
4  	100   	0.983375	0.00144067 	0.985994   	0.979692   
5  	100   	0.983081	0.00148459 	0.985294   	0.980392   
6  	100   	0.983669	0.00147185 	0.985994   	0.979692   
7  	100   	0.983557	0.00166449 	0.985994   	0.979692   
8  	100   	0.983725	0.001525   	0.985994   	0.978992   
9  	100   	0.983529	0.0017949  	0.985994   	0.978992   
10 	100   	0.983487	0.00180498 	0.985994   	0.980392   
11 	100   	0.983964	0.00145719 	0.985994   	0.978992   
12 	100   	0.983768	0.00157019 	0.985994   	0.979692   
13 	100   	0.983838	0.00125145 	0.985994   	0.981092   
14 	100   	0.983739	0.00153609 	0.985994   	0.980392   
15 	100   	0.983641	0.00164385 	0.985994   	0.978992   
16 	100   	0.983894	0.00159688 	0.985994   	0.97

In [8]:
# Get selected features
features = evolved_estimator.support_
print("Selected Features:", features)

Selected Features: [False  True  True ... False False  True]


In [9]:
# Predict using the selected features
y_predict_ga = evolved_estimator.predict(X_test)

In [10]:
# Calculate and print accuracy
print("Accuracy:", accuracy_score(y_test, y_predict_ga))

Accuracy: 0.9801136363636364


In [11]:
# Transform the test set with selected features
X_reduced = evolved_estimator.transform(X_test)
print("Number of Selected Features:", len(features))

Number of Selected Features: 2553
