In [1]:
!pip install sklearn-genetic



In [2]:
!pip install scikit-learn



In [3]:
from sklearn_genetic import GAFeatureSelectionCV, ExponentialAdapter
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

In [4]:
# Read the dataset and separate the target column from the feature columns.
data = pd.read_csv('emotions.csv')
y = data['label']                  # target: the 'label' column
X = data.drop(columns='label')     # features: every remaining column

In [5]:
# Append 5 columns of uniform noise in [5, 10) as deliberately uninformative
# features for the selector to discard.
# A seeded generator keeps the noise identical across Restart & Run All.
noise_rng = np.random.default_rng(0)
noise = noise_rng.uniform(5, 10, size=(len(data), 5))
# Rebuild from `data` rather than from `X` so re-running this cell does not
# keep stacking additional noise columns onto an already-augmented array.
# Note: the result is a plain ndarray, so column names are not carried over.
X = np.hstack((data.drop('label', axis=1), noise))

In [6]:
# Hold out 33% of the rows for evaluation; the fixed seed makes the split repeatable.
(X_train, X_test,
 y_train, y_test) = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=0,
)

In [7]:
# Base estimator whose cross-validated accuracy is used as the GA fitness.
clf = RandomForestClassifier(
    n_estimators=100,
    random_state=0,
)

In [8]:
# Adaptive schedules for the GA operators: mutation probability starts at 0.8
# and moves toward 0.2, while crossover probability starts at 0.2 and moves
# toward 0.8. Both use the same adaptive rate, so their sum stays constant.
mutation_scheduler = ExponentialAdapter(
    initial_value=0.8, end_value=0.2, adaptive_rate=0.01
)
crossover_scheduler = ExponentialAdapter(
    initial_value=0.2, end_value=0.8, adaptive_rate=0.01
)

In [9]:
# Genetic-algorithm feature selector wrapped around the random forest.
evolved_estimator = GAFeatureSelectionCV(
    # What is optimised and how it is scored.
    estimator=clf,
    scoring="accuracy",
    # Size of the search: 30 individuals evolved over 20 generations.
    generations=20,
    population_size=30,
    # Operator probabilities follow the adaptive schedules defined earlier.
    crossover_probability=crossover_scheduler,
    mutation_probability=mutation_scheduler,
    # -1 presumably means "use all available cores" (joblib convention) - confirm.
    n_jobs=-1,
)

In [10]:
# Convert the training labels to a flat 1-D ndarray.
# np.asarray accepts both a pandas Series and an ndarray, so this cell can be
# re-run safely; `y_train.values` would raise AttributeError the second time
# because y_train has already been replaced by an ndarray.
y_train = np.asarray(y_train).ravel()

In [None]:
# Run the genetic search on the training split. This is the expensive cell:
# the estimator was configured above with population_size=30 and
# generations=20, and each evaluation is a cross-validated fit of `clf`.
# The per-generation fitness log is printed as the search progresses.
evolved_estimator.fit(X_train, y_train)

gen	nevals	fitness 	fitness_std	fitness_max	fitness_min
0  	30    	0.978828	0.00184111 	0.981793   	0.973389   
1  	60    	0.980439	0.00130301 	0.982493   	0.977591   
2  	60    	0.981162	0.00125726 	0.983193   	0.978291   
3  	60    	0.981232	0.0013458  	0.984594   	0.978291   
4  	60    	0.981536	0.00148534 	0.984594   	0.978291   
5  	60    	0.981769	0.000894667	0.983193   	0.980392   
6  	60    	0.981653	0.00112045 	0.983894   	0.978992   


In [None]:
# Inspect which input columns the search kept. Per the sklearn-genetic-opt
# docs, `support_` is a mask over the input features - confirm for the
# installed version. The last 5 positions correspond to the injected noise
# columns, since they were stacked after the original features.
features = evolved_estimator.support_
print("Selected Features:", features)

In [None]:
# Score the fitted selector on the held-out split.
y_predict_ga = evolved_estimator.predict(X_test)
test_accuracy = accuracy_score(y_test, y_predict_ga)
print("Accuracy:", test_accuracy)

In [None]:
# Apply the fitted selector to the test matrix; X_reduced presumably holds
# only the columns flagged in `support_` - confirm against the library docs.
X_reduced = evolved_estimator.transform(X_test)