In [11]:
from sklearn_genetic import GAFeatureSelectionCV, ExponentialAdapter
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np
import pandas as pd

In [12]:
# Load data
data = pd.read_csv('emotions.csv')
X = data.drop('label', axis=1)
y = data['label']

In [13]:
# Add noise
noise = np.random.uniform(5, 10, size=(X.shape[0], 5))
X = np.hstack((X, noise))

In [14]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

In [15]:
# Initialize Logistic Regression classifier
clf = LogisticRegression(random_state=0)

In [16]:
# Define mutation and crossover schedulers
mutation_scheduler = ExponentialAdapter(0.8, 0.2, 0.01)
crossover_scheduler = ExponentialAdapter(0.2, 0.8, 0.01)

In [17]:
# Initialize Genetic Algorithm for Feature Selection
evolved_estimator = GAFeatureSelectionCV(
    estimator=clf,
    scoring="accuracy",
    population_size=30,
    generations=20,
    mutation_probability=mutation_scheduler,
    crossover_probability=crossover_scheduler,
    n_jobs=-1
)

In [18]:
# Reshape y_train if needed
y_train = y_train.values.reshape(-1, 1)

In [19]:
# Fit the model
evolved_estimator.fit(X_train, y_train.ravel())

gen	nevals	fitness 	fitness_std	fitness_max	fitness_min
0  	30    	0.342437	1.11022e-16	0.342437   	0.342437   
1  	60    	0.342437	1.11022e-16	0.342437   	0.342437   
2  	60    	0.342437	1.11022e-16	0.342437   	0.342437   
3  	60    	0.342437	1.11022e-16	0.342437   	0.342437   
4  	60    	0.342437	1.11022e-16	0.342437   	0.342437   
5  	60    	0.342437	1.11022e-16	0.342437   	0.342437   
6  	60    	0.342437	1.11022e-16	0.342437   	0.342437   
7  	60    	0.342437	1.11022e-16	0.342437   	0.342437   
8  	60    	0.342437	1.11022e-16	0.342437   	0.342437   
9  	60    	0.342437	1.11022e-16	0.342437   	0.342437   
10 	60    	0.342437	1.11022e-16	0.342437   	0.342437   
11 	60    	0.342437	1.11022e-16	0.342437   	0.342437   
12 	60    	0.342437	1.11022e-16	0.342437   	0.342437   
13 	60    	0.342437	1.11022e-16	0.342437   	0.342437   
14 	60    	0.342437	1.11022e-16	0.342437   	0.342437   
15 	60    	0.342437	1.11022e-16	0.342437   	0.342437   
16 	60    	0.342437	1.11022e-16	0.342437   	0.34

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [21]:
# Get selected features
features = evolved_estimator.support_
print("Selected Features:", features)

Selected Features: [ True  True  True ... False False False]


In [22]:
# Make predictions
y_predict_ga = evolved_estimator.predict(X_test)

In [23]:
# Calculate accuracy
accuracy = accuracy_score(y_test, y_predict_ga)
print("Accuracy:", accuracy)

Accuracy: 0.31107954545454547


In [24]:
# Calculate precision, recall, and F1 score
precision = precision_score(y_test, y_predict_ga, average='weighted')
recall = recall_score(y_test, y_predict_ga, average='weighted')
f1 = f1_score(y_test, y_predict_ga, average='weighted')

print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Precision: 0.09677048360020661
Recall: 0.31107954545454547
F1 Score: 0.14761954594701077


  _warn_prf(average, modifier, msg_start, len(result))
