In [20]:
from sklearn_genetic import GAFeatureSelectionCV, ExponentialAdapter
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
import numpy as np
import pandas as pd
import time

In [21]:
data = pd.read_csv('emotions.csv')
X = data.drop('label', axis=1)
y = data['label']

In [22]:
# Adding noise to the features
noise = np.random.uniform(5, 10, size=(X.shape[0], 5))
X = np.hstack((X, noise))

In [23]:
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

In [24]:
# Initialize SVM classifier
clf = SVC(gamma='auto')

In [25]:
# Experiment with different values for Genetic Algorithm parameters
mutation_scheduler = ExponentialAdapter(0.8, 0.2, 0.01)
crossover_scheduler = ExponentialAdapter(0.2, 0.8, 0.01)

In [26]:
# Adjust these parameters for optimization
population_size = 50
generations = 30

In [27]:
# Initialize GAFeatureSelectionCV with optimized parameters
evolved_estimator = GAFeatureSelectionCV(
    estimator=clf,
    scoring="accuracy",
    population_size=population_size,
    generations=generations,
    mutation_probability=mutation_scheduler,
    crossover_probability=crossover_scheduler,
    n_jobs=-1
)

In [28]:
# Reshape y_train if needed
y_train = y_train.values.reshape(-1, 1)
y_train = y_train.ravel()

In [29]:
# Measure the start time
start_time = time.time()

In [30]:
# Fit the model
evolved_estimator.fit(X_train, y_train)

gen	nevals	fitness 	fitness_std	fitness_max	fitness_min
0  	50    	0.342437	1.11022e-16	0.342437   	0.342437   
1  	100   	0.342437	1.11022e-16	0.342437   	0.342437   
2  	100   	0.342437	1.11022e-16	0.342437   	0.342437   
3  	100   	0.342437	1.11022e-16	0.342437   	0.342437   
4  	100   	0.342437	1.11022e-16	0.342437   	0.342437   
5  	100   	0.342437	1.11022e-16	0.342437   	0.342437   
6  	100   	0.342437	1.11022e-16	0.342437   	0.342437   
7  	100   	0.342437	1.11022e-16	0.342437   	0.342437   
8  	100   	0.342437	1.11022e-16	0.342437   	0.342437   
9  	100   	0.342437	1.11022e-16	0.342437   	0.342437   
10 	100   	0.342437	1.11022e-16	0.342437   	0.342437   
11 	100   	0.342437	1.11022e-16	0.342437   	0.342437   
12 	100   	0.342437	1.11022e-16	0.342437   	0.342437   
13 	100   	0.342437	1.11022e-16	0.342437   	0.342437   
14 	100   	0.342437	1.11022e-16	0.342437   	0.342437   
15 	100   	0.342437	1.11022e-16	0.342437   	0.342437   
16 	100   	0.342437	1.11022e-16	0.342437   	0.34

In [31]:
# Measure the end time
end_time = time.time()

In [32]:
# Calculate the training time cost
training_time = end_time - start_time
print("Training Time:", training_time, "seconds")

Training Time: 13138.589313983917 seconds


In [33]:
# Get selected features
features = evolved_estimator.support_
print("Selected Features:", features)

Selected Features: [ True False  True ... False  True  True]


In [34]:
# Predict using the evolved estimator
y_predict_ga = evolved_estimator.predict(X_test)

In [35]:
# Evaluate accuracy
accuracy = accuracy_score(y_test, y_predict_ga)
print("Accuracy:", accuracy)

Accuracy: 0.31107954545454547


In [36]:
# Evaluate recall
recall = recall_score(y_test, y_predict_ga, average='weighted')
print("Recall:", recall)

# Evaluate precision
precision = precision_score(y_test, y_predict_ga, average='weighted')
print("Precision:", precision)

# Evaluate F1 score
f1 = f1_score(y_test, y_predict_ga, average='weighted')
print("F1 Score:", f1)

# Transform the test set based on selected features
X_reduced = evolved_estimator.transform(X_test)
print("Number of Selected Features:", len(features))

Recall: 0.31107954545454547
Precision: 0.09677048360020661
F1 Score: 0.14761954594701077
Number of Selected Features: 2553


  _warn_prf(average, modifier, msg_start, len(result))
