In [1]:
# https://towardsdatascience.com/tune-your-scikit-learn-model-using-evolutionary-algorithms-30538248ac16
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import pandas as pd


# One seed shared by the train/test split and the classifier, so that
# re-running this cell on a fresh kernel reproduces the same split and tree.
RANDOM_STATE = 100

# Load the dataset; the first CSV column becomes the row index.
songs = pd.read_csv('data/spotify_simplified.csv', index_col=[0])

# Feature matrix: drop the listed columns, including the target `track_genre`.
songs_data = songs.drop(columns=["track_id", "artists", "album_name", "track_name", "track_genre"])
genres = songs["track_genre"]

# Numerically encode the labels
label_encoder = LabelEncoder()
encoded_genres = label_encoder.fit_transform(genres)

# Hold out 30% of the rows for testing, stratified on the encoded genre.
X_train, X_test, y_train, y_test = train_test_split(
    songs_data,
    encoded_genres,
    test_size=0.3,
    stratify=encoded_genres,
    shuffle=True,
    random_state=RANDOM_STATE,
)

# Base estimator to be tuned. Seeded because scikit-learn's decision tree
# randomly permutes the features at every split, so an unseeded tree can
# differ from run to run even on identical data.
clf = DecisionTreeClassifier(random_state=RANDOM_STATE)

In [6]:
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Continuous, Categorical, Integer
from sklearn_genetic.plots import plot_fitness_evolution, plot_search_space
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt

# Hyperparameter search space for the decision tree: each entry is an
# integer range handed to the genetic search.
param_grid = {'max_depth': Integer(40, 100),
              'min_samples_leaf': Integer(1, 100),
              'min_samples_split': Integer(2, 100)}

# Seed the shuffled folds. With shuffle=True and random_state=None every call
# to cv.split() yields a different partition, so candidates would be scored
# on different folds and their fitness values would not be comparable.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=100)

# Genetic-algorithm search over `param_grid`, scoring each candidate by
# cross-validated weighted F1 (criteria='max').
evolved_estimator = GASearchCV(estimator=clf,
                               cv=cv,
                               scoring='f1_weighted',
                               population_size=20,
                               generations=35,
                               tournament_size=3,
                               elitism=True,
                               crossover_probability=0.8,
                               mutation_probability=0.1,
                               param_grid=param_grid,
                               criteria='max',
                               algorithm='eaMuPlusLambda',
                               n_jobs=-1,
                               verbose=True,
                               keep_top_k=4)

In [7]:
from sklearn.metrics import f1_score

# Run the evolutionary hyperparameter search on the training split.
evolved_estimator.fit(X_train, y_train)

# Predict the held-out rows with the fitted search object.
y_predicy_ga = evolved_estimator.predict(X_test)

# Weighted F1 on the test split; left as the last expression so the
# notebook displays it as the cell output.
f1_score(y_true=y_test, y_pred=y_predicy_ga, average='weighted')

gen	nevals	fitness 	fitness_std	fitness_max	fitness_min
0  	10    	0.173386	0.00497263 	0.181858   	0.162708   
1  	18    	0.175371	0.00187431 	0.178521   	0.172349   
2  	20    	0.177237	0.00163904 	0.18124    	0.174779   
3  	19    	0.178123	0.00115351 	0.18124    	0.17713    
4  	19    	0.178089	0.00115678 	0.181239   	0.177296   
5  	19    	0.177299	0.00209411 	0.181239   	0.173803   
6  	20    	0.181329	0.00383453 	0.18521    	0.173803   
7  	18    	0.183335	0.0030876  	0.18521    	0.177148   
8  	19    	0.179817	0.00392387 	0.18521    	0.175257   
9  	18    	0.180123	0.0043017  	0.18521    	0.175257   
10 	17    	0.18091 	0.00435129 	0.18521    	0.175196   
11 	20    	0.18038 	0.00400664 	0.18521    	0.17521    
12 	17    	0.179495	0.00379569 	0.18521    	0.17521    
13 	18    	0.180983	0.00398169 	0.18521    	0.175654   
14 	15    	0.180866	0.00437531 	0.18521    	0.175102   
15 	18    	0.180074	0.0042227  	0.18521    	0.175357   
16 	17    	0.181361	0.00514996 	0.18521    	0.17

0.17657745933756377