In [1]:
import sys
import os
sys.path.append(os.path.abspath(".."))  # sobe um nível a partir da pasta models/

In [None]:
import ast
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
import pandas as pd
from sklearn.model_selection import KFold
import joblib
from features_selection.Wrapper import ForwardFeatureSelector, BackwardFeatureSelector

In [3]:
# abrindo os dados de treinamento
df = pd.read_csv("./../ansatz_result/data.csv")
X = df.drop(columns=["target"]).to_numpy()
y = pd.DataFrame(df['target'].apply(ast.literal_eval).tolist()).to_numpy()

# Para cada amostra, identificar o ansatz com maior acurácia
y_best_ansatz = np.argmax(y, axis=1)  # Retorna índices 0-29


In [4]:
print(y_best_ansatz)

[ 6  0  6  0 11 14  0 14 14  0  0  2  0  6  6 14  2  6  0 11  2 14 14 16
  2  6  6  2 11  6 14  6 16 14 14 14 14  8 14  6  6 14  6 14  6  8 14  6
  6  6 14  6  6  6  6  6  8  6 14  0  6  6  6  6  6  1  6  6  6  2  6  6
  6  6  6  6  6  2  2  6  6  6  6  6  6  6  2  6  2  6 13  0 12  0  7  7
  7  8  0  7  7 12  7  8 21 12 13  7  7  7 12 17  7 13 21  7  7  7  6  7
  6 14  6 25  6  6  1 14  1  6  6  6  6 14 16  6  6  6  6  6  6  6 14  1
  6  6  6 14  6 14  6  6 14 14 14  6 14 14  6  6  6  6 14  6  6  6  6  6
 14  6  6  6  6 14  6  6  6  6  6  6 19 20  3 22 11 19  1  6 20 29 20  0
  0 19 28 29  6 11  6 27  7 17 20 26  0 22 17  3  0 29 13  2 27  0  7 10
 24 24 20  6 10 22 24 22 10 10 14  8 15 17 10 22  6  0 11  2 12 22 27 20
  2  0 15  0 29 10 12  2 17  6  3 29 16  0 11  0  0  7  7  0  6 10 14  0
  6  6  0  3 10  0  2 11 16 18  2 18 29  6 13 16 16 16 22  0 16 23  6 16
 20 18 18 16 16  0 11 16 16 17 16 20  6  8  3 11 11  6 22  3 22  0 29 11
  0 20  2  6 22 28 29  0 12 27 11 26 29 27 28  0 12

In [5]:
# criando kfold
kf = KFold(n_splits=3, shuffle=True, random_state=42)

# Definir os parâmetros a testar
param_grid = {
    'max_depth': [3, 5, 7, None],
    'min_samples_split': [2, 5, 10, 15, 20, 25, 30],
    'min_samples_leaf': [1, 2, 4, 5,  10, 15, 20, 30, 35, 40],
    'criterion': ['gini', 'entropy']
}

dt_classifier = DecisionTreeClassifier(random_state=45)

grid_search = GridSearchCV(dt_classifier, param_grid, cv=kf, scoring='accuracy', n_jobs=12)

grid_search.fit(X, y_best_ansatz)

# Melhores parâmetros
print("Melhores parâmetros:", grid_search.best_params_)

Melhores parâmetros: {'criterion': 'gini', 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 30}


In [6]:
# Usar os melhores parâmetros encontrados para validação cruzada
best_dt_classifier = grid_search.best_estimator_

score = cross_val_score(best_dt_classifier, X, y_best_ansatz, cv=kf, scoring='accuracy')
print("Scores:", score)
print("Média:", score.mean())
print("Desvio padrão:", score.std())

Scores: [0.35833333 0.35833333 0.35      ]
Média: 0.35555555555555557
Desvio padrão: 0.003928371006591943


In [7]:
# treinando o dt classifier e obtendo os top3 ansatz mais classificados
best_dt_classifier.fit(X, y_best_ansatz)

# Obter probabilidades de cada ansatz dado o input 6
probabilidades = best_dt_classifier.predict_proba(X)[6]

# Rankear os top-3 ansatz com maiores probabilidades
top3_indices = np.argsort(probabilidades)[-3:][::-1]  # Ordem decrescente
top3_ansatz = [(idx, probabilidades[idx]) for idx in top3_indices]

print("Top-3 Ansatz Previstos:")
for idx, prob in top3_ansatz:
    print(f"Ansatz {idx} (Probabilidade: {prob:.2f})")


Top-3 Ansatz Previstos:
Ansatz 0 (Probabilidade: 0.52)
Ansatz 2 (Probabilidade: 0.15)
Ansatz 8 (Probabilidade: 0.11)


In [8]:
selector = ForwardFeatureSelector(
    model=best_dt_classifier,
    model_type='sklearn',
    scoring='accuracy',
    cv = 3,
    verbose=1
)

In [9]:
X_new = selector.fit_transform(X, y_best_ansatz)

Testing feature set: [0]
Testing feature set: [1]
Testing feature set: [2]
Testing feature set: [3]
Testing feature set: [4]
Testing feature set: [5]
Testing feature set: [6]
Testing feature set: [7]
Testing feature set: [8]
Testing feature set: [9]
Testing feature set: [10]
Testing feature set: [11]
Testing feature set: [12]
Testing feature set: [13]
Testing feature set: [14]
Testing feature set: [15]
Testing feature set: [16]
Testing feature set: [17]
Testing feature set: [18]
Testing feature set: [19]
Testing feature set: [20]
Testing feature set: [21]
3
0.3361111111111111
Selected feature: 3 | Score: 0.3361
Testing feature set: [3, 0]
Testing feature set: [3, 1]
Testing feature set: [3, 2]
Testing feature set: [3, 4]
Testing feature set: [3, 5]
Testing feature set: [3, 6]
Testing feature set: [3, 7]
Testing feature set: [3, 8]
Testing feature set: [3, 9]
Testing feature set: [3, 10]
Testing feature set: [3, 11]
Testing feature set: [3, 12]
Testing feature set: [3, 13]
Testing featu

In [17]:
print(f"features selecionadas: {selector.selected_features}, best score: {selector.best_scores[-1]}")

features selecionadas: [3, 7], best score: 0.3527777777777778


Exception ignored in: <function ResourceTracker.__del__ at 0x7e59edc7ede0>
Traceback (most recent call last):
  File "/usr/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/usr/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/usr/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x77a463e8ade0>
Traceback (most recent call last):
  File "/usr/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/usr/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/usr/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x785e8c57ade0>
Traceback (most recent call last):
  File "/usr/lib/python3.13/multiprocessing/reso

In [10]:
selector2 = BackwardFeatureSelector(
    model=best_dt_classifier,
    model_type='sklearn',
    scoring='accuracy',
    cv = 3,
    verbose=1
)

In [11]:
X_new2 = selector2.fit_transform(X, y_best_ansatz)

Testando subconjunto: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
Testando subconjunto: [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
Testando subconjunto: [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
Testando subconjunto: [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
Testando subconjunto: [0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
Testando subconjunto: [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
Testando subconjunto: [0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
Testando subconjunto: [0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
Testando subconjunto: [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
Testando subconjunto: [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
Testando subconjunto

In [16]:
print(f"features selecionadas: {selector2.selected_features}, best score: {selector2.best_scores[-1]}")

features selecionadas: [1, 2, 4, 5, 6, 8, 10, 11, 12, 13, 15, 18, 19, 20, 21], best score: 0.37222222222222223


In [12]:
#salvando o modelo treinado
joblib.dump(best_dt_classifier, './models_salvos/dt_classifier.joblib')

['./models_salvos/dt_classifier.joblib']