In [1]:
import ast
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import cross_val_score, KFold
from sklearn.model_selection import GridSearchCV

In [2]:
# Load the training data from the ansatz results CSV.
df = pd.read_csv("./../ansatz_result/data.csv")

# Feature matrix: every column except "target".
X = df.drop(columns=["target"]).to_numpy()

# Each "target" cell is a string holding a Python literal (a list of values);
# parse every row and stack the parsed lists into a 2-D array, one row per sample.
# NOTE(review): presumably each list holds one accuracy per candidate ansatz — confirm.
parsed_targets = [ast.literal_eval(raw) for raw in df["target"]]
y = pd.DataFrame(parsed_targets).to_numpy()

# For every sample, keep the column index of the largest target value,
# i.e. the best-scoring ansatz for that sample.
# NOTE(review): the original comment states indices range over 0-29 (30 ansatzes) — confirm.
y_best_ansatz = y.argmax(axis=1)

In [4]:
# One seed shared by the CV splitter and the estimator so that a fresh
# "Restart & Run All" reproduces the same search result.
RANDOM_STATE = 42

# 3-fold cross-validation with shuffling.
kf = KFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE)

# Hyper-parameter grid to search exhaustively.
# 4 * 7 * 10 * 2 * 7 = 3920 candidates, each fitted once per fold (11760 fits).
param_grid = {
    'max_depth': [3, 5, 7, None],
    'min_samples_split': [2, 5, 10, 15, 20, 25, 30],
    'min_samples_leaf': [1, 2, 4, 5,  10, 15, 20, 30, 35, 40],
    'criterion': ['squared_error', 'friedman_mse'],
    'n_estimators': [5, 10, 20, 30, 50, 80, 100]
}

# Seed the estimator as well: without random_state the trees' feature
# permutation at each split is unseeded, so scores (and therefore the
# selected best parameters) can differ between runs.
gb_classifier = GradientBoostingClassifier(random_state=RANDOM_STATE)

# n_jobs=20 runs up to 20 fits in parallel; lower it on machines with fewer cores.
grid_search = GridSearchCV(gb_classifier, param_grid, cv=kf, scoring='accuracy', n_jobs=20)

grid_search.fit(X, y_best_ansatz)

# Best parameter combination found by the search.
print("Melhores parâmetros:", grid_search.best_params_)

Melhores parâmetros: {'criterion': 'squared_error', 'max_depth': 3, 'min_samples_leaf': 4, 'min_samples_split': 25, 'n_estimators': 5}


In [5]:
# Cross-validate the configuration selected by the grid search.
# NOTE(review): this reuses the same data and the same `kf` folds that were
# used to pick the hyper-parameters, so the reported accuracy is an optimistic
# estimate rather than an independent one.
best_gb_classifier = grid_search.best_estimator_

score = cross_val_score(
    estimator=best_gb_classifier,
    X=X,
    y=y_best_ansatz,
    scoring='accuracy',
    cv=kf,
)

# Per-fold accuracies followed by their mean and standard deviation.
print("Scores:", score)
print("Média:", np.mean(score))
print("Desvio padrão:", np.std(score))

Scores: [0.38333333 0.35       0.30833333]
Média: 0.34722222222222227
Desvio padrão: 0.030681558381075724


In [6]:
# Class probabilities for a single sample (row index 6 of X, not the first row).
# reshape(1, -1) turns the 1-D feature vector into the 2-D (1, n_features)
# input predict_proba expects; the result has shape (1, n_classes).
# NOTE(review): this sample comes from the same X the search was fitted on,
# so it is not held-out data.
sample_idx = 6
probabilities = best_gb_classifier.predict_proba(X[sample_idx].reshape(1, -1))

# Column positions of the 3 highest probabilities, most probable first.
top_3 = np.argsort(-probabilities, axis=1)[:, :3]

# predict_proba columns are ordered like `classes_`, which only contains the
# labels actually seen during fit. A column position equals the ansatz id only
# if every ansatz appears in the training labels, so translate positions to
# labels explicitly before reporting them.
top_3_ansatzes = best_gb_classifier.classes_[top_3]

print(f"Top 3 ansatzes para a amostra: {top_3_ansatzes[0]}")
print(f"Probabilidades: {probabilities[0][top_3[0]]}")

Top 3 ansatzes para a amostra: [ 0  4 12]
Probabilidades: [0.2924588  0.22066768 0.07205703]


In [8]:
# Persist the tuned classifier to disk.
MODEL_PATH = './models_salvos/gb_clf.joblib'

# joblib.dump does not create missing directories; make sure the target
# folder exists so the cell also works on a fresh checkout.
Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)

# Kept as the last expression so the cell still displays the written file list.
joblib.dump(best_gb_classifier, MODEL_PATH)

['./models_salvos/gb_clf.joblib']

Exception ignored in: <function ResourceTracker.__del__ at 0x7f3bb2a8ade0>
Traceback (most recent call last):
  File "/usr/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/usr/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/usr/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x7a21d3e8ede0>
Traceback (most recent call last):
  File "/usr/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/usr/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/usr/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x79e236b8ede0>
Traceback (most recent call last):
  File "/usr/lib/python3.13/multiprocessing/reso