In [1]:
import yaml
from sklearn.metrics import classification_report

from app.pool_classifiers import get_all_classifiers
from src.cca import learning_algorithm
from src.dataset import Dataset
from src.elements import Matrix, Pool
from src.helpers import count_items_from_predict
from src.helpers import dict_results_to_dataframe

In [2]:
# Load the experiment configuration; later cells read the "dataset", "matrix",
# "init_energy" and "interactions" entries from this mapping.
# The encoding is passed explicitly so decoding does not depend on the
# platform's locale default (assumes the file is UTF-8 -- confirm).
with open("src/cca_config.yaml", "r", encoding="utf-8") as f:
    # NOTE(review): FullLoader accepts more than plain scalars/mappings.
    # If this file only holds plain data, yaml.safe_load is the stricter
    # choice -- confirm before switching.
    config = yaml.load(f, Loader=yaml.FullLoader)

In [3]:
# Build the dataset wrapper and pull the three sample sets used by this
# notebook: one to fit the pool classifiers, one to train the CCA matrix and
# one for testing.
# NOTE(review): `train_cca_amount` reads the same config key ("train_amount")
# as `train_clf_amount` -- confirm this is intended and not a copy/paste slip.
dataset = Dataset(
    name=config["dataset"]["name"],
    train_clf_amount=config["dataset"]["train_amount"],
    train_cca_amount=config["dataset"]["train_amount"],
    test_amount=config["dataset"]["test_amount"],
)
x_train_clf, y_train_clf = dataset.get_train_clf_samples()
x_train_cca, y_train_cca = dataset.get_train_cca_samples()
x_test, y_test = dataset.get_test_samples()


# BALANCE OF SAMPLES
def class_proportions(labels):
    """Return the share of ``labels`` counted for class 0 and for class 1.

    Args:
        labels: Sized collection of class labels (must support ``len()``).

    Returns:
        Tuple ``(share_of_class_0, share_of_class_1)``, each computed as
        ``count_items_from_predict(labels, cls) / len(labels)``.

    Raises:
        ZeroDivisionError: If ``labels`` is empty.
    """
    total = len(labels)
    return (
        count_items_from_predict(labels, 0) / total,
        count_items_from_predict(labels, 1) / total,
    )


# Class balance of the data used to train the pool classifiers
y_train_clf_0, y_train_clf_1 = class_proportions(y_train_clf)
print((y_train_clf_0, y_train_clf_1))

# Class balance of the data used to train the CCA matrix
y_train_cca_0, y_train_cca_1 = class_proportions(y_train_cca)
print((y_train_cca_0, y_train_cca_1))

# Class balance of the test data
y_test_0, y_test_1 = class_proportions(y_test)
print((y_test_0, y_test_1))

(0.835, 0.165)
(0.789, 0.211)
(0.8066666666666666, 0.19333333333333333)


In [4]:
# Wrap every classifier returned by get_all_classifiers() in a Pool, fit the
# pool on the classifier-training split and evaluate it on the test split.
all_classifiers = get_all_classifiers()
print("Total de classificadores existente no código: ", len(all_classifiers))
pool = Pool(all_classifiers)
pool.fit_all(x_train_clf, y_train_clf)
# NOTE(review): the pool is scored on (x_test, y_test) here, and a later cell
# filters and ranks classifiers by f1/score. Presumably those metrics come
# from this call, which would mean model selection on the test set -- confirm.
pool.test_all(x_test, y_test)
# Presumably drops classifiers that predicted a single class only -- verify
# against src.elements.Pool.
pool.remove_classifier_one_class()
results = pool.get_results()

# Presumably a DataFrame view of the per-classifier results -- verify against
# src.helpers.
results_df = dict_results_to_dataframe(results)

Total de classificadores existente no código:  487


If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), RidgeClassifier())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * n_samples. 
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), RidgeClassifier())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs

In [5]:
# F1 threshold handed to Pool.filter_by_f1 (presumably the minimum F1 a
# classifier needs to stay in the pool -- verify against src.elements).
MIN_F1_SCORE = 0.3

pool.filter_by_f1(MIN_F1_SCORE)
print("Quantidade de classificadores: ", len(pool.classifiers))
# Fail with a readable message instead of the opaque
# "max() arg is an empty sequence" raised by the selections below.
if len(pool.classifiers) == 0:
    raise ValueError(
        f"No classifier left in the pool after filter_by_f1({MIN_F1_SCORE})"
    )

# Best remaining pool members by F1 and by score; kept so their reports can be
# compared with the matrix results in a later cell.
max_classifier_f1 = max(pool.classifiers, key=lambda clf: clf.f1)
max_classifier_score = max(pool.classifiers, key=lambda clf: clf.score)
# Names of the classifiers that survived the filter, recorded before shuffling.
lista_classificadores_filtrados = [clf.name for clf in pool.classifiers]
# NOTE(review): no random seed is set in the visible cells; if this shuffle is
# random, the matrix layout (and results) may differ between runs -- confirm.
pool.shuffle_classifiers()
matrix = Matrix(
    size=config["matrix"]["size"],
    pool=pool,
    # `init_enery` (sic) is the keyword as used by this call; presumably it
    # matches the spelling in Matrix's signature, so it is left unchanged.
    init_enery=config["init_energy"],
    distance_neighborhood=config["matrix"]["distance"],
)

Quantidade de classificadores:  137


In [6]:
# Run the CCA learning algorithm on the matrix using the CCA-training split.
# The call is the cell's last expression, so its return value is echoed as
# the cell output.
# NOTE(review): the keyword here is `init_energy`, while the Matrix(...) call
# in this notebook uses `init_enery`; confirm each matches its own signature.
learning_algorithm(
    matrix=matrix,
    pool=pool,
    sample_features=x_train_cca,
    sample_class=y_train_cca,
    distance=config["matrix"]["distance"],
    interactions=config["interactions"],
    init_energy=config["init_energy"],
)

Interacao: 0
Total de mortes:  42
Interacao: 1
Total de mortes:  35
Interacao: 2
Total de mortes:  52
Interacao: 3
Total de mortes:  45
Interacao: 4
Total de mortes:  48
Interacao: 5
Total de mortes:  66
Interacao: 6
Total de mortes:  49
Interacao: 7
Total de mortes:  60
Interacao: 8
Total de mortes:  58
Interacao: 9
Total de mortes:  62
Interacao: 10
Total de mortes:  54
Interacao: 11
Total de mortes:  53
Interacao: 12
Total de mortes:  52
Interacao: 13
Total de mortes:  51
Interacao: 14
Total de mortes:  53
Interacao: 15
Total de mortes:  50
Interacao: 16
Total de mortes:  51
Interacao: 17
Total de mortes:  42
Interacao: 18
Total de mortes:  61
Interacao: 19
Total de mortes:  65


<src.elements.Matrix at 0x2315134f940>

In [7]:
# Energy of every cell in the first four rows of the matrix, displayed as the
# cell output (one inner list per row).
[[node.get_energy() for node in matrix.get()[row]] for row in range(4)]

[[{0: 22010.183331504963, 1: 3310.4963210742085},
  {0: 25242.283851166532, 1: 769.5516211141112},
  {0: 10947.168011711588, 1: 1898.5308400309457},
  {0: 15723.756134040117, 1: 700.6648947108428},
  {0: 14993.978460398168, 1: 833.0611211208416},
  {0: 4423.838211099962, 1: 4609.42787442544},
  {0: 4013.7264390842197, 1: 4914.532375029941},
  {0: 15505.59607081821, 1: 1468.7568572940597}],
 [{0: 1895.8675882091165, 1: 8572.628180494528},
  {0: 6880.2676869331, 1: 4849.850638801397},
  {0: 14037.871524401111, 1: 3013.151896305285},
  {0: 4931.366296924201, 1: 4587.872474369026},
  {0: 14426.32402739078, 1: 7271.291434533626},
  {0: 9502.22793019527, 1: 3993.218733088436},
  {0: 20529.928425120823, 1: 789.3289242400516},
  {0: 19419.226541823133, 1: 2475.0076395040523}],
 [{0: 7103.766744063896, 1: 4408.611022131356},
  {0: 7131.441388817062, 1: 2373.1606541206033},
  {0: 3544.9372073098916, 1: 5156.2396223612395},
  {0: 7238.221112846828, 1: 4442.580738461867},
  {0: 10103.428789846888,

In [8]:
# Predict the test samples with the matrix; `matrix_class` is compared with
# y_test in the reporting cell that follows.
matrix_class = matrix.predict_matrix(x_test)


In [9]:
# Per-class precision/recall/F1 of the matrix predictions on the test labels.
print(classification_report(y_test, matrix_class, digits=3))
# Count of items equal to 1 in the true test labels vs. in the matrix
# predictions (class 1 is labelled "defeito" by the messages below).
print("Qtd módulos com defeito na base: ", count_items_from_predict(y_test, 1))
print("Qtd módulos preditos: ", count_items_from_predict(matrix_class, 1))

              precision    recall  f1-score   support

           0      0.840     0.909     0.873       242
           1      0.421     0.276     0.333        58

    accuracy                          0.787       300
   macro avg      0.630     0.592     0.603       300
weighted avg      0.759     0.787     0.769       300

Qtd módulos com defeito na base:  58
Qtd módulos preditos:  38


In [10]:
# Baseline reports: the two pool members selected earlier (best score and
# best F1), each evaluated against the same test labels as the matrix.
for heading, best in (
    ("Resultado do classificador com melhor acurácia no pool", max_classifier_score),
    ("Resultado do classificador com melhor F1 Score no pool", max_classifier_f1),
):
    print(heading)
    print(classification_report(y_test, best.prediction, digits=3))

Resultado do classificador com melhor acurácia no pool
              precision    recall  f1-score   support

           0      0.848     0.942     0.892       242
           1      0.548     0.293     0.382        58

    accuracy                          0.817       300
   macro avg      0.698     0.618     0.637       300
weighted avg      0.790     0.817     0.794       300

Resultado do classificador com melhor F1 Score no pool
              precision    recall  f1-score   support

           0      0.868     0.785     0.824       242
           1      0.358     0.500     0.417        58

    accuracy                          0.730       300
   macro avg      0.613     0.643     0.621       300
weighted avg      0.769     0.730     0.746       300



In [None]:
# Ask the matrix for its "recall" results; the value is shown as the cell
# output. (This cell has no execution count in the saved notebook.)
matrix.get_results(["recall"])

In [None]:
# "recall" results of every classifier currently held by the pool, one entry
# per classifier, shown as the cell output.
[member.get_results(["recall"]) for member in pool.get_classifiers()]