https://deslib.readthedocs.io/en/latest/index.html

In [1]:
!pip install deslib

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting deslib
  Downloading DESlib-0.3.5-py3-none-any.whl (158 kB)
[K     |████████████████████████████████| 158 kB 15.9 MB/s 
Installing collected packages: deslib
Successfully installed deslib-0.3.5


In [9]:
import pandas as pd

from sklearn.ensemble import BaggingClassifier
from deslib.static import Oracle, StaticSelection, SingleBest

from deslib.dcs import OLA, LCA
from deslib.des import KNORAU, KNORAE

from deslib.util.aggregation import majority_voting, average_combiner, minimum_combiner, maximum_combiner, product_combiner

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import warnings
warnings.filterwarnings("ignore")

In [3]:
df = pd.read_csv('https://raw.githubusercontent.com/JusciAvelino/Monitoria-ModelagemPreditivaDescritiva1/main/Breast%20Cancer%20Wisconsin%20(Diagnostic).csv')
df = df.replace({'M': 1, 'B': 0})
df.head()

Unnamed: 0,diagnosis,radius_Mean,texture_Mean,periMeter_Mean,area_Mean,sMoothness_Mean,coMpactness_Mean,concavity_Mean,concave points_Mean,syMMetry_Mean,...,radius_worst,texture_worst,periMeter_worst,area_worst,sMoothness_worst,coMpactness_worst,concavity_worst,concave points_worst,syMMetry_worst,fractal_diMension_worst
0,1,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,1,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,1,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,1,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [4]:
# split the data into training and test data
X = df.drop('diagnosis', axis=1)
y = df['diagnosis']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

# Split the data into training and DSEL for DS techniques
X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train,
                                                    test_size=0.5)

# Combinação estática

In [17]:
static_result = []
for n in [10, 20, 30]:
  pool_classifiers = BaggingClassifier(n_estimators=n)
  pool_classifiers.fit(X_train, y_train)

  static_model = StaticSelection(pool_classifiers)
  single_model = SingleBest(pool_classifiers)

  static_model.fit(X_dsel, y_dsel)
  single_model.fit(X_dsel, y_dsel)

  static_pred = static_model.predict(X_test)
  single_pred = static_model.predict(X_test)

  majority_pred = majority_voting(pool_classifiers, X_test)
  average_pred = average_combiner(pool_classifiers, X_test)
  product_pred = product_combiner(pool_classifiers, X_test)
  maximum_pred = maximum_combiner(pool_classifiers, X_test)
  minimum_pred = minimum_combiner(pool_classifiers, X_test)

  oracle = Oracle(pool_classifiers).fit(X_train, y_train)
  oracle_pred = oracle.predict(X_test, y_test)

  #calc f1-score for each model
  scores = [n,
            f1_score(y_test, static_pred),
            f1_score(y_test, single_pred),
            f1_score(y_test, majority_pred),
            f1_score(y_test, average_pred),
            f1_score(y_test, product_pred),
            f1_score(y_test, maximum_pred),
            f1_score(y_test, minimum_pred),
            f1_score(y_test, oracle_pred)]
  
  static_result.append(pd.DataFrame([scores], columns=['size_pool',
                                                       'Static',
                                                       'Single',
                                                       'Majority',
                                                       'Average',
                                                       'Product',
                                                       'Max',
                                                       'Min',
                                                       'Oracle']))

static_result = pd.concat(static_result).reset_index(drop=True).round(3)
static_result

Unnamed: 0,size_pool,Static,Single,Majority,Average,Product,Max,Min,Oracle
0,10,0.971,0.971,0.957,0.957,0.81,0.81,0.81,0.993
1,20,0.899,0.899,0.935,0.935,0.714,0.714,0.714,1.0
2,30,0.944,0.944,0.93,0.93,0.769,0.769,0.769,1.0


# Combinação dinâmica

In [6]:
dynamic_result = []
for n in [10, 20, 30]:
  pool_classifiers = BaggingClassifier(n_estimators=n)
  pool_classifiers.fit(X_train, y_train)

  ola = OLA(pool_classifiers)
  lca = LCA(pool_classifiers)
  kne = KNORAE(pool_classifiers)
  knu = KNORAU(pool_classifiers)

  ola.fit(X_dsel, y_dsel)
  lca.fit(X_dsel, y_dsel)
  kne.fit(X_dsel, y_dsel)
  knu.fit(X_dsel, y_dsel)

  ola_pred = ola.predict(X_test)
  lca_pred = lca.predict(X_test)
  kne_pred = kne.predict(X_test)
  knu_pred = knu.predict(X_test)

  oracle = Oracle(pool_classifiers).fit(X_train, y_train)
  oracle_pred = oracle.predict(X_test, y_test)

  scores = [n,
            f1_score(y_test, ola_pred),
            f1_score(y_test, lca_pred),
            f1_score(y_test, kne_pred),
            f1_score(y_test, knu_pred),
            f1_score(y_test, oracle_pred)]

  dynamic_result.append(pd.DataFrame([scores], columns=['size_pool', 'OLA', 'LCA', 'KNORAE', 'KNORAU', 'Oracle']))

dynamic_result = pd.concat(dynamic_result).reset_index(drop=True).round(3)
dynamic_result

Unnamed: 0,size_pool,OLA,LCA,KNORAE,KNORAU,Oracle
0,10,0.903,0.912,0.93,0.922,0.986
1,20,0.95,0.919,0.92,0.929,0.993
2,30,0.922,0.907,0.935,0.944,1.0
