# Comparação de Ensembles

In [1]:
import warnings
warnings.filterwarnings('ignore')
warnings.warn('DelftStack')
warnings.warn('Do not show this message')
print("No Warning Shown")



In [2]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [3]:
from scipy.io import arff
import pandas as pd

data = arff.loadarff('OBS-Network-DataSet_2_Aug27.arff')
df = pd.DataFrame(data[0])
df.head()

Unnamed: 0,Node,Utilised Bandwith Rate,Packet Drop Rate,Full_Bandwidth,Average_Delay_Time_Per_Sec,Percentage_Of_Lost_Pcaket_Rate,Percentage_Of_Lost_Byte_Rate,Packet Received Rate,of Used_Bandwidth,Lost_Bandwidth,...,Packet_Received,Packet_lost,Transmitted_Byte,Received_Byte,10-Run-AVG-Drop-Rate,10-Run-AVG-Bandwith-Use,10-Run-Delay,Node Status,Flood Status,Class
0,3.0,0.822038,0.190381,1000.0,0.004815,19.031487,19.038129,0.809619,822.0375,177.9625,...,73128.0,17196.0,130066560.0,105304320.0,0.146594,0.780936,0.001838,b'B',0.023455,b'NB-No Block'
1,9.0,0.275513,0.729111,100.0,0.004815,72.889036,72.911141,0.270889,27.55125,72.44875,...,2451.0,6598.0,13029120.0,3529440.0,0.517669,0.242451,0.002236,b'NB',0.460725,b'Block'
2,3.0,0.923707,0.090383,900.0,0.000633,9.035834,9.038339,0.909617,831.336,68.664,...,73930.0,7346.0,117037440.0,106459200.0,0.058749,0.886758,0.001751,b'B',0.0,b'No Block'
3,9.0,0.368775,0.63771,100.0,0.000552,63.737843,63.770999,0.36229,36.8775,63.1225,...,3278.0,5770.0,13029120.0,4720320.0,0.522922,0.324522,0.001776,b'NB',0.439255,b'Block'
4,3.0,0.905217,0.10867,800.0,0.000497,10.864208,10.866977,0.89133,724.17375,75.82625,...,64379.0,7849.0,104008320.0,92705760.0,0.076069,0.869009,0.001767,b'B',0.0,b'No Block'


In [4]:
df = df.drop('Node Status', axis=1)
df = df.dropna(subset=['Packet_lost'])

In [5]:
from sklearn.preprocessing import LabelEncoder 
labelencoder= LabelEncoder()
df.Class = labelencoder.fit_transform(df.Class)

In [6]:
X = df.iloc[:,:-1].to_numpy()
y = df.iloc[:,-1].to_numpy()
X.shape, y.shape

((1060, 20), (1060,))

In [7]:
from sklearn.model_selection import train_test_split
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=42)
X_tr.shape, X_te.shape, y_tr.shape, y_te.shape

((795, 20), (265, 20), (795,), (265,))

# Classificador Voting

In [8]:
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.model_selection import GridSearchCV, cross_validate, RepeatedKFold
import numpy as np
from sklearn.pipeline import Pipeline

pipetotal = Pipeline([
    #('atributosDesejados', AtributosDesejados()),
    #('trataAtributos', trataAtributos),
    ('classificador', VotingClassifier([
        ('knn', KNeighborsClassifier()),
        ('naivebayes', GaussianNB()),
        ('lr', LogisticRegression())
    ])
)])

parametros = {
    'classificador__knn__n_neighbors': [3, 5, 7, 9],
    'classificador__lr__penalty': ['none', 'l2', 'l1', 'elasticnet'],
    'classificador__voting': ['hard', 'soft'],
}
modelo = GridSearchCV(pipetotal, param_grid=parametros)

scores = cross_validate(modelo, X_tr, y_tr, cv=RepeatedKFold(random_state=101))
scores['test_score'], np.mean(scores['test_score']), np.std(scores['test_score'])

(array([0.93710692, 0.86163522, 0.79874214, 0.82389937, 0.87421384,
        0.79245283, 0.90566038, 0.88679245, 0.87421384, 0.89937107,
        0.89937107, 0.90566038, 0.90566038, 0.80503145, 0.88679245,
        0.94968553, 0.86163522, 0.86163522, 0.88050314, 0.91194969,
        0.86163522, 0.86792453, 0.91194969, 0.86163522, 0.83018868,
        0.88050314, 0.91194969, 0.8427673 , 0.88679245, 0.91823899,
        0.83647799, 0.90566038, 0.93081761, 0.88050314, 0.89308176,
        0.9245283 , 0.88050314, 0.86792453, 0.85534591, 0.91823899,
        0.89937107, 0.9245283 , 0.86163522, 0.86792453, 0.86792453,
        0.93081761, 0.86792453, 0.88679245, 0.8490566 , 0.87421384]),
 0.880377358490566,
 0.03480187414791209)

In [9]:
from sklearn.metrics import accuracy_score

modelo.fit(X_tr, y_tr)
y_pred = modelo.predict(X_te)

print("Accuracy stacking classifier :", accuracy_score(y_te, y_pred))

Accuracy stacking classifier : 0.8981132075471698


# Classificador Stacking

In [10]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import StackingClassifier

estimators = [  ('knn', KNeighborsClassifier()),
                ('naivebayes', GaussianNB()),
                ('lr', LogisticRegression())]


pipetotal = Pipeline([
    ('classificador', StackingClassifier(estimators))
])


params = {
    'classificador__knn__n_neighbors': [5, 9],
    'classificador__lr__penalty': ['none', 'l2', 'l1', 'elasticnet'],
    'classificador__passthrough': [True, False],
    #'classificador__estimators': [estimators],
    'classificador__final_estimator': [DecisionTreeClassifier()],
    'classificador__cv': [3, 5]
    }

modelo_stacking = GridSearchCV(estimator=pipetotal, param_grid=params, cv=5)

scores = cross_validate(modelo_stacking, X_tr, y_tr, cv=RepeatedKFold(random_state=101))
scores['test_score'], np.mean(scores['test_score']), np.std(scores['test_score'])

(array([0.90566038, 0.90566038, 0.94968553, 0.94339623, 0.95597484,
        0.94968553, 0.95597484, 0.98742138, 0.9245283 , 0.91823899,
        0.93081761, 0.89308176, 0.94339623, 0.95597484, 0.99371069,
        0.93710692, 0.9245283 , 0.94339623, 0.95597484, 0.96226415,
        0.96855346, 0.96855346, 0.94339623, 0.97484277, 0.9245283 ,
        0.96226415, 0.97484277, 0.96855346, 0.94339623, 0.95597484,
        0.99371069, 0.9245283 , 0.98742138, 0.96855346, 0.96226415,
        0.96226415, 0.89937107, 0.97484277, 0.98113208, 0.91823899,
        0.96226415, 0.95597484, 0.94339623, 0.91194969, 0.98113208,
        0.96855346, 0.98113208, 0.96226415, 0.97484277, 0.9245283 ]),
 0.9511949685534592,
 0.025300222564577857)

In [11]:
modelo_stacking.fit(X_tr, y_tr)
y_pred = modelo_stacking.predict(X_te)

print("Accuracy stacking classifier :", accuracy_score(y_te, y_pred))

Accuracy stacking classifier : 0.8867924528301887
