Lê os dados:

In [37]:
import pandas as pd
df = pd.read_csv("Base_Dissertacaov10.csv",sep=';',decimal=",",encoding = "latin")
# print(df.head())

In [38]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
#Cria os dados
y = df["AUTO"].to_numpy()
X = df.iloc[:,3:-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

#Padroniza os dados
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Random Over Sampler

In [39]:
from imblearn.over_sampling import RandomOverSampler
ros = RandomOverSampler(random_state=12345)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

In [40]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score
logreg = LogisticRegression(solver = 'lbfgs', multi_class = 'auto')
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
print(f' Accuracy Score: {accuracy_score(y_test, y_pred)}')
print(f' Precision Score: {precision_score(y_test, y_pred, average = None)}')
print(f' Recall Score: {recall_score(y_test, y_pred, average = None)}')


 Accuracy Score: 0.9946990957280948
 Precision Score: [0.99531981 0.        ]
 Recall Score: [0.99937343 0.        ]


# Random Over Sampler
Copiado de : https://audreymychan.com/unbalancedclasses.html

In [41]:
# Method to rebalance train data, model a logistic regression, and output charts for predictions on test data
# Inputs: X data, y data, rebalance algorithm (i.e. SMOTE()), rebalancing_title as a str (i.e. 'SMOTE')
from imblearn.metrics import classification_report_imbalanced, sensitivity_specificity_support,sensitivity_score,specificity_score,geometric_mean_score, make_index_balanced_accuracy
def rebalance_train_test_logreg(X, y, rebalance_alg, rebalancing_title, test_size=0.2):

    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = test_size, random_state = 42)

    # Rebalance train data
    rebalance = rebalance_alg
    X_reb, y_reb = rebalance.fit_sample(X_train, y_train)

    # Train a Logistic Regression model on resampled data
    logreg = LogisticRegression(solver = 'lbfgs', multi_class = 'auto')
    logreg.fit(X_reb, y_reb)

    # Generate predictions
    y_pred = logreg.predict(X_test)

    # Print out metrics
    print(f'-------------------------------{rebalancing_title} test size={test_size}--------------------------------------')
    print("----------   Standard Metrics   -------------------")
    print(f' Accuracy Score: {accuracy_score(y_test, y_pred)}')
    print(f' Precision Score: {precision_score(y_test, y_pred, average = None)}')
    print(f' Recall Score: {recall_score(y_test, y_pred, average = None)}')
    print("----------   Imbalanced Metrics   -------------------")
    print(f' Classification report imbalanced: {classification_report_imbalanced(y_test, y_pred)}')
    print(f' Sensitivity specificity support:')
    print(sensitivity_specificity_support(y_test, y_pred))
    print(f' Sensitivity score: {sensitivity_score(y_test, y_pred)}')
    print(f' Specificity score: {specificity_score(y_test, y_pred)}')
    print(f' Geometric mean score: {geometric_mean_score(y_test, y_pred)}')

    return None


# ClusterCentroids

In [42]:
test_size=0.2
try:
    from imblearn.under_sampling import ClusterCentroids
    rebalance_train_test_logreg(X, y, ClusterCentroids(), 'ClusterCentroids',test_size)
except:
    print("Erro!")


-------------------------------ClusterCentroids test size=0.2--------------------------------------
----------   Standard Metrics   -------------------
 Accuracy Score: 0.9950109136264421
 Precision Score: [0.99532127 0.        ]
 Recall Score: [0.99968672 0.        ]
----------   Imbalanced Metrics   -------------------
 Classification report imbalanced:                    pre       rec       spe        f1       geo       iba       sup

          0       1.00      1.00      0.00      1.00      0.00      0.00      3192
          1       0.00      0.00      1.00      0.00      0.00      0.00        15

avg / total       0.99      1.00      0.00      0.99      0.00      0.00      3207

 Sensitivity specificity support: (array([0.99968672, 0.        ]), array([0.        , 0.99968672]), array([3192,   15]))
 Sensitivity score: 0.0
 Specificity score: 0.99968671679198
 Geometric mean score: 0.0
 Index balanced accuracy: <function make_index_balanced_accuracy.<locals>.decorate at 0x11d1bae60



# CondensedNearestNeighbour

In [43]:
try:
    from imblearn.under_sampling import CondensedNearestNeighbour
    rebalance_train_test_logreg(X, y, CondensedNearestNeighbour(), 'CondensedNearestNeighbour',test_size)
except:
    print("Erro!")


-------------------------------CondensedNearestNeighbour test size=0.2--------------------------------------
----------   Standard Metrics   -------------------
 Accuracy Score: 0.9950109136264421
 Precision Score: [0.99532127 0.        ]
 Recall Score: [0.99968672 0.        ]
----------   Imbalanced Metrics   -------------------
 Classification report imbalanced:                    pre       rec       spe        f1       geo       iba       sup

          0       1.00      1.00      0.00      1.00      0.00      0.00      3192
          1       0.00      0.00      1.00      0.00      0.00      0.00        15

avg / total       0.99      1.00      0.00      0.99      0.00      0.00      3207

 Sensitivity specificity support: (array([0.99968672, 0.        ]), array([0.        , 0.99968672]), array([3192,   15]))
 Sensitivity score: 0.0
 Specificity score: 0.99968671679198
 Geometric mean score: 0.0
 Index balanced accuracy: <function make_index_balanced_accuracy.<locals>.decorate at 0x



# EditedNearestNeighbours

In [44]:
try:
    from imblearn.under_sampling import EditedNearestNeighbours
    rebalance_train_test_logreg(X, y, EditedNearestNeighbours(), 'EditedNearestNeighbours',test_size)
except:
    print("Erro!")

-------------------------------EditedNearestNeighbours test size=0.2--------------------------------------
----------   Standard Metrics   -------------------
 Accuracy Score: 0.9950109136264421
 Precision Score: [0.99532127 0.        ]
 Recall Score: [0.99968672 0.        ]
----------   Imbalanced Metrics   -------------------
 Classification report imbalanced:                    pre       rec       spe        f1       geo       iba       sup

          0       1.00      1.00      0.00      1.00      0.00      0.00      3192
          1       0.00      0.00      1.00      0.00      0.00      0.00        15

avg / total       0.99      1.00      0.00      0.99      0.00      0.00      3207

 Sensitivity specificity support: (array([0.99968672, 0.        ]), array([0.        , 0.99968672]), array([3192,   15]))
 Sensitivity score: 0.0
 Specificity score: 0.99968671679198
 Geometric mean score: 0.0
 Index balanced accuracy: <function make_index_balanced_accuracy.<locals>.decorate at 0x12



# EditedNearestNeighbours

In [45]:
try:
    from imblearn.under_sampling import RepeatedEditedNearestNeighbours
    rebalance_train_test_logreg(X, y, RepeatedEditedNearestNeighbours(), 'RepeatedEditedNearestNeighbours',test_size)
except:
    print("Erro!")


-------------------------------RepeatedEditedNearestNeighbours test size=0.2--------------------------------------
----------   Standard Metrics   -------------------
 Accuracy Score: 0.9950109136264421
 Precision Score: [0.99532127 0.        ]
 Recall Score: [0.99968672 0.        ]
----------   Imbalanced Metrics   -------------------
 Classification report imbalanced:                    pre       rec       spe        f1       geo       iba       sup

          0       1.00      1.00      0.00      1.00      0.00      0.00      3192
          1       0.00      0.00      1.00      0.00      0.00      0.00        15

avg / total       0.99      1.00      0.00      0.99      0.00      0.00      3207

 Sensitivity specificity support: (array([0.99968672, 0.        ]), array([0.        , 0.99968672]), array([3192,   15]))
 Sensitivity score: 0.0
 Specificity score: 0.99968671679198
 Geometric mean score: 0.0
 Index balanced accuracy: <function make_index_balanced_accuracy.<locals>.decorate



# AllKNN

In [46]:
try:
    from imblearn.under_sampling import AllKNN
    rebalance_train_test_logreg(X, y, AllKNN(), 'AllKNN',test_size)
except:
    print("Erro!")

-------------------------------AllKNN test size=0.2--------------------------------------
----------   Standard Metrics   -------------------
 Accuracy Score: 0.9950109136264421
 Precision Score: [0.99532127 0.        ]
 Recall Score: [0.99968672 0.        ]
----------   Imbalanced Metrics   -------------------
 Classification report imbalanced:                    pre       rec       spe        f1       geo       iba       sup

          0       1.00      1.00      0.00      1.00      0.00      0.00      3192
          1       0.00      0.00      1.00      0.00      0.00      0.00        15

avg / total       0.99      1.00      0.00      0.99      0.00      0.00      3207

 Sensitivity specificity support: (array([0.99968672, 0.        ]), array([0.        , 0.99968672]), array([3192,   15]))
 Sensitivity score: 0.0
 Specificity score: 0.99968671679198
 Geometric mean score: 0.0
 Index balanced accuracy: <function make_index_balanced_accuracy.<locals>.decorate at 0x1290ebd40>




# InstanceHardnessThreshold

In [47]:
try:
    from imblearn.under_sampling import InstanceHardnessThreshold
    rebalance_train_test_logreg(X, y, InstanceHardnessThreshold(), 'InstanceHardnessThreshold',test_size)
except:
    print("Erro!")


-------------------------------InstanceHardnessThreshold test size=0.2--------------------------------------
----------   Standard Metrics   -------------------
 Accuracy Score: 0.9950109136264421
 Precision Score: [0.99532127 0.        ]
 Recall Score: [0.99968672 0.        ]
----------   Imbalanced Metrics   -------------------
 Classification report imbalanced:                    pre       rec       spe        f1       geo       iba       sup

          0       1.00      1.00      0.00      1.00      0.00      0.00      3192
          1       0.00      0.00      1.00      0.00      0.00      0.00        15

avg / total       0.99      1.00      0.00      0.99      0.00      0.00      3207

 Sensitivity specificity support: (array([0.99968672, 0.        ]), array([0.        , 0.99968672]), array([3192,   15]))
 Sensitivity score: 0.0
 Specificity score: 0.99968671679198
 Geometric mean score: 0.0
 Index balanced accuracy: <function make_index_balanced_accuracy.<locals>.decorate at 0x



# NearMiss

In [48]:
try:
    from imblearn.under_sampling import NearMiss
    rebalance_train_test_logreg(X, y, NearMiss(), 'NearMiss',test_size)
except:
    print("Erro!")

-------------------------------NearMiss test size=0.2--------------------------------------
----------   Standard Metrics   -------------------
 Accuracy Score: 0.004989086373557842
 Precision Score: [1.         0.00467873]
 Recall Score: [3.13283208e-04 1.00000000e+00]
----------   Imbalanced Metrics   -------------------
 Classification report imbalanced:                    pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.00      1.00      0.00      0.02      0.00      3192
          1       0.00      1.00      0.00      0.01      0.02      0.00        15

avg / total       1.00      0.00      1.00      0.00      0.02      0.00      3207

 Sensitivity specificity support: (array([3.13283208e-04, 1.00000000e+00]), array([1.00000000e+00, 3.13283208e-04]), array([3192,   15]))
 Sensitivity score: 1.0
 Specificity score: 0.0003132832080200501
 Geometric mean score: 0.017699808135119722
 Index balanced accuracy: <function make_index_balanced_a



# NeighbourhoodCleaningRule

In [49]:
try:
    from imblearn.under_sampling import NeighbourhoodCleaningRule
    rebalance_train_test_logreg(X, y, NeighbourhoodCleaningRule(), 'NeighbourhoodCleaningRule',test_size)
except:
    print("Erro!")

-------------------------------NeighbourhoodCleaningRule test size=0.2--------------------------------------
----------   Standard Metrics   -------------------
 Accuracy Score: 0.9950109136264421
 Precision Score: [0.99532127 0.        ]
 Recall Score: [0.99968672 0.        ]
----------   Imbalanced Metrics   -------------------
 Classification report imbalanced:                    pre       rec       spe        f1       geo       iba       sup

          0       1.00      1.00      0.00      1.00      0.00      0.00      3192
          1       0.00      0.00      1.00      0.00      0.00      0.00        15

avg / total       0.99      1.00      0.00      0.99      0.00      0.00      3207

 Sensitivity specificity support: (array([0.99968672, 0.        ]), array([0.        , 0.99968672]), array([3192,   15]))
 Sensitivity score: 0.0
 Specificity score: 0.99968671679198
 Geometric mean score: 0.0
 Index balanced accuracy: <function make_index_balanced_accuracy.<locals>.decorate at 0x



# OneSidedSelection

In [50]:
try:
    from imblearn.under_sampling import OneSidedSelection
    rebalance_train_test_logreg(X, y, OneSidedSelection(), 'OneSidedSelection',test_size)
except:
    print("Erro!")

-------------------------------OneSidedSelection test size=0.2--------------------------------------
----------   Standard Metrics   -------------------
 Accuracy Score: 0.9950109136264421
 Precision Score: [0.99532127 0.        ]
 Recall Score: [0.99968672 0.        ]
----------   Imbalanced Metrics   -------------------
 Classification report imbalanced:                    pre       rec       spe        f1       geo       iba       sup

          0       1.00      1.00      0.00      1.00      0.00      0.00      3192
          1       0.00      0.00      1.00      0.00      0.00      0.00        15

avg / total       0.99      1.00      0.00      0.99      0.00      0.00      3207

 Sensitivity specificity support: (array([0.99968672, 0.        ]), array([0.        , 0.99968672]), array([3192,   15]))
 Sensitivity score: 0.0
 Specificity score: 0.99968671679198
 Geometric mean score: 0.0
 Index balanced accuracy: <function make_index_balanced_accuracy.<locals>.decorate at 0x128f6a9e



# RandomUnderSampler

In [51]:
try:
    from imblearn.under_sampling import RandomUnderSampler
    rebalance_train_test_logreg(X, y, RandomUnderSampler(), 'RandomUnderSampler',test_size)
except:
    print("Erro!")

-------------------------------RandomUnderSampler test size=0.2--------------------------------------
----------   Standard Metrics   -------------------
 Accuracy Score: 0.004989086373557842
 Precision Score: [1.         0.00467873]
 Recall Score: [3.13283208e-04 1.00000000e+00]
----------   Imbalanced Metrics   -------------------
 Classification report imbalanced:                    pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.00      1.00      0.00      0.02      0.00      3192
          1       0.00      1.00      0.00      0.01      0.02      0.00        15

avg / total       1.00      0.00      1.00      0.00      0.02      0.00      3207

 Sensitivity specificity support: (array([3.13283208e-04, 1.00000000e+00]), array([1.00000000e+00, 3.13283208e-04]), array([3192,   15]))
 Sensitivity score: 1.0
 Specificity score: 0.0003132832080200501
 Geometric mean score: 0.017699808135119722
 Index balanced accuracy: <function make_index_



# TomekLinks

In [52]:
try:
    from imblearn.under_sampling import TomekLinks
    rebalance_train_test_logreg(X, y, TomekLinks(), 'TomekLinks',test_size)
except:
    print("Erro!")

-------------------------------TomekLinks test size=0.2--------------------------------------
----------   Standard Metrics   -------------------
 Accuracy Score: 0.9950109136264421
 Precision Score: [0.99532127 0.        ]
 Recall Score: [0.99968672 0.        ]
----------   Imbalanced Metrics   -------------------
 Classification report imbalanced:                    pre       rec       spe        f1       geo       iba       sup

          0       1.00      1.00      0.00      1.00      0.00      0.00      3192
          1       0.00      0.00      1.00      0.00      0.00      0.00        15

avg / total       0.99      1.00      0.00      0.99      0.00      0.00      3207

 Sensitivity specificity support: (array([0.99968672, 0.        ]), array([0.        , 0.99968672]), array([3192,   15]))
 Sensitivity score: 0.0
 Specificity score: 0.99968671679198
 Geometric mean score: 0.0
 Index balanced accuracy: <function make_index_balanced_accuracy.<locals>.decorate at 0x1290c8560>




# ADASYN

In [53]:
try:
    from imblearn.over_sampling import ADASYN
    rebalance_train_test_logreg(X, y, ADASYN(), 'ADASYN',test_size)
except:
    print("Erro!")

-------------------------------ADASYN test size=0.2--------------------------------------
----------   Standard Metrics   -------------------
 Accuracy Score: 0.004989086373557842
 Precision Score: [1.         0.00467873]
 Recall Score: [3.13283208e-04 1.00000000e+00]
----------   Imbalanced Metrics   -------------------
 Classification report imbalanced:                    pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.00      1.00      0.00      0.02      0.00      3192
          1       0.00      1.00      0.00      0.01      0.02      0.00        15

avg / total       1.00      0.00      1.00      0.00      0.02      0.00      3207

 Sensitivity specificity support: (array([3.13283208e-04, 1.00000000e+00]), array([1.00000000e+00, 3.13283208e-04]), array([3192,   15]))
 Sensitivity score: 1.0
 Specificity score: 0.0003132832080200501
 Geometric mean score: 0.017699808135119722
 Index balanced accuracy: <function make_index_balanced_acc



# BorderlineSMOTE

In [54]:
try:
    from imblearn.over_sampling import BorderlineSMOTE
    rebalance_train_test_logreg(X, y, BorderlineSMOTE(), 'BorderlineSMOTE',test_size)
except:
    print("Erro!")

-------------------------------BorderlineSMOTE test size=0.2--------------------------------------
----------   Standard Metrics   -------------------
 Accuracy Score: 0.004989086373557842
 Precision Score: [1.         0.00467873]
 Recall Score: [3.13283208e-04 1.00000000e+00]
----------   Imbalanced Metrics   -------------------
 Classification report imbalanced:                    pre       rec       spe        f1       geo       iba       sup

          0       1.00      0.00      1.00      0.00      0.02      0.00      3192
          1       0.00      1.00      0.00      0.01      0.02      0.00        15

avg / total       1.00      0.00      1.00      0.00      0.02      0.00      3207

 Sensitivity specificity support: (array([3.13283208e-04, 1.00000000e+00]), array([1.00000000e+00, 3.13283208e-04]), array([3192,   15]))
 Sensitivity score: 1.0
 Specificity score: 0.0003132832080200501
 Geometric mean score: 0.017699808135119722
 Index balanced accuracy: <function make_index_bal



# KMeansSMOTE

In [58]:
try:
    from imblearn.over_sampling import KMeansSMOTE
    rebalance_train_test_logreg(X, y, KMeansSMOTE(), 'KMeansSMOTE',test_size)
except:
    print("Erro!")

Erro!


# RandomOverSampler

In [None]:
try:
    from imblearn.over_sampling import RandomOverSampler
    rebalance_train_test_logreg(X, y, RandomOverSampler(), 'RandomOverSampler',test_size)
except:
    print("Erro!")

# SMOTE

In [None]:
try:
    from imblearn.over_sampling import SMOTE
    rebalance_train_test_logreg(X, y, SMOTE(), 'SMOTE',test_size)
except:
    print("Erro!")

# SMOTENC

In [None]:
try:
    from imblearn.over_sampling import SMOTENC
    rebalance_train_test_logreg(X, y, SMOTENC(), 'SMOTENC',test_size)
except:
    print("Erro!")

# SVMSMOTE

In [None]:
try:
    from imblearn.over_sampling import SVMSMOTE
    rebalance_train_test_logreg(X, y, SVMSMOTE(), 'SVMSMOTE',test_size)
except:
    print("Erro!")

# SMOTEENN

In [None]:
try:
    from imblearn.combine import SMOTEENN
    rebalance_train_test_logreg(X, y, SMOTEENN(), 'SMOTEENN',test_size)
except:
    print("Erro!")

# SMOTETomek

In [None]:
try:
    from imblearn.combine import SMOTETomek
    rebalance_train_test_logreg(X, y, SMOTETomek(), 'SMOTETomek',test_size)
except:
    print("Erro!")


# BalancedBaggingClassifier

In [None]:
try:
    from imblearn.ensemble import BalancedBaggingClassifier
    rebalance_train_test_logreg(X, y, BalancedBaggingClassifier(), 'BalancedBaggingClassifier',test_size)
except:
    print("Erro!")

# BalancedRandomForestClassifier

In [None]:
try:
    from imblearn.ensemble import BalancedRandomForestClassifier
    rebalance_train_test_logreg(X, y, BalancedRandomForestClassifier(), 'BalancedRandomForestClassifier',test_size)
except:
    print("Erro!")

# EasyEnsemble

In [None]:
try:
    from imblearn.ensemble import EasyEnsembleClassifier
    rebalance_train_test_logreg(X, y, EasyEnsembleClassifier(), 'EasyEnsembleClassifier',test_size)
except:
    print("Erro!")

# RUSBoostClassifier

In [None]:
try:
    from imblearn.ensemble import RUSBoostClassifier
    rebalance_train_test_logreg(X, y, RUSBoostClassifier(), 'RUSBoostClassifier',test_size)
except:
    print("Erro!")


# RUSBoostClassifier

In [None]:
try:
    from imblearn.ensemble import RUSBoostClassifier
    rebalance_train_test_logreg(X, y, RUSBoostClassifier(), 'RUSBoostClassifier',test_size)
except:
    print("Erro!")
