# Regressão logística

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler

In [None]:
# Load the raw credit-risk dataset (semicolon-separated CSV) and preview the first rows.
# NOTE(review): absolute local path — the notebook only runs where this file exists.
df = pd.read_csv(
    'C:/Users/AMD/Desktop/DESKTOP ANTIGA/Área de trabalho/Leandro/MBA USP/TCC/Modelos/Testes/credit_risk_dataset.csv',
    sep=';',
)
df.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the raw data's numeric columns.
df.describe()

In [None]:
# Check for spurious data: count the records whose age equals 144.
n_idade_144 = df["person_age"].eq(144).sum()
print('\nContando os 144 na amostra:\n')
print(f'Idade fora do normal: {n_idade_144}')

In [None]:
# Remove spurious records (age == 144), report the class balance and persist the result.
print(f'Linhas antes da exclusão: {len(df)}')
# Index labels of the rows to discard. They are dropped by label below, so the
# cell no longer relies on index labels coinciding with row positions.
excluir_idade_144 = df.index[df.person_age == 144].tolist()
cred_risk = df.drop(index=excluir_idade_144)
classe_0 = len(cred_risk[cred_risk['loan_status'] == 0])
classe_1 = len(cred_risk[cred_risk['loan_status'] == 1])
print(f'Linhas após a exclusão: {len(cred_risk)}')
print(f'Pessoas em dia na amostra: {classe_0} ({(classe_0 * 100 / (classe_0 + classe_1)):.2f}%)')
# Class 1 previously reused the "em dia" label of class 0.
# NOTE(review): presumably loan_status == 1 means default — confirm against the dataset dictionary.
print(f'Pessoas inadimplentes na amostra: {classe_1} ({(classe_1 * 100 / (classe_0 + classe_1)):.2f}%)')
# index=False keeps the row index out of the file, so reloading it does not
# create a spurious "Unnamed: 0" column.
cred_risk.to_csv('C:/Users/AMD/Desktop/DESKTOP ANTIGA/Área de trabalho/Leandro/MBA USP/TCC/Modelos/Testes/crg.csv', index=False)

In [None]:
# Reload the intermediate file written by the previous cell and summarise it.
cr = pd.read_csv(
    'C:/Users/AMD/Desktop/DESKTOP ANTIGA/Área de trabalho/Leandro/MBA USP/TCC/Modelos/Testes/crg.csv',
    sep=',',
)
cr.describe()

In [None]:
# Remove spurious records (age == 123), report the class balance and persist the result.
print(f'Linhas antes da exclusão: {len(cr)}')
# Index labels of the rows to discard. They are dropped by label below, so the
# cell no longer relies on index labels coinciding with row positions.
excluir_idade_123 = cr.index[cr.person_age == 123].tolist()
cred_risk3 = cr.drop(index=excluir_idade_123)
classe_0 = len(cred_risk3[cred_risk3['loan_status'] == 0])
classe_1 = len(cred_risk3[cred_risk3['loan_status'] == 1])
print(f'Linhas após a exclusão: {len(cred_risk3)}')
print(f'Pessoas em dia na amostra: {classe_0} ({(classe_0 * 100 / (classe_0 + classe_1)):.2f}%)')
# Class 1 previously reused the "em dia" label of class 0.
# NOTE(review): presumably loan_status == 1 means default — confirm against the dataset dictionary.
print(f'Pessoas inadimplentes na amostra: {classe_1} ({(classe_1 * 100 / (classe_0 + classe_1)):.2f}%)')
# index=False keeps the row index out of the file, so reloading it does not
# create a spurious "Unnamed: 0" column.
cred_risk3.to_csv('C:/Users/AMD/Desktop/DESKTOP ANTIGA/Área de trabalho/Leandro/MBA USP/TCC/Modelos/Testes/crisk_geral.csv', index=False)

In [None]:
# Reload the cleaned dataset and keep only the five numeric predictors plus the target.
crisk = pd.read_csv(
    'C:/Users/AMD/Desktop/DESKTOP ANTIGA/Área de trabalho/Leandro/MBA USP/TCC/Modelos/Testes/crisk_geral.csv',
    sep=',',
)
crisk = crisk.loc[:, [
    'person_age',
    'person_income',
    'loan_amnt',
    'loan_percent_income',
    'cb_person_cred_hist_length',
    'loan_status',
]]
crisk.describe()

In [None]:
# Descriptive analysis: one 20-bin histogram per column of `crisk`.
sns.set_theme(
    rc={'figure.figsize': (20, 20)},
    font_scale=1.3,
)
eixo = crisk.hist(color='blue', bins=20)

In [None]:
# Separate predictors and target.
# Columns are selected by name rather than by position: `crisk` is reassigned
# further down the notebook with a wider set of columns, so positional slicing
# would silently pick the wrong columns if this cell were re-run afterwards.
X = crisk[['person_age', 'person_income', 'loan_amnt', 'loan_percent_income',
           'cb_person_cred_hist_length']].values
# 1-D target of shape (n_samples,): a (n, 1) column vector makes scikit-learn
# emit a DataConversionWarning on fit.
y = crisk['loan_status'].values

In [None]:
# Hold out 20% of the rows as the test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

Agora seguiremos por dois caminhos:
- Caminho 1 - Base normalizada sem balancear
- Caminho 2 - Base normalizada balanceada

O intuito é treinar o modelo dessas duas formas e testar em uma mesma base de teste para comparar o desempenho.

Caminho 1 - Treinando modelo em base desbalanceada

In [None]:
X_train_geral_des = X_train
X_test_geral_des = X_test
y_train_geral_des = y_train
y_test_geral_des = y_test

In [None]:
# Aplicando a normalização Min-Max apenas às características
scaler = MinMaxScaler()
X_train_geral_des_scaled = scaler.fit_transform(X_train_geral_des)
X_test_geral_des_scaled = scaler.transform(X_test_geral_des)

In [None]:
Func_Log_geral_des = LogisticRegression(random_state=0, max_iter=500)
Func_Log_geral_des.fit(X_train_geral_des, y_train_geral_des)

In [None]:
prev_geral_des = Func_Log_geral_des.predict(X_test_geral_des)

In [None]:
confusion_matrix(y_test_geral_des, prev_geral_des)

In [None]:
print(classification_report(y_test_geral_des, prev_geral_des))

Caminho 2 - Treinando o modelo em base balanceada (usando Oversampling)

In [None]:
X_train_geral_bal = X_train
X_test_geral_bal = X_test
y_train_geral_bal = y_train
y_test_geral_bal = y_test

In [None]:
# Aplicando a normalização Min-Max apenas às características
scaler = MinMaxScaler()
X_train_geral_bal_scaled = scaler.fit_transform(X_train_geral_bal)
X_test_geral_bal_scaled = scaler.transform(X_test_geral_bal)

In [None]:
Func_Log_geral_bal = LogisticRegression(random_state=0, max_iter=500)

In [None]:
# Criar um objeto RandomOverSampler para ajustar os pesos das classes

ros_geral_balanc = RandomOverSampler(random_state=42)

In [None]:
# Aplicar o balanceamento de classes apenas no conjunto de treinamento

X_train_resampled, y_train_resampled = ros_geral_balanc.fit_resample(X_train_geral_bal, y_train_geral_bal)

In [None]:
print(pd.value_counts(y_train_resampled))

In [None]:
Func_Log_geral_bal.fit(X_train_resampled, y_train_resampled)

In [None]:
# Usar o mesmo na balanceada e desbalanceada
prev_bal = Func_Log_geral_bal.predict(X_test_geral_bal)

In [None]:
confusion_matrix(y_test_geral_bal, prev_bal)

In [None]:
print(classification_report(y_test_geral_bal, prev_bal))

Caminho 2.1 - Treinando o modelo em base balanceada (usando Undersampling)

In [None]:
X_train_geral_bal_under = X_train
X_test_geral_bal_under = X_test
y_train_geral_bal_under = y_train
y_test_geral_bal_under = y_test

In [None]:
# Aplicando a normalização Min-Max apenas às características
scaler = MinMaxScaler()
X_train_geral_bal_under_scaled = scaler.fit_transform(X_train_geral_bal_under)
X_test_geral_bal_under_scaled = scaler.transform(X_test_geral_bal_under)

In [None]:
Func_Log_geral_bal_under = LogisticRegression(random_state=0, max_iter=500)

In [None]:
# Criar um objeto RandomOverSampler para ajustar os pesos das classes

ros_geral_balanc_under = RandomUnderSampler(random_state=42)

In [None]:
# Aplicar o balanceamento de classes apenas no conjunto de treinamento

X_train_resampled_under, y_train_resampled_under = ros_geral_balanc_under.fit_resample(X_train_geral_bal_under, y_train_geral_bal_under)

In [None]:
print(pd.value_counts(y_train_resampled_under))

In [None]:
Func_Log_geral_bal_under.fit(X_train_resampled_under, y_train_resampled_under)

In [None]:
# Usar o mesmo na balanceada e desbalanceada
prev_bal = Func_Log_geral_bal_under.predict(X_test_geral_bal_under)

In [None]:
confusion_matrix(y_test_geral_bal_under, prev_bal)

In [None]:
print(classification_report(y_test_geral_bal_under, prev_bal))

Neste momento, estou dividindo a base em subcategorias (por `loan_grade`) e aplicando, em cada uma delas, o mesmo procedimento de modelagem usado anteriormente, tanto em base desbalanceada quanto em base balanceada

Categoria A

In [None]:
crisk = pd.read_csv('C:/Users/AMD/Desktop/DESKTOP ANTIGA/Área de trabalho/Leandro/MBA USP/TCC/Modelos/Testes/crisk_geral.csv', delimiter=',')
df_crisk_subcat=pd.DataFrame(crisk[['person_age','person_income','loan_amnt','loan_percent_income','cb_person_cred_hist_length','loan_grade',
                         'loan_status']])
df_crisk_subcat.describe()

In [None]:
subcat_A = pd.DataFrame(df_crisk_subcat.query('loan_grade =="A"'))
cat_A = subcat_A[['person_age','person_income','loan_amnt','loan_percent_income','cb_person_cred_hist_length',
                         'loan_status']]
cat_A.head(5)

In [None]:
cat_A.describe()

In [None]:
# Separando variáveis preditoras e alvo
X_sub_A = cat_A.iloc[:, :5].values
y_sub_A = cat_A.iloc[:, 5:6].values

In [None]:
# Dividindo o dataset entre treino e teste:

X_sub_A_train, X_sub_A_test, y_sub_A_train, y_sub_A_test = train_test_split(X_sub_A, y_sub_A, test_size=0.20, random_state=0)

Categoria A desbalanceada

In [None]:
X_train_A_des = X_sub_A_train
X_test_A_des = X_sub_A_test
y_train_A_des = y_sub_A_train
y_test_A_des = y_sub_A_test

In [None]:
# Aplicando a normalização Min-Max apenas às características
scaler = MinMaxScaler()
X_train_A_des_scaled = scaler.fit_transform(X_train_A_des)
X_test_A_des_scaled = scaler.transform(X_test_A_des)

In [None]:
Func_Log_A_des = LogisticRegression(random_state=0, max_iter=500)
Func_Log_A_des.fit(X_train_A_des, y_train_A_des)

In [None]:
prev_A_des = Func_Log_A_des.predict(X_test_A_des)

In [None]:
confusion_matrix(y_test_A_des, prev_A_des)

In [None]:
print(classification_report(y_test_A_des, prev_A_des))

Categoria A Balanceada (Oversampling)

In [None]:
X_train_A_bal = X_sub_A_train
X_test_A_bal = X_sub_A_test
y_train_A_bal = y_sub_A_train
y_test_A_bal = y_sub_A_test

In [None]:
# Aplicando a normalização Min-Max apenas às características
scaler = MinMaxScaler()
X_train_A_bal_scaled = scaler.fit_transform(X_train_A_bal)
X_test_A_bal_scaled = scaler.transform(X_test_A_bal)

In [None]:
Func_Log_A_bal = LogisticRegression(random_state=0, max_iter=500)

In [None]:
# Criar um objeto RandomOverSampler para ajustar os pesos das classes

ros_A_balanc = RandomOverSampler(random_state=42)

In [None]:
# Aplicar o balanceamento de classes apenas no conjunto de treinamento

X_train_resampled_A, y_train_resampled_A = ros_A_balanc.fit_resample(X_train_A_bal, y_train_A_bal)

In [None]:
Func_Log_A_bal.fit(X_train_resampled_A, y_train_resampled_A)

In [None]:
# Usar o mesmo na balanceada e desbalanceada
prev_bal_A = Func_Log_A_bal.predict(X_test_A_bal)

In [None]:
confusion_matrix(y_test_A_bal, prev_bal_A)

In [None]:
print(classification_report(y_test_A_bal, prev_bal_A))

Categoria B

In [None]:
crisk = pd.read_csv('C:/Users/AMD/Desktop/DESKTOP ANTIGA/Área de trabalho/Leandro/MBA USP/TCC/Modelos/Testes/crisk_geral.csv', delimiter=',')
df_crisk_subcat=pd.DataFrame(crisk[['person_age','person_income','loan_amnt','loan_percent_income','cb_person_cred_hist_length','loan_grade',
                         'loan_status']])
df_crisk_subcat.describe()

In [None]:
subcat_B = pd.DataFrame(df_crisk_subcat.query('loan_grade =="B"'))
cat_B = subcat_B[['person_age','person_income','loan_amnt','loan_percent_income','cb_person_cred_hist_length',
                         'loan_status']]
cat_B.head(5)

In [None]:
cat_B.describe()

In [None]:
# Separando variáveis preditoras e alvo
X_sub_B = cat_B.iloc[:, :5].values
y_sub_B = cat_B.iloc[:, 5:6].values

In [None]:
# Dividindo o dataset entre treino e teste:

X_sub_B_train, X_sub_B_test, y_sub_B_train, y_sub_B_test = train_test_split(X_sub_B, y_sub_B, test_size=0.20, random_state=0)

Categoria B desbalanceada

In [None]:
X_train_B_des = X_sub_B_train
X_test_B_des = X_sub_B_test
y_train_B_des = y_sub_B_train
y_test_B_des = y_sub_B_test

In [None]:
# Aplicando a normalização Min-Max apenas às características
scaler = MinMaxScaler()
X_train_B_des_scaled = scaler.fit_transform(X_train_B_des)
X_test_B_des_scaled = scaler.transform(X_test_B_des)

In [None]:
Func_Log_B_des = LogisticRegression(random_state=0, max_iter=500)
Func_Log_B_des.fit(X_train_B_des, y_train_B_des)

In [None]:
prev_B_des = Func_Log_B_des.predict(X_test_B_des)

In [None]:
confusion_matrix(y_test_B_des, prev_B_des)

In [None]:
print(classification_report(y_test_B_des, prev_B_des))

Categoria B balanceada (Oversampling)

In [None]:
X_train_B_bal = X_sub_B_train
X_test_B_bal = X_sub_B_test
y_train_B_bal = y_sub_B_train
y_test_B_bal = y_sub_B_test

In [None]:
# Aplicando a normalização Min-Max apenas às características
scaler = MinMaxScaler()
X_train_B_bal_scaled = scaler.fit_transform(X_train_B_bal)
X_test_B_bal_scaled = scaler.transform(X_test_B_bal)

In [None]:
Func_Log_B_bal = LogisticRegression(random_state=0, max_iter=500)

In [None]:
# Criar um objeto RandomOverSampler para ajustar os pesos das classes

ros_B_balanc = RandomOverSampler(random_state=42)

In [None]:
# Aplicar o balanceamento de classes apenas no conjunto de treinamento

X_train_resampled_B, y_train_resampled_B = ros_B_balanc.fit_resample(X_train_B_bal, y_train_B_bal)

In [None]:
Func_Log_B_bal.fit(X_train_resampled_B, y_train_resampled_B)

In [None]:
# Usar o mesmo na balanceada e desbalanceada
prev_bal_B = Func_Log_B_bal.predict(X_test_B_bal)

In [None]:
confusion_matrix(y_test_B_bal, prev_bal_B)

In [None]:
print(classification_report(y_test_B_bal, prev_bal_B))

Categoria C

In [None]:
crisk = pd.read_csv('C:/Users/AMD/Desktop/DESKTOP ANTIGA/Área de trabalho/Leandro/MBA USP/TCC/Modelos/Testes/crisk_geral.csv', delimiter=',')
df_crisk_subcat=pd.DataFrame(crisk[['person_age','person_income','loan_amnt','loan_percent_income','cb_person_cred_hist_length','loan_grade',
                         'loan_status']])
df_crisk_subcat.describe()

In [None]:
subcat_C = pd.DataFrame(df_crisk_subcat.query('loan_grade =="C"'))
cat_C = subcat_C[['person_age','person_income','loan_amnt','loan_percent_income','cb_person_cred_hist_length',
                         'loan_status']]
cat_C.head(5)

In [None]:
cat_C.describe()

In [None]:
# Separando variáveis preditoras e alvo
X_sub_C = cat_C.iloc[:, :5].values
y_sub_C = cat_C.iloc[:, 5:6].values

In [None]:
# Dividindo o dataset entre treino e teste:

X_sub_C_train, X_sub_C_test, y_sub_C_train, y_sub_C_test = train_test_split(X_sub_C, y_sub_C, test_size=0.20, random_state=0)

Categoria C desbalanceada

In [None]:
X_train_C_des = X_sub_C_train
X_test_C_des = X_sub_C_test
y_train_C_des = y_sub_C_train
y_test_C_des = y_sub_C_test

In [None]:
# Aplicando a normalização Min-Max apenas às características
scaler = MinMaxScaler()
X_train_C_des_scaled = scaler.fit_transform(X_train_C_des)
X_test_C_des_scaled = scaler.transform(X_test_C_des)

In [None]:
Func_Log_C_des = LogisticRegression(random_state=0, max_iter=500)
Func_Log_C_des.fit(X_train_C_des, y_train_C_des)

In [None]:
prev_C_des = Func_Log_C_des.predict(X_test_C_des)

In [None]:
confusion_matrix(y_test_C_des, prev_C_des)

In [None]:
print(classification_report(y_test_C_des, prev_C_des))

Categoria C balanceada (Oversampling)

In [None]:
X_train_C_bal = X_sub_C_train
X_test_C_bal = X_sub_C_test
y_train_C_bal = y_sub_C_train
y_test_C_bal = y_sub_C_test

In [None]:
# Aplicando a normalização Min-Max apenas às características
scaler = MinMaxScaler()
X_train_C_bal_scaled = scaler.fit_transform(X_train_C_bal)
X_test_C_bal_scaled = scaler.transform(X_test_C_bal)

In [None]:
Func_Log_C_bal = LogisticRegression(random_state=0, max_iter=500)

In [None]:
# Criar um objeto RandomOverSampler para ajustar os pesos das classes

ros_C_balanc = RandomOverSampler(random_state=42)

In [None]:
# Aplicar o balanceamento de classes apenas no conjunto de treinamento

X_train_resampled_C, y_train_resampled_C = ros_C_balanc.fit_resample(X_train_C_bal, y_train_C_bal)

In [None]:
Func_Log_C_bal.fit(X_train_resampled_C, y_train_resampled_C)

In [None]:
# Usar o mesmo na balanceada e desbalanceada
prev_bal_C = Func_Log_C_bal.predict(X_test_C_bal)

In [None]:
confusion_matrix(y_test_C_bal, prev_bal_C)

In [None]:
print(classification_report(y_test_C_bal, prev_bal_C))

Categoria D

In [None]:
crisk = pd.read_csv('C:/Users/AMD/Desktop/DESKTOP ANTIGA/Área de trabalho/Leandro/MBA USP/TCC/Modelos/Testes/crisk_geral.csv', delimiter=',')
df_crisk_subcat=pd.DataFrame(crisk[['person_age','person_income','loan_amnt','loan_percent_income','cb_person_cred_hist_length','loan_grade',
                         'loan_status']])
df_crisk_subcat.describe()


In [None]:
subcat_D = pd.DataFrame(df_crisk_subcat.query('loan_grade =="D"'))
cat_D = subcat_D[['person_age','person_income','loan_amnt','loan_percent_income','cb_person_cred_hist_length',
                         'loan_status']]
cat_D.head(5)


In [None]:
cat_D.describe()

In [None]:
# Separando variáveis preditoras e alvo
X_sub_D = cat_D.iloc[:, :5].values
y_sub_D = cat_D.iloc[:, 5:6].values

In [None]:
# Dividindo o dataset entre treino e teste:

X_sub_D_train, X_sub_D_test, y_sub_D_train, y_sub_D_test = train_test_split(X_sub_D, y_sub_D, test_size=0.20, random_state=0)

Categoria D desbalanceada

In [None]:
X_train_D_des = X_sub_D_train
X_test_D_des = X_sub_D_test
y_train_D_des = y_sub_D_train
y_test_D_des = y_sub_D_test

In [None]:
# Aplicando a normalização Min-Max apenas às características
scaler = MinMaxScaler()
X_train_D_des_scaled = scaler.fit_transform(X_train_D_des)
X_test_D_des_scaled = scaler.transform(X_test_D_des)

In [None]:
Func_Log_D_des = LogisticRegression(random_state=0, max_iter=500)
Func_Log_D_des.fit(X_train_D_des, y_train_D_des)

In [None]:
prev_D_des = Func_Log_D_des.predict(X_test_D_des)

In [None]:
confusion_matrix(y_test_D_des, prev_D_des)

In [None]:
print(classification_report(y_test_D_des, prev_D_des))

Categoria D balanceada (Oversampling)

In [None]:
X_train_D_bal = X_sub_D_train
X_test_D_bal = X_sub_D_test
y_train_D_bal = y_sub_D_train
y_test_D_bal = y_sub_D_test

In [None]:
# Aplicando a normalização Min-Max apenas às características
scaler = MinMaxScaler()
X_train_D_bal_scaled = scaler.fit_transform(X_train_D_bal)
X_test_D_bal_scaled = scaler.transform(X_test_D_bal)

In [None]:
Func_Log_D_bal = LogisticRegression(random_state=0, max_iter=500)

In [None]:
# Criar um objeto RandomOverSampler para ajustar os pesos das classes

ros_D_balanc = RandomOverSampler(random_state=42)

In [None]:
# Aplicar o balanceamento de classes apenas no conjunto de treinamento

X_train_resampled_D, y_train_resampled_D = ros_D_balanc.fit_resample(X_train_D_bal, y_train_D_bal)

In [None]:
Func_Log_D_bal.fit(X_train_resampled_D, y_train_resampled_D)

In [None]:
# Usar o mesmo na balanceada e desbalanceada
prev_bal_D = Func_Log_D_bal.predict(X_test_D_bal)

In [None]:
confusion_matrix(y_test_D_bal, prev_bal_D)

In [None]:
print(classification_report(y_test_D_bal, prev_bal_D))

Categoria D (Undersampling)

In [None]:
X_train_D_bal_under = X_sub_D_train
X_test_D_bal_under = X_sub_D_test
y_train_D_bal_under = y_sub_D_train
y_test_D_bal_under = y_sub_D_test


In [None]:
# Aplicando a normalização Min-Max apenas às características
scaler = MinMaxScaler()
X_train_D_bal_under_scaled = scaler.fit_transform(X_train_D_bal_under)
X_test_D_bal_under_scaled = scaler.transform(X_test_D_bal_under)

In [None]:
Func_Log_D_bal_under = LogisticRegression(random_state=0, max_iter=500)

In [None]:
# Criar um objeto RandomOverSampler para ajustar os pesos das classes

ros_D_balanc_under = RandomUnderSampler(random_state=42)

In [None]:
# Aplicar o balanceamento de classes apenas no conjunto de treinamento

X_train_resampled_D_under, y_train_resampled_D_under = ros_D_balanc_under.fit_resample(X_train_D_bal_under, y_train_D_bal_under)

In [None]:
Func_Log_D_bal_under.fit(X_train_resampled_D_under, y_train_resampled_D_under)

In [None]:
# Usar o mesmo na balanceada e desbalanceada
prev_bal_D_under = Func_Log_D_bal_under.predict(X_test_D_bal_under)

In [None]:
confusion_matrix(y_test_D_bal_under, prev_bal_D_under)

In [None]:
print(classification_report(y_test_D_bal_under, prev_bal_D_under))

Categoria E

In [None]:
crisk = pd.read_csv('C:/Users/AMD/Desktop/DESKTOP ANTIGA/Área de trabalho/Leandro/MBA USP/TCC/Modelos/Testes/crisk_geral.csv', delimiter=',')
df_crisk_subcat=pd.DataFrame(crisk[['person_age','person_income','loan_amnt','loan_percent_income','cb_person_cred_hist_length','loan_grade',
                         'loan_status']])
df_crisk_subcat.describe()

In [None]:
subcat_E = pd.DataFrame(df_crisk_subcat.query('loan_grade =="E"'))
cat_E = subcat_E[['person_age','person_income','loan_amnt','loan_percent_income','cb_person_cred_hist_length',
                         'loan_status']]
cat_E.head(5)

In [None]:
cat_E.describe()

In [None]:
# Separando variáveis preditoras e alvo
X_sub_E = cat_E.iloc[:, :5].values
y_sub_E = cat_E.iloc[:, 5:6].values

In [None]:
# Dividindo o dataset entre treino e teste:

X_sub_E_train, X_sub_E_test, y_sub_E_train, y_sub_E_test = train_test_split(X_sub_E, y_sub_E, test_size=0.20, random_state=0)

Categoria E desbalanceada

In [None]:
X_train_E_des = X_sub_E_train
X_test_E_des = X_sub_E_test
y_train_E_des = y_sub_E_train
y_test_E_des = y_sub_E_test

In [None]:
# Aplicando a normalização Min-Max apenas às características
scaler = MinMaxScaler()
X_train_E_des_scaled = scaler.fit_transform(X_train_E_des)
X_test_E_des_scaled = scaler.transform(X_test_E_des)

In [None]:
Func_Log_E_des = LogisticRegression(random_state=0, max_iter=500)
Func_Log_E_des.fit(X_train_E_des, y_train_E_des)

In [None]:
prev_E_des = Func_Log_E_des.predict(X_test_E_des)

In [None]:
confusion_matrix(y_test_E_des, prev_E_des)

In [None]:
print(classification_report(y_test_E_des, prev_E_des))

Categoria E balanceada (Oversampling)

In [None]:
X_train_E_bal = X_sub_E_train
X_test_E_bal = X_sub_E_test
y_train_E_bal = y_sub_E_train
y_test_E_bal = y_sub_E_test

In [None]:
# Aplicando a normalização Min-Max apenas às características
scaler = MinMaxScaler()
X_train_E_bal_scaled = scaler.fit_transform(X_train_E_bal)
X_test_E_bal_scaled = scaler.transform(X_test_E_bal)

In [None]:
Func_Log_E_bal = LogisticRegression(random_state=0, max_iter=500)

In [None]:
# Criar um objeto RandomOverSampler para ajustar os pesos das classes

ros_E_balanc = RandomOverSampler(random_state=42)

In [None]:
# Aplicar o balanceamento de classes apenas no conjunto de treinamento

X_train_resampled_E, y_train_resampled_E = ros_E_balanc.fit_resample(X_train_E_bal, y_train_E_bal)

In [None]:
Func_Log_E_bal.fit(X_train_resampled_E, y_train_resampled_E)

In [None]:
# Usar o mesmo na balanceada e desbalanceada
prev_bal_E = Func_Log_E_bal.predict(X_test_E_bal)

In [None]:
confusion_matrix(y_test_E_bal, prev_bal_E)

In [None]:
print(classification_report(y_test_E_bal, prev_bal_E))

Categoria E (Undersampling)

In [None]:
X_train_E_bal_under = X_sub_E_train
X_test_E_bal_under = X_sub_E_test
y_train_E_bal_under = y_sub_E_train
y_test_E_bal_under = y_sub_E_test

In [None]:
# Aplicando a normalização Min-Max apenas às características
scaler = MinMaxScaler()
X_train_E_bal_under_scaled = scaler.fit_transform(X_train_E_bal_under)
X_test_E_bal_under_scaled = scaler.transform(X_test_E_bal_under)

In [None]:
Func_Log_E_bal_under = LogisticRegression(random_state=0, max_iter=500)

In [None]:
# Criar um objeto RandomOverSampler para ajustar os pesos das classes

ros_E_balanc_under = RandomUnderSampler(random_state=42)

In [None]:
# Aplicar o balanceamento de classes apenas no conjunto de treinamento

X_train_resampled_E_under, y_train_resampled_E_under = ros_E_balanc_under.fit_resample(X_train_E_bal_under, y_train_E_bal_under)

In [None]:
Func_Log_E_bal_under.fit(X_train_resampled_E_under, y_train_resampled_E_under)

In [None]:
# Usar o mesmo na balanceada e desbalanceada
prev_bal_E_under = Func_Log_E_bal_under.predict(X_test_E_bal_under)

In [None]:
confusion_matrix(y_test_E_bal_under, prev_bal_E_under)

In [None]:
print(classification_report(y_test_E_bal_under, prev_bal_E_under))