In [54]:
import pandas as pd
import numpy as np

df = pd.read_csv('credit.csv')
df

Unnamed: 0,sexo,renda,renda_conjunta,tempo,regiao,A6,item,prestacao_mensal,empregado,estado_civil,A11,graduado,A13,A14,emprestimo,aprovacao
0,0,30.83,0.000,u,g,w,v,1.25,True,True,1,False,g,202.0,0,1
1,1,58.67,4.460,u,g,q,h,3.04,True,True,6,False,g,43.0,560,1
2,1,24.50,0.500,u,g,q,h,1.50,True,False,0,False,g,280.0,824,1
3,0,27.83,1.540,u,g,w,v,3.75,True,True,5,True,g,100.0,3,1
4,0,20.17,5.625,u,g,w,v,1.71,True,False,0,False,s,120.0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
685,0,21.08,10.085,y,p,e,h,1.25,False,False,0,False,g,260.0,0,0
686,1,22.67,0.750,u,g,c,v,2.00,False,True,2,True,g,200.0,394,0
687,1,25.25,13.500,y,p,ff,ff,2.00,False,True,1,True,g,200.0,1,0
688,0,17.92,0.205,u,g,aa,v,0.04,False,False,0,False,g,280.0,750,0


In [55]:
#Substituindo caracteres por números nos atributos explicativos
df['empregado']= df['empregado'].astype(int)
df['estado_civil']= df['estado_civil'].astype(int)
df

Unnamed: 0,sexo,renda,renda_conjunta,tempo,regiao,A6,item,prestacao_mensal,empregado,estado_civil,A11,graduado,A13,A14,emprestimo,aprovacao
0,0,30.83,0.000,u,g,w,v,1.25,1,1,1,False,g,202.0,0,1
1,1,58.67,4.460,u,g,q,h,3.04,1,1,6,False,g,43.0,560,1
2,1,24.50,0.500,u,g,q,h,1.50,1,0,0,False,g,280.0,824,1
3,0,27.83,1.540,u,g,w,v,3.75,1,1,5,True,g,100.0,3,1
4,0,20.17,5.625,u,g,w,v,1.71,1,0,0,False,s,120.0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
685,0,21.08,10.085,y,p,e,h,1.25,0,0,0,False,g,260.0,0,0
686,1,22.67,0.750,u,g,c,v,2.00,0,1,2,True,g,200.0,394,0
687,1,25.25,13.500,y,p,ff,ff,2.00,0,1,1,True,g,200.0,1,0
688,0,17.92,0.205,u,g,aa,v,0.04,0,0,0,False,g,280.0,750,0


In [56]:
#Resolvendo problema de regressão usando o Random Forest
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

#Separando atributos explicativos e o atributo TARGET
X = df[['sexo', 'renda', 'renda_conjunta', 'empregado', 'estado_civil', 'emprestimo']]
y = df.aprovacao
X.shape, y.shape

#Divisão dos dados
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Treinar o modelo
rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(X_train, y_train)

#Prever o modelo
y_pred = rf_model.predict(X_test)

#Avaliar o modelo
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae}')

Mean Absolute Error: 0.1960144927536232


In [57]:
X

Unnamed: 0,sexo,renda,renda_conjunta,empregado,estado_civil,emprestimo
0,0,30.83,0.000,1,1,0
1,1,58.67,4.460,1,1,560
2,1,24.50,0.500,1,0,824
3,0,27.83,1.540,1,1,3
4,0,20.17,5.625,1,0,0
...,...,...,...,...,...,...
685,0,21.08,10.085,0,0,0
686,1,22.67,0.750,0,1,394
687,1,25.25,13.500,0,1,1
688,0,17.92,0.205,0,0,750


In [69]:
#Criando um conjunto de dados com valores aleatórios para testar o Random Forest
dados_teste = pd.DataFrame({
    'sexo': np.random.choice([0, 1], size=10),
    'renda': np.random.uniform(10, 60, size=10),
    'renda_conjunta': np.random.uniform(1000, 2500, size=10),
    'empregado': np.random.choice([0, 1], size=10),
    'estado_civil': np.random.choice([0, 1], size=10),
    'emprestimo': np.random.uniform(0, 900, size=10),
})

#Fazer previsões sobre os dados de teste
dados_rf = dados_teste.copy()
probabilidade_aprovacao = rf_model.predict(dados_rf)

#Adicionar a probabilidade de aprovação de crédito
dados_rf['probabilidade_aprovacao'] = probabilidade_aprovacao

dados_rf

Unnamed: 0,sexo,renda,renda_conjunta,empregado,estado_civil,emprestimo,probabilidade_aprovacao
0,0,18.657095,1267.889457,0,1,298.626456,0.28
1,1,26.058459,1248.247554,1,1,625.762901,0.85
2,1,40.483718,1569.184266,0,1,610.504118,0.26
3,1,46.498104,1984.117742,0,0,332.637964,0.26
4,1,12.82938,1478.643924,1,1,546.998432,0.81
5,0,15.665647,1165.976139,1,1,538.331693,0.81
6,0,46.213211,2394.68542,1,0,703.472384,0.94
7,0,24.14863,2317.501284,0,1,598.694772,0.28
8,0,44.63457,1991.631508,1,0,65.933761,0.12
9,1,42.718323,1059.893994,1,0,879.437339,0.97


In [70]:
#Resolvendo problema de regressão usando o K-NN
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor

#Vamos normalizar os dados, pois o K-NN é muito sensível à escala
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Treinamento do Modelo K-NN
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train_scaled, y_train)

# Prever o modelo
y_pred = knn_model.predict(X_test_scaled)

# Avaliar o modelo
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae}')

Mean Absolute Error: 0.2217391304347826


In [71]:
#Fazer previsões sobre os dados de teste
dados_knn = dados_teste.copy()
dados_teste_scaled = scaler.transform(dados_knn)
probabilidade_aprovacao = knn_model.predict(dados_teste_scaled)

#Adicionar a probabilidade de aprovação de crédito
dados_knn['probabilidade_aprovacao'] = probabilidade_aprovacao

dados_knn

Unnamed: 0,sexo,renda,renda_conjunta,empregado,estado_civil,emprestimo,probabilidade_aprovacao
0,0,18.657095,1267.889457,0,1,298.626456,0.6
1,1,26.058459,1248.247554,1,1,625.762901,0.6
2,1,40.483718,1569.184266,0,1,610.504118,0.6
3,1,46.498104,1984.117742,0,0,332.637964,0.6
4,1,12.82938,1478.643924,1,1,546.998432,0.6
5,0,15.665647,1165.976139,1,1,538.331693,0.6
6,0,46.213211,2394.68542,1,0,703.472384,0.6
7,0,24.14863,2317.501284,0,1,598.694772,0.6
8,0,44.63457,1991.631508,1,0,65.933761,0.6
9,1,42.718323,1059.893994,1,0,879.437339,0.6


In [72]:
#Resolvendo problema de regressão usando Árvore de Decisão
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import export_text

#Treinamento do modelo de Árvore de Decisão
tree_model = DecisionTreeRegressor(random_state=42)
tree_model.fit(X_train, y_train)

#Prever o modelo
y_pred = tree_model.predict(X_test)

#Avaliar o modelo
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae}')

Mean Absolute Error: 0.16666666666666666


In [73]:
#Visualização da Árvore
tree_rules = export_text(tree_model, feature_names=X.columns.tolist())
print(tree_rules)

|--- empregado <= 0.50
|   |--- renda_conjunta <= 0.17
|   |   |--- renda <= 35.67
|   |   |   |--- emprestimo <= 20.50
|   |   |   |   |--- renda <= 23.12
|   |   |   |   |   |--- value: [0.00]
|   |   |   |   |--- renda >  23.12
|   |   |   |   |   |--- renda <= 28.57
|   |   |   |   |   |   |--- value: [1.00]
|   |   |   |   |   |--- renda >  28.57
|   |   |   |   |   |   |--- value: [0.00]
|   |   |   |--- emprestimo >  20.50
|   |   |   |   |--- renda <= 21.29
|   |   |   |   |   |--- renda_conjunta <= 0.11
|   |   |   |   |   |   |--- value: [0.00]
|   |   |   |   |   |--- renda_conjunta >  0.11
|   |   |   |   |   |   |--- value: [1.00]
|   |   |   |   |--- renda >  21.29
|   |   |   |   |   |--- value: [0.00]
|   |   |--- renda >  35.67
|   |   |   |--- sexo <= 0.50
|   |   |   |   |--- value: [1.00]
|   |   |   |--- sexo >  0.50
|   |   |   |   |--- renda <= 62.46
|   |   |   |   |   |--- value: [0.00]
|   |   |   |   |--- renda >  62.46
|   |   |   |   |   |--- value: [1.00]


In [74]:
#Fazer previsões sobre os dados de teste
dados_dt = dados_teste.copy()
probabilidade_aprovacao = tree_model.predict(dados_dt)

#Adicionar a probabilidade de aprovação de crédito
dados_dt['probabilidade_aprovacao'] = probabilidade_aprovacao

dados_dt

Unnamed: 0,sexo,renda,renda_conjunta,empregado,estado_civil,emprestimo,probabilidade_aprovacao
0,0,18.657095,1267.889457,0,1,298.626456,1.0
1,1,26.058459,1248.247554,1,1,625.762901,1.0
2,1,40.483718,1569.184266,0,1,610.504118,1.0
3,1,46.498104,1984.117742,0,0,332.637964,1.0
4,1,12.82938,1478.643924,1,1,546.998432,1.0
5,0,15.665647,1165.976139,1,1,538.331693,1.0
6,0,46.213211,2394.68542,1,0,703.472384,1.0
7,0,24.14863,2317.501284,0,1,598.694772,1.0
8,0,44.63457,1991.631508,1,0,65.933761,0.0
9,1,42.718323,1059.893994,1,0,879.437339,1.0
