In [61]:
import pickle
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

In [62]:
# Read the processed training features and the processed test dataset.
data_train, data_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/processed/features_for_model.csv',
        '../data/processed/test_dataset.csv',
    )
)

In [63]:
# Separate the 'Default' target column from the predictor columns,
# first for the training frame and then for the test frame.
y_target = data_train['Default']
x_features = data_train.drop(columns=['Default'])

y_target_test = data_test['Default']
x_features_test = data_test.drop(columns=['Default'])

In [64]:
# Load the pickled pipeline object from the Artifacts folder.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so this
# artifact must only ever come from a trusted source.
with open ('../Artifacts/pipeline.pkl','rb') as f:
    credit_default_model_pipeline=pickle.load(f)

In [65]:
# Push the test features through the loaded pipeline, then wrap the resulting
# array in a DataFrame that reuses the original feature column names.
x_features_test_arr = credit_default_model_pipeline.transform(x_features_test)
df_features_test = pd.DataFrame(
    data=x_features_test_arr,
    columns=x_features_test.columns,
)
df_features_test

Unnamed: 0,Prct_uso_tc,Edad,Nro_prestao_retrasados,Prct_deuda_vs_ingresos,Mto_ingreso_mensual,Nro_prod_financieros_deuda,Nro_retraso_60dias,Nro_creditos_hipotecarios,Nro_retraso_ultm3anios,Nro_dependiente
0,0.310211,-0.286772,-0.099806,-0.691093,0.689499,-0.068067,-0.064097,-0.017477,-0.058353,0.216263
1,1.351092,-0.083727,-0.099806,1.811477,0.171510,0.354444,-0.064097,-0.017477,-0.058353,2.024714
2,1.900000,-1.369679,-0.099806,-0.137763,-1.517463,-0.913090,-0.064097,-0.907701,-0.058353,-0.687963
3,1.841611,-0.828225,0.826895,1.811477,0.171510,-0.279323,-0.064097,-0.017477,-0.058353,2.024714
4,-0.205455,-0.828225,-0.099806,-0.336876,0.132749,0.988211,-0.064097,0.872746,-0.058353,-0.687963
...,...,...,...,...,...,...,...,...,...,...
31495,-0.848637,1.472951,-0.099806,-0.931702,1.438730,0.565700,-0.064097,-0.017477,-0.058353,-0.687963
31496,1.156834,-1.437360,-0.099806,-0.659442,-0.630605,-0.490579,-0.064097,-0.907701,-0.058353,-0.687963
31497,0.733603,-0.286772,-0.099806,-0.146622,1.757670,1.621979,-0.064097,0.872746,-0.058353,0.216263
31498,-0.793920,-0.895907,-0.099806,-0.858712,0.371216,0.143188,-0.064097,-0.907701,-0.058353,-0.687963


### Modelo Naive Bayes

In [66]:
# Gaussian Naive Bayes: fit on the training features, predict on the
# transformed test features and record the test accuracy.
model_nb = GaussianNB(var_smoothing=0.5).fit(x_features, y_target)
y_pred_nb = model_nb.predict(df_features_test)
acc_nb = accuracy_score(y_true=y_target_test, y_pred=y_pred_nb)


### Modelo Random Forest

In [67]:
# Random forest: fit on the training features, predict on the transformed
# test features and record the test accuracy.
rf_classifier = RandomForestClassifier(
    n_estimators=50,
    min_samples_split=10,
    random_state=42,
).fit(x_features, y_target)
y_pred_rf = rf_classifier.predict(df_features_test)
acc_rf = accuracy_score(y_true=y_target_test, y_pred=y_pred_rf)


### Modelo Decision Tree

In [68]:
# Decision tree: fit on the training features, predict on the transformed test
# features and record the test accuracy.
# random_state is pinned (same value as the random forest cell) so that the
# fitted tree, and therefore acc_dt and the model ranking, is repeatable.
clf = DecisionTreeClassifier(
    criterion='gini',
    max_depth=5,
    min_samples_split=5,
    random_state=42,
)
clf.fit(x_features, y_target)
y_pred_dt = clf.predict(df_features_test)

acc_dt = accuracy_score(y_target_test, y_pred_dt)

### Modelo de Regresión Logística

In [69]:
# Logistic regression (liblinear solver, L2 penalty): fit on the training
# features, predict on the transformed test features and record the accuracy.
model_rl = LogisticRegression(
    penalty='l2',
    solver='liblinear',
    C=10,
).fit(x_features, y_target)
y_pred_rl = model_rl.predict(df_features_test)
acc_rl = accuracy_score(y_true=y_target_test, y_pred=y_pred_rl)

### Modelo de Red Neuronal

In [70]:
# Seed the Python, NumPy and TensorFlow random generators before building the
# network, so weight initialisation and batch shuffling (and hence acc_rn and
# the model comparison below) do not change from run to run.
tf.keras.utils.set_random_seed(42)

# Small feed-forward binary classifier. The input shape is declared with an
# explicit Input object rather than `input_shape=` on the first Dense layer,
# which Keras warns against for Sequential models.
model_rn = Sequential([
    tf.keras.Input(shape=(10,)),      # 10 input features
    Dense(5, activation='relu'),      # hidden layer with 5 neurons
    Dense(1, activation='sigmoid'),   # single sigmoid output
])

model_rn.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model_rn.fit(x_features, y_target, epochs=10)

# The network outputs one sigmoid value per row; threshold at 0.5 to get 0/1 labels.
y_pred_rn1 = model_rn.predict(df_features_test)
y_pred_rn = (y_pred_rn1 > 0.5).astype(int)
acc_rn = accuracy_score(y_target_test, y_pred_rn)


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 696us/step - accuracy: 0.9195 - loss: 0.3339
Epoch 2/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 668us/step - accuracy: 0.9331 - loss: 0.1962
Epoch 3/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 618us/step - accuracy: 0.9343 - loss: 0.1857
Epoch 4/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 619us/step - accuracy: 0.9344 - loss: 0.1860
Epoch 5/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 638us/step - accuracy: 0.9340 - loss: 0.1853
Epoch 6/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 682us/step - accuracy: 0.9373 - loss: 0.1803
Epoch 7/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 666us/step - accuracy: 0.9363 - loss: 0.1796
Epoch 8/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 620us/step - accuracy: 0.9359 - loss: 0.1836
Epoch 9/10
[1m2297

In [71]:
# Define the neural network
def create_neural_network(n_features=10):
    """Build and compile the small binary-classification network.

    The architecture is one hidden Dense layer with 5 ReLU units followed by a
    single sigmoid output unit, compiled with the Adam optimizer, binary
    cross-entropy loss and the accuracy metric.

    Parameters
    ----------
    n_features : int, default 10
        Number of input columns the network expects. The default keeps the
        previous hard-coded input width.

    Returns
    -------
    A compiled Keras ``Sequential`` model (not yet fitted).
    """
    # Declare the input with an explicit Input object; passing `input_shape=`
    # to the first Dense layer makes Keras emit a warning for Sequential models.
    model = Sequential([
        tf.keras.Input(shape=(n_features,)),
        Dense(5, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


# Wrapper so the Keras network can be used as the last step of the pipeline
class NeuralNetworkStep:
    """Estimator-style wrapper (``fit`` / ``predict``) around the Keras network.

    ``fit`` and ``predict`` operate on the data they are handed. They previously
    ignored their arguments and read notebook-level variables instead, so a
    pipeline containing this step never trained on, or predicted for, the data
    the pipeline passed in.

    Parameters
    ----------
    epochs : int, default 10
        Number of training epochs used by ``fit``.
    """

    def __init__(self, epochs=10):
        self.epochs = epochs
        self.model = create_neural_network()

    def fit(self, X, y):
        """Train the wrapped network on ``X`` / ``y`` and return ``self``."""
        self.model.fit(X, y, epochs=self.epochs)
        # Returning self follows the scikit-learn estimator convention.
        return self

    def predict(self, X):
        """Return the wrapped network's raw output for ``X``.

        The output is whatever ``self.model.predict`` returns; no thresholding
        is applied here, so callers convert it to class labels themselves.
        """
        return self.model.predict(X)

In [72]:
# Map each model's short key to the accuracy it obtained on the test set.
modelos = dict(nb=acc_nb, rf=acc_rf, dt=acc_dt, rl=acc_rl, rn=acc_rn)

# Pick the key with the highest accuracy (the first one wins on ties).
mejor_modelo, _ = max(modelos.items(), key=lambda par: par[1])

print("El mejor modelo es:", mejor_modelo)


El mejor modelo es: rn


In [73]:
# Append the winning model as the final step of the loaded pipeline.
#
# Each factory rebuilds the estimator with the same hyper-parameters that were
# used when the models were compared above; appending a default-constructed
# estimator would ship a different model from the one that was selected.
_model_step_factories = {
    "nb": ("modelo_naive_bayes",
           lambda: GaussianNB(var_smoothing=0.5)),
    "rf": ("modelo_random_forest",
           lambda: RandomForestClassifier(n_estimators=50, random_state=42, min_samples_split=10)),
    "dt": ("modelo_decision_tree",
           lambda: DecisionTreeClassifier(criterion='gini', max_depth=5, min_samples_split=5)),
    "rl": ("modelo_regresion_lineal",
           lambda: LogisticRegression(C=10, solver='liblinear', penalty='l2')),
    "rn": ("modelo_red_neuronal",
           lambda: NeuralNetworkStep()),
}

if mejor_modelo not in _model_step_factories:
    raise ValueError(f"Unknown model key: {mejor_modelo!r}")

_step_name, _build_estimator = _model_step_factories[mejor_modelo]
_model_step_names = {name for name, _ in _model_step_factories.values()}

# Keep the cell idempotent: if a previous run already appended a model step,
# replace it rather than stacking a second estimator after it.
_steps = credit_default_model_pipeline.steps
if _steps and _steps[-1][0] in _model_step_names:
    _steps.pop()
_steps.append((_step_name, _build_estimator()))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [74]:
# Display the pipeline as the cell output to inspect its steps.
credit_default_model_pipeline

In [75]:
# Read the raw training data, discard the 'ID' column, then separate the
# 'Default' target from the remaining feature columns.
train_dataset = pd.read_csv("../data/raw/train.csv").drop(columns=['ID'])
train_dataset_target = train_dataset['Default']
train_dataset_features = train_dataset.drop(columns='Default')

In [76]:
# Fit the whole pipeline, including the model step appended earlier, on the
# features and target taken from the raw training file.
credit_default_model_pipeline.fit(train_dataset_features,train_dataset_target)

Epoch 1/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 645us/step - accuracy: 0.8835 - loss: 0.3421
Epoch 2/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 753us/step - accuracy: 0.9328 - loss: 0.2032
Epoch 3/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 692us/step - accuracy: 0.9329 - loss: 0.1913
Epoch 4/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 790us/step - accuracy: 0.9364 - loss: 0.1853
Epoch 5/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 892us/step - accuracy: 0.9352 - loss: 0.1842
Epoch 6/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 831us/step - accuracy: 0.9336 - loss: 0.1864
Epoch 7/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 654us/step - accuracy: 0.9356 - loss: 0.1823
Epoch 8/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 648us/step - accuracy: 0.9336 - loss: 0.1874
Epoch 9/

In [78]:
# Read the raw test data without its 'ID' column, run it through the fitted
# pipeline and turn the outputs into 0/1 labels using a 0.5 threshold.
test_dataset = pd.read_csv("../data/raw/test.csv").drop(columns=['ID'])
rn_prediccion = (credit_default_model_pipeline.predict(test_dataset) > 0.5).astype(int)

[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 692us/step


In [79]:
# Display the thresholded 0/1 predictions as the cell output.
rn_prediccion

array([[0],
       [0],
       [0],
       ...,
       [0],
       [0],
       [0]])