In [14]:
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import mlflow 

In [15]:
dataset = pd.read_csv("../data/processed/dataset_for_model.csv")

In [3]:
dataset.head()

Unnamed: 0,Default,Prct_uso_tc,Edad,Nro_prestao_retrasados,Prct_deuda_vs_ingresos,Mto_ingreso_mensual,Nro_prod_financieros_deuda,Nro_retraso_60dias,Nro_creditos_hipotecarios,Nro_retraso_ultm3anios,Nro_dependiente
0,1,0.766127,45.0,2,0.802982,9120.0,13.0,0,6,0,2
1,0,0.957151,40.0,0,0.121876,2600.0,4.0,0,0,0,1
2,0,0.65818,38.0,1,0.085113,3042.0,2.0,1,0,0,0
3,0,0.23381,30.0,0,0.03605,3300.0,5.0,0,0,0,0
4,0,0.907239,49.0,1,0.024926,12598.25,7.0,0,1,0,0


In [16]:
X = dataset.drop(['Default'], axis=1)
y = dataset['Default']

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

Configuramos y calculamos el StandarScaler

In [18]:
std_scaler = StandardScaler()
std_scaler.fit(X_train) # calcular los valores para el scaler

Guardamos el scaler configurado

In [7]:
import pickle


with open('../artifacts/std_scaler.pkl','wb') as f:
    pickle.dump(std_scaler,f)

# Creacion de Modelos

In [8]:
mlflow.set_tracking_uri('http://127.0.0.1:8080')
mlflow.set_experiment('Credit Default Predict Model')

2024/12/19 22:42:35 INFO mlflow.tracking.fluent: Experiment with name 'Credit Default Predict Model' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/944934590087954932', creation_time=1734669755422, experiment_id='944934590087954932', last_update_time=1734669755422, lifecycle_stage='active', name='Credit Default Predict Model', tags={}>

# Modelo 1

### Naive Bayes 1

In [24]:
with mlflow.start_run():
    
    #Naive Bayes 1
    nb_classifier = GaussianNB()
    nb_classifier.fit(X_train, y_train)
    y_pred = nb_classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    #Registramos Hiper-parametros
    params_nb1=dict(var_smoothing=1e-9)
    mlflow.log_params(params_nb1)
    #Registramos métricas
    mlflow.log_metric('accuracy_score',accuracy)
    #Registramos  modelo entrenado
    mlflow.sklearn.log_model(nb_classifier,"Naive Bayes 1")

    mlflow.end_run()



🏃 View run resilient-chimp-984 at: http://127.0.0.1:8080/#/experiments/944934590087954932/runs/833bedaca05a46ecaf51dee05757a9ac
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/944934590087954932


### Naive Bayes 2

In [25]:
with mlflow.start_run():
    
    #Naive Bayes 2
    nb_classifier = GaussianNB(var_smoothing=2.0)
    nb_classifier.fit(X_train, y_train)
    y_pred = nb_classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    #Registramos Hiper-parametros
    params_nb2=dict(var_smoothing=2.0)
    mlflow.log_params(params_nb2)
    #Registramos métricas
    mlflow.log_metric('accuracy_score',accuracy)
    #Registramos  modelo entrenado
    mlflow.sklearn.log_model(nb_classifier,"Naive Bayes 2")

    mlflow.end_run()



🏃 View run intrigued-sow-934 at: http://127.0.0.1:8080/#/experiments/944934590087954932/runs/7afd867747454a7c803976391588de86
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/944934590087954932


### Naive Bayes 3

In [26]:
with mlflow.start_run():
    #Naive Bayes 3
    nb_classifier = GaussianNB(var_smoothing=0.5)
    nb_classifier.fit(X_train, y_train)
    y_pred = nb_classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    #Registramos Hiper-parametros
    params_nb3=dict(var_smoothing=0.5)
    mlflow.log_params(params_nb3)
    #Registramos métricas
    mlflow.log_metric('accuracy_score',accuracy)
    #Registramos  modelo entrenado
    mlflow.sklearn.log_model(nb_classifier,"Naive Bayes 3") 

    mlflow.end_run()



🏃 View run wise-elk-813 at: http://127.0.0.1:8080/#/experiments/944934590087954932/runs/84ef61a542e94a62a3fc75278a1af41f
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/944934590087954932


# Modelo 2

### Random Forest 1

In [28]:
with mlflow.start_run():
    
    #Random Forest 1
    rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
    rf_classifier.fit(X_train, y_train)
    y_pred = rf_classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    #Registramos Hiper-parametros
    params_rf1=dict(n_estimators=100, random_state=42)
    mlflow.log_params(params_rf1)
    #Registramos métricas
    mlflow.log_metric('accuracy_score',accuracy)
    #Registramos  modelo entrenado
    mlflow.sklearn.log_model(rf_classifier,"Random Forest 1")

    mlflow.end_run()



🏃 View run stately-shrike-826 at: http://127.0.0.1:8080/#/experiments/944934590087954932/runs/90e3509295f34d0fa56b6f6b396a989f
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/944934590087954932


### Random Forest 2

In [29]:
with mlflow.start_run():

    #Random Forest 2
    rf_classifier = RandomForestClassifier(n_estimators=50, random_state=42, min_samples_split=10)
    rf_classifier.fit(X_train, y_train)
    y_pred = rf_classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    #Registramos Hiper-parametros
    params_rf2=dict(n_estimators=50, random_state=42, min_samples_split=10)
    mlflow.log_params(params_rf2)
    #Registramos métricas
    mlflow.log_metric('accuracy_score',accuracy)
    #Registramos  modelo entrenado
    mlflow.sklearn.log_model(rf_classifier,"Random Forest 2")

    mlflow.end_run()



🏃 View run persistent-grub-684 at: http://127.0.0.1:8080/#/experiments/944934590087954932/runs/e8bee5a630514ee699bcade74bc01da3
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/944934590087954932


Guardamos modelo para produccion

In [186]:
with open('../models/random_forest_v1.pkl','wb') as f:
    pickle.dump(model,f)

### Random Forest 3

In [30]:
with mlflow.start_run():
    
    #Random Forest 3
    rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42, min_samples_split=20)
    rf_classifier.fit(X_train, y_train)
    y_pred = rf_classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    #Registramos Hiper-parametros
    params_rf3=dict(n_estimators=100, random_state=42, min_samples_split=20)
    mlflow.log_params(params_rf3)
    #Registramos métricas
    mlflow.log_metric('accuracy_score',accuracy)
    #Registramos  modelo entrenado
    mlflow.sklearn.log_model(rf_classifier,"Random Forest 3")

    mlflow.end_run()



🏃 View run unruly-loon-350 at: http://127.0.0.1:8080/#/experiments/944934590087954932/runs/56b8d5f74f0a4e0c811c6d97a41be4ab
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/944934590087954932


# Modelo 3

### Decision Tree 1

In [31]:
with mlflow.start_run():

    #Decision Tree 1
    dt_classifier = DecisionTreeClassifier()
    dt_classifier.fit(X_train, y_train)
    y_pred = dt_classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    #Registramos Hiper-parametros
    params_dt1=dict(criterion='gini',max_depth='ninguno')
    mlflow.log_params(params_dt1)
    #Registramos métricas
    mlflow.log_metric('accuracy_score',accuracy)
    #Registramos  modelo entrenado
    mlflow.sklearn.log_model(dt_classifier,"Decision Tree 1")

    mlflow.end_run()



🏃 View run righteous-eel-639 at: http://127.0.0.1:8080/#/experiments/944934590087954932/runs/98fba37e0c3f4dbab966d8a6f9e3651b
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/944934590087954932


### Decision Tree 2

In [32]:
with mlflow.start_run():   
    #Decision Tree 2
    dt_classifier = DecisionTreeClassifier(criterion='gini',max_depth=2)
    dt_classifier.fit(X_train, y_train)
    y_pred = dt_classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    #Registramos Hiper-parametros
    params_dt2=dict(criterion='gini',max_depth=2)
    mlflow.log_params(params_dt2)
    #Registramos métricas
    mlflow.log_metric('accuracy_score',accuracy)
    #Registramos  modelo entrenado
    mlflow.sklearn.log_model(dt_classifier,"Decision Tree 2")

    mlflow.end_run()



🏃 View run colorful-worm-253 at: http://127.0.0.1:8080/#/experiments/944934590087954932/runs/95a816eb685f43559e8e56a0a51a2cf3
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/944934590087954932


### Decision Tree 3

In [33]:
with mlflow.start_run():    
    #Decision Tree 3
    dt_classifier = DecisionTreeClassifier(criterion='gini',max_depth=5, min_samples_split=5)
    dt_classifier.fit(X_train, y_train)
    y_pred = dt_classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    #Registramos Hiper-parametros
    params_dt3=dict(criterion='gini',max_depth=5, min_samples_split=5)
    mlflow.log_params(params_dt3)
    #Registramos métricas
    mlflow.log_metric('accuracy_score',accuracy)
    #Registramos  modelo entrenado
    mlflow.sklearn.log_model(dt_classifier,"Decision Tree 3")
    
    mlflow.end_run()



🏃 View run likeable-mule-557 at: http://127.0.0.1:8080/#/experiments/944934590087954932/runs/0a3da77e259e405d8969be2f5beb4856
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/944934590087954932


# Modelo 4

### Modelo de regresion 1

In [34]:
with mlflow.start_run():
 
    #Logistic Regression 1
    lr_classifier = LogisticRegression()
    lr_classifier.fit(X_train, y_train)
    y_pred = lr_classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    #Registramos Hiper-parametros
    params_lr1=dict(C=1.0,solver='liblinear')
    mlflow.log_params(params_lr1)
    #Registramos métricas
    mlflow.log_metric('accuracy_score',accuracy)
    #Registramos  modelo entrenado
    mlflow.sklearn.log_model(lr_classifier,"Logistic Regression 1")

    mlflow.end_run()

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


🏃 View run masked-squirrel-246 at: http://127.0.0.1:8080/#/experiments/944934590087954932/runs/fd3b414e3f1644cf8191656b9789f8da
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/944934590087954932


### Modelo de regresion 2

In [35]:
with mlflow.start_run(): 
    #Logistic Regression 2
    lr_classifier = LogisticRegression(C=0.01,solver='saga')
    lr_classifier.fit(X_train, y_train)
    y_pred = lr_classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    #Registramos Hiper-parametros
    params_lr2=dict(C=0.01,solver='saga')
    mlflow.log_params(params_lr2)
    #Registramos métricas
    mlflow.log_metric('accuracy_score',accuracy)
    #Registramos  modelo entrenado
    mlflow.sklearn.log_model(lr_classifier,"Logistic Regression 2")

    mlflow.end_run()



🏃 View run learned-tern-38 at: http://127.0.0.1:8080/#/experiments/944934590087954932/runs/2e3e42db0e7d43a1b61524462e333f42
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/944934590087954932


### Modelo de regresion 3

In [36]:
with mlflow.start_run():  
    #Logistic Regression 3
    lr_classifier = LogisticRegression(C=10,solver='liblinear',penalty='l2')
    lr_classifier.fit(X_train, y_train)
    y_pred = lr_classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    #Registramos Hiper-parametros
    params_lr3=dict(C=10,solver='liblinear',penalty='l2')
    mlflow.log_params(params_lr3)
    #Registramos métricas
    mlflow.log_metric('accuracy_score',accuracy)
    #Registramos  modelo entrenado
    mlflow.sklearn.log_model(lr_classifier,"Logistic Regression 3")
    
    mlflow.end_run()



🏃 View run placid-squirrel-725 at: http://127.0.0.1:8080/#/experiments/944934590087954932/runs/193c123977c445e8af2e16553c0fe463
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/944934590087954932


# Modelo 5

### Modelo de red reuronal 1

In [37]:
with mlflow.start_run():
    #Neural Network 1
    rn_classifier = Sequential([
    Dense(64, activation='relu', input_shape=(10,)),  # Capa oculta con 64 neuronas
    Dense(1, activation='sigmoid') 
    ])
    rn_classifier.compile(optimizer='adam',
                loss='binary_crossentropy', 

                metrics=['accuracy'])
    rn_classifier.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))
    y_pred_rn = rn_classifier.predict(X_test)
    y_pred_rn = (y_pred_rn > 0.5).astype(int)
    acc_rn=accuracy_score(y_test,y_pred_rn)
    #Registramos Hiper-parametros
    params_rn1=dict(optimizer='adam',loss='binary_crossentropy')
    mlflow.log_params(params_rn1)
    mlflow.log_param('1ra capa',64)
    #Registramos métricas
    mlflow.log_metric('accuracy_score',acc_rn)
    #Registramos  modelo entrenado
    mlflow.sklearn.log_model(rn_classifier,"Neural Network 1")

    mlflow.end_run() 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.8948 - loss: 2.3916 - val_accuracy: 0.6849 - val_loss: 0.6936
Epoch 2/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8921 - loss: 1.5198 - val_accuracy: 0.9250 - val_loss: 0.5374
Epoch 3/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8869 - loss: 1.8850 - val_accuracy: 0.9268 - val_loss: 0.7360
Epoch 4/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8898 - loss: 1.5183 - val_accuracy: 0.8757 - val_loss: 0.3591
Epoch 5/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8907 - loss: 1.3289 - val_accuracy: 0.9342 - val_loss: 3.9543
Epoch 6/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.8895 - loss: 1.4216 - val_accuracy: 0.9342 - val_loss: 2.9181
Epoch 7/10
[1m2



🏃 View run traveling-snail-912 at: http://127.0.0.1:8080/#/experiments/944934590087954932/runs/77d91b4b76a643898117b3da9bc2ccd9
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/944934590087954932


### Modelo de red reuronal 2

In [38]:
with mlflow.start_run():
    #Neural Network 2
    rn_classifier = Sequential([
    Dense(5, activation='relu', input_shape=(10,)),  # Capa oculta con 5 neuronas
    Dense(1, activation='sigmoid') 
    ])
    rn_classifier.compile(optimizer='adam',
                loss='binary_crossentropy', 

                metrics=['accuracy'])

    rn_classifier.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test)) 
    y_pred_rn = rn_classifier.predict(X_test)
    y_pred_rn = (y_pred_rn > 0.5).astype(int)
    acc_rn=accuracy_score(y_test,y_pred_rn)
    #Registramos Hiper-parametros
    params_rn2=dict(optimizer='adam',loss='binary_crossentropy')
    mlflow.log_params(params_rn2)
    mlflow.log_param('1ra capa',5)
    #Registramos métricas
    mlflow.log_metric('accuracy_score',acc_rn)
    #Registramos  modelo entrenado
    mlflow.sklearn.log_model(rn_classifier,"Neural Network 2")

    mlflow.end_run()


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9329 - loss: 0.5247 - val_accuracy: 0.9338 - val_loss: 0.2887
Epoch 2/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9328 - loss: 0.2705 - val_accuracy: 0.9341 - val_loss: 0.2459
Epoch 3/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9332 - loss: 0.2470 - val_accuracy: 0.9341 - val_loss: 0.2431
Epoch 4/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9341 - loss: 0.2427 - val_accuracy: 0.9342 - val_loss: 0.2424
Epoch 5/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9326 - loss: 0.2465 - val_accuracy: 0.9342 - val_loss: 0.2424
Epoch 6/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9347 - loss: 0.2410 - val_accuracy: 0.9342 - val_loss: 0.2425
Epoch 7/10
[1m2297/2297[0



🏃 View run silent-dog-676 at: http://127.0.0.1:8080/#/experiments/944934590087954932/runs/20a45cfa2af841ce93b2559b177ada69
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/944934590087954932


### Modelo de red reuronal 3

In [39]:
with mlflow.start_run():
    #Neural Network 3
    rn_classifier = Sequential([

    Dense(10, activation='relu', input_shape=(10,)),  # Capa oculta con 5 neuronas
    Dense(1, activation='sigmoid') 
    ])
    rn_classifier.compile(optimizer='adam',
                loss='binary_crossentropy', 

                metrics=['accuracy'])
    rn_classifier.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test)) 
    y_pred_rn = rn_classifier.predict(X_test)
    y_pred_rn = (y_pred_rn > 0.5).astype(int)
    acc_rn=accuracy_score(y_test,y_pred_rn)
    #Registramos Hiper-parametros
    params_rn3=dict(optimizer='adam',loss='binary_crossentropy')
    mlflow.log_params(params_rn2)
    mlflow.log_param('1ra capa',10)
    #Registramos métricas
    mlflow.log_metric('accuracy_score',acc_rn)
    #Registramos  modelo entrenado
    mlflow.sklearn.log_model(rn_classifier,"Neural Network 3")
    
    mlflow.end_run()

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.9272 - loss: 46.1424 - val_accuracy: 0.9343 - val_loss: 0.2927
Epoch 2/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9265 - loss: 0.2970 - val_accuracy: 0.9341 - val_loss: 0.2297
Epoch 3/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.9264 - loss: 0.2905 - val_accuracy: 0.9334 - val_loss: 0.2233
Epoch 4/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9255 - loss: 0.2889 - val_accuracy: 0.9342 - val_loss: 0.2967
Epoch 5/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9272 - loss: 0.2745 - val_accuracy: 0.8573 - val_loss: 0.3723
Epoch 6/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9254 - loss: 0.2907 - val_accuracy: 0.9341 - val_loss: 0.2811
Epoch 7/10
[1m2297/2297[



🏃 View run delightful-robin-556 at: http://127.0.0.1:8080/#/experiments/944934590087954932/runs/49e3e284ed9e4ed69d6d72814311e07b
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/944934590087954932
