In [113]:
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

In [114]:
dataset = pd.read_csv("../data/processed/dataset_for_model.csv")

In [115]:
dataset.head()

Unnamed: 0,Default,Prct_uso_tc,Edad,Nro_prestao_retrasados,Prct_deuda_vs_ingresos,Mto_ingreso_mensual,Nro_prod_financieros_deuda,Nro_retraso_60dias,Nro_creditos_hipotecarios,Nro_retraso_ultm3anios,Nro_dependiente
0,1,0.766127,45.0,2,0.802982,9120.0,13.0,0,6,0,2
1,0,0.957151,40.0,0,0.121876,2600.0,4.0,0,0,0,1
2,0,0.65818,38.0,1,0.085113,3042.0,2.0,1,0,0,0
3,0,0.23381,30.0,0,0.03605,3300.0,5.0,0,0,0,0
4,0,0.907239,49.0,1,0.024926,12598.25,7.0,0,1,0,0


In [116]:
X = dataset.drop(['Default'], axis=1)
y = dataset['Default']

In [117]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

Configuramos y calculamos el StandarScaler

In [118]:
std_scaler = StandardScaler()
std_scaler.fit(X_train) # calcular los valores para el scaler

Guardamos el scaler configurado

In [157]:
import pickle


with open('../artifacts/std_scaler.pkl','wb') as f:
    pickle.dump(std_scaler,f)

# Creacion de Modelos

# Modelo 1

### Naive Bayes 1

In [120]:
model = GaussianNB()

In [121]:
model.fit(X_train, y_train)

In [122]:
y_pred = model.predict(X_test)

In [123]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.9338095238095238


### Naive Bayes 2

In [124]:
model = GaussianNB(var_smoothing=2.0)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [125]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.9341587301587302


### Naive Bayes 3

In [126]:
model = GaussianNB(var_smoothing=0.5)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [127]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.9341587301587302


# Modelo 2

### Random Forest 1

In [128]:
from sklearn.ensemble import RandomForestClassifier

In [129]:
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)
y_pred = rf_classifier.predict(X_test)

In [130]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.937047619047619


### Random Forest 2

In [131]:
rf_classifier = RandomForestClassifier(n_estimators=50, random_state=42, min_samples_split=10)
rf_classifier.fit(X_train, y_train)
y_pred = rf_classifier.predict(X_test)

In [132]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.937936507936508


Guardamos modelo para produccion

In [158]:
with open('../models/random_forest_v1.pkl','wb') as f:
    pickle.dump(model,f)

### Random Forest 3

In [133]:
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42, min_samples_split=20)
rf_classifier.fit(X_train, y_train)
y_pred = rf_classifier.predict(X_test)

In [134]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.9378095238095238


# Modelo 3

### Decision Tree 1

In [135]:
from sklearn.tree import DecisionTreeClassifier

In [136]:
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [137]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.897079365079365


### Decision Tree 2

In [138]:
clf = DecisionTreeClassifier(criterion='gini',max_depth=2)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [139]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.9361269841269841


### Decision Tree 3

In [140]:
clf = DecisionTreeClassifier(criterion='gini',max_depth=5, min_samples_split=5)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [141]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.9362539682539682


# Modelo 4

### Modelo de regresion 1

In [142]:
from sklearn.linear_model import LogisticRegression

In [143]:
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [144]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.9338412698412698


### Modelo de regresion 2

In [145]:
model = LogisticRegression(C=0.01,solver='saga')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)



In [146]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.933936507936508


### Modelo de regresion 3

In [147]:
model = LogisticRegression(C=10,solver='liblinear',penalty='l2')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [148]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.9355555555555556


# Modelo 5

### Modelo de red reuronal 1

In [149]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import KFold

In [150]:
model = Sequential([

    Dense(64, activation='relu', input_shape=(10,)),  # Capa oculta con 64 neuronas
    Dense(1, activation='sigmoid') 
])

model.compile(optimizer='adam',
              loss='binary_crossentropy', 

              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test)) 

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 868us/step - accuracy: 0.8563 - loss: 16.0620 - val_accuracy: 0.9137 - val_loss: 0.2630
Epoch 2/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 783us/step - accuracy: 0.8939 - loss: 1.4397 - val_accuracy: 0.9283 - val_loss: 0.5137
Epoch 3/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 788us/step - accuracy: 0.8932 - loss: 1.1156 - val_accuracy: 0.9126 - val_loss: 0.2840
Epoch 4/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 763us/step - accuracy: 0.8979 - loss: 0.9985 - val_accuracy: 0.9342 - val_loss: 1.4431
Epoch 5/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 825us/step - accuracy: 0.8924 - loss: 1.2449 - val_accuracy: 0.8048 - val_loss: 0.4351
Epoch 6/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 754us/step - accuracy: 0.8975 - loss: 0.8772 - val_accuracy: 0.9323 - val_loss: 0.6612
Epoch 7/10
[1

<keras.src.callbacks.history.History at 0x2395244b0b0>

In [151]:
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 605us/step - accuracy: 0.9065 - loss: 0.7323
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 433us/step


### Modelo de red reuronal 2

In [152]:
model = Sequential([

    Dense(5, activation='relu', input_shape=(10,)),  # Capa oculta con 5 neuronas
    Dense(1, activation='sigmoid') 
])

model.compile(optimizer='adam',
              loss='binary_crossentropy', 

              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test)) 


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 793us/step - accuracy: 0.6846 - loss: 266.4034 - val_accuracy: 0.9198 - val_loss: 0.3864
Epoch 2/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 803us/step - accuracy: 0.9094 - loss: 0.3551 - val_accuracy: 0.9192 - val_loss: 0.2601
Epoch 3/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 748us/step - accuracy: 0.9172 - loss: 0.3416 - val_accuracy: 0.9299 - val_loss: 0.2511
Epoch 4/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 738us/step - accuracy: 0.9248 - loss: 0.3129 - val_accuracy: 0.9341 - val_loss: 0.3960
Epoch 5/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 757us/step - accuracy: 0.9245 - loss: 0.3154 - val_accuracy: 0.9335 - val_loss: 0.2813
Epoch 6/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 741us/step - accuracy: 0.9235 - loss: 0.3071 - val_accuracy: 0.9305 - val_loss: 0.2239
Epoch 7/10
[

<keras.src.callbacks.history.History at 0x239525bfda0>

In [153]:
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 593us/step - accuracy: 0.9228 - loss: 0.3031
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 520us/step


### Modelo de red reuronal 3

In [154]:
model = Sequential([

    Dense(10, activation='relu', input_shape=(10,)),  # Capa oculta con 5 neuronas
    Dense(1, activation='sigmoid') 
])

model.compile(optimizer='adam',
              loss='binary_crossentropy', 

              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test)) 

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 803us/step - accuracy: 0.8433 - loss: 37.8183 - val_accuracy: 0.9342 - val_loss: 0.7571
Epoch 2/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 788us/step - accuracy: 0.9185 - loss: 0.3967 - val_accuracy: 0.9326 - val_loss: 0.2188
Epoch 3/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 776us/step - accuracy: 0.9169 - loss: 0.4286 - val_accuracy: 0.8674 - val_loss: 0.3481
Epoch 4/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 794us/step - accuracy: 0.9143 - loss: 0.3935 - val_accuracy: 0.9330 - val_loss: 0.2607
Epoch 5/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 740us/step - accuracy: 0.9120 - loss: 0.4320 - val_accuracy: 0.8963 - val_loss: 0.2734
Epoch 6/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 921us/step - accuracy: 0.9147 - loss: 0.4040 - val_accuracy: 0.7797 - val_loss: 0.4706
Epoch 7/10
[1

<keras.src.callbacks.history.History at 0x23948a5e1b0>

In [155]:
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 591us/step - accuracy: 0.9133 - loss: 0.4048
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 452us/step
