In [166]:
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

In [167]:
dataset = pd.read_csv("../data/processed/dataset_for_model.csv")

In [168]:
dataset.head()

Unnamed: 0,Default,Prct_uso_tc,Edad,Nro_prestao_retrasados,Prct_deuda_vs_ingresos,Mto_ingreso_mensual,Nro_prod_financieros_deuda,Nro_retraso_60dias,Nro_creditos_hipotecarios,Nro_retraso_ultm3anios,Nro_dependiente
0,1,0.766127,45.0,2,0.802982,9120.0,13.0,0,6,0,2
1,0,0.957151,40.0,0,0.121876,2600.0,4.0,0,0,0,1
2,0,0.65818,38.0,1,0.085113,3042.0,2.0,1,0,0,0
3,0,0.23381,30.0,0,0.03605,3300.0,5.0,0,0,0,0
4,0,0.907239,49.0,1,0.024926,12598.25,7.0,0,1,0,0


In [169]:
X = dataset.drop(['Default'], axis=1)
y = dataset['Default']

In [170]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

Configuramos y calculamos el StandarScaler

In [171]:
std_scaler = StandardScaler()
std_scaler.fit(X_train) # calcular los valores para el scaler

Guardamos el scaler configurado

In [172]:
import pickle


with open('../artifacts/std_scaler.pkl','wb') as f:
    pickle.dump(std_scaler,f)

# Creacion de Modelos

# Modelo 1

### Naive Bayes 1

In [173]:
model = GaussianNB()

In [174]:
model.fit(X_train, y_train)

In [175]:
y_pred = model.predict(X_test)

In [176]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.9338095238095238


### Naive Bayes 2

In [177]:
model = GaussianNB(var_smoothing=2.0)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [178]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.9341587301587302


### Naive Bayes 3

In [179]:
model = GaussianNB(var_smoothing=0.5)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [180]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.9341587301587302


# Modelo 2

### Random Forest 1

In [181]:
from sklearn.ensemble import RandomForestClassifier

In [182]:
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)
y_pred = rf_classifier.predict(X_test)

In [183]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.937047619047619


### Random Forest 2

In [184]:
rf_classifier = RandomForestClassifier(n_estimators=50, random_state=42, min_samples_split=10)
rf_classifier.fit(X_train, y_train)
y_pred = rf_classifier.predict(X_test)

In [185]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.937936507936508


Guardamos modelo para produccion

In [186]:
with open('../models/random_forest_v1.pkl','wb') as f:
    pickle.dump(model,f)

### Random Forest 3

In [187]:
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42, min_samples_split=20)
rf_classifier.fit(X_train, y_train)
y_pred = rf_classifier.predict(X_test)

In [188]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.9378095238095238


# Modelo 3

### Decision Tree 1

In [189]:
from sklearn.tree import DecisionTreeClassifier

In [190]:
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [191]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.8977142857142857


### Decision Tree 2

In [192]:
clf = DecisionTreeClassifier(criterion='gini',max_depth=2)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [193]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.9361269841269841


### Decision Tree 3

In [194]:
clf = DecisionTreeClassifier(criterion='gini',max_depth=5, min_samples_split=5)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [195]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.9362539682539682


# Modelo 4

### Modelo de regresion 1

In [196]:
from sklearn.linear_model import LogisticRegression

In [197]:
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [198]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.9338412698412698


### Modelo de regresion 2

In [199]:
model = LogisticRegression(C=0.01,solver='saga')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)



In [200]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.933936507936508


### Modelo de regresion 3

In [201]:
model = LogisticRegression(C=10,solver='liblinear',penalty='l2')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [202]:
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", accuracy)

Accuracy: 0.9355555555555556


# Modelo 5

### Modelo de red reuronal 1

In [203]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import KFold

In [204]:
model = Sequential([

    Dense(64, activation='relu', input_shape=(10,)),  # Capa oculta con 64 neuronas
    Dense(1, activation='sigmoid') 
])

model.compile(optimizer='adam',
              loss='binary_crossentropy', 

              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test)) 

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 800us/step - accuracy: 0.8438 - loss: 60.1666 - val_accuracy: 0.9342 - val_loss: 2.7279
Epoch 2/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 698us/step - accuracy: 0.8933 - loss: 1.2946 - val_accuracy: 0.9332 - val_loss: 0.9026
Epoch 3/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 826us/step - accuracy: 0.8944 - loss: 0.9526 - val_accuracy: 0.9341 - val_loss: 2.3064
Epoch 4/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 769us/step - accuracy: 0.8940 - loss: 1.0553 - val_accuracy: 0.9299 - val_loss: 0.5971
Epoch 5/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 775us/step - accuracy: 0.8970 - loss: 0.8546 - val_accuracy: 0.9179 - val_loss: 0.2653
Epoch 6/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 733us/step - accuracy: 0.8955 - loss: 0.8733 - val_accuracy: 0.9336 - val_loss: 2.2284
Epoch 7/10
[1

<keras.src.callbacks.history.History at 0x23936fb0ce0>

In [205]:
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 566us/step - accuracy: 0.9042 - loss: 0.6094
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 445us/step


### Modelo de red reuronal 2

In [206]:
model = Sequential([

    Dense(5, activation='relu', input_shape=(10,)),  # Capa oculta con 5 neuronas
    Dense(1, activation='sigmoid') 
])

model.compile(optimizer='adam',
              loss='binary_crossentropy', 

              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test)) 


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 813us/step - accuracy: 0.6831 - loss: 213.3518 - val_accuracy: 0.9313 - val_loss: 0.2965
Epoch 2/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 718us/step - accuracy: 0.9264 - loss: 0.3122 - val_accuracy: 0.9344 - val_loss: 0.3431
Epoch 3/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 733us/step - accuracy: 0.9276 - loss: 0.3086 - val_accuracy: 0.9343 - val_loss: 0.4750
Epoch 4/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 720us/step - accuracy: 0.9257 - loss: 0.3029 - val_accuracy: 0.9348 - val_loss: 0.2247
Epoch 5/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 740us/step - accuracy: 0.9242 - loss: 0.3269 - val_accuracy: 0.9339 - val_loss: 0.4320
Epoch 6/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 733us/step - accuracy: 0.9223 - loss: 0.3190 - val_accuracy: 0.8758 - val_loss: 0.3339
Epoch 7/10
[

<keras.src.callbacks.history.History at 0x23937b63770>

In [207]:
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 588us/step - accuracy: 0.9240 - loss: 0.3127
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 436us/step


### Modelo de red reuronal 3

In [208]:
model = Sequential([

    Dense(10, activation='relu', input_shape=(10,)),  # Capa oculta con 5 neuronas
    Dense(1, activation='sigmoid') 
])

model.compile(optimizer='adam',
              loss='binary_crossentropy', 

              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test)) 

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 740us/step - accuracy: 0.9028 - loss: 1.1810 - val_accuracy: 0.9341 - val_loss: 0.3443
Epoch 2/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 786us/step - accuracy: 0.9184 - loss: 0.3843 - val_accuracy: 0.9314 - val_loss: 0.2421
Epoch 3/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 725us/step - accuracy: 0.9121 - loss: 0.4413 - val_accuracy: 0.9307 - val_loss: 0.2391
Epoch 4/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 738us/step - accuracy: 0.9147 - loss: 0.3819 - val_accuracy: 0.9300 - val_loss: 0.2507
Epoch 5/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 728us/step - accuracy: 0.9129 - loss: 0.3837 - val_accuracy: 0.7081 - val_loss: 0.5734
Epoch 6/10
[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 745us/step - accuracy: 0.9160 - loss: 0.3900 - val_accuracy: 0.9337 - val_loss: 0.4464
Epoch 7/10
[1m

<keras.src.callbacks.history.History at 0x23950d7dee0>

In [209]:
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

[1m2297/2297[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 652us/step - accuracy: 0.9156 - loss: 0.3807
[1m985/985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 457us/step
