## Logistic Regression model (Scal)

In [1]:
import pandas as pd
# Cargamos Train y Test con el escalado
train_data = pd.read_csv("/workspaces/logistic-regression-project-tutorial-pilarzarco/models/model_scal_train.csv")
test_data = pd.read_csv("/workspaces/logistic-regression-project-tutorial-pilarzarco/models/model_scal_test.csv")

train_data.head()

Unnamed: 0,duration,previous,emp.var.rate,euribor3m,nr.employed,y_n
0,0.139752,0.0,0.9375,0.957379,0.859735,0.0
1,0.487578,0.0,1.0,0.980957,1.0,0.0
2,0.271739,0.0,1.0,0.98141,1.0,0.0
3,0.259317,0.0,0.333333,0.146679,0.512287,0.0
4,0.299689,0.0,1.0,0.980957,1.0,0.0


In [2]:
# Modelo de regresión logística
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(random_state=71)
model

In [3]:
# Dividir las columnas de train

X_train = train_data.drop(["y_n"], axis = 1) # Seleción características
y_train = train_data["y_n"] # Variable objetivo

X_train.head()

Unnamed: 0,duration,previous,emp.var.rate,euribor3m,nr.employed
0,0.139752,0.0,0.9375,0.957379,0.859735
1,0.487578,0.0,1.0,0.980957,1.0
2,0.271739,0.0,1.0,0.98141,1.0
3,0.259317,0.0,0.333333,0.146679,0.512287
4,0.299689,0.0,1.0,0.980957,1.0


In [4]:
y_train

0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
37053    0.0
37054    0.0
37055    1.0
37056    0.0
37057    0.0
Name: y_n, Length: 37058, dtype: float64

In [5]:
# Dividir las columnas de test

X_test = test_data.drop(["y_n"], axis = 1)
y_test = test_data["y_n"]

X_test.head()

Unnamed: 0,duration,previous,emp.var.rate,euribor3m,nr.employed
0,0.167702,0.0,1.0,0.98073,1.0
1,0.063665,0.0,0.333333,0.150759,0.512287
2,0.099379,0.0,1.0,0.980277,1.0
3,0.312112,0.0,0.479167,0.055996,0.0
4,0.305901,0.0,1.0,0.980503,1.0


In [6]:
# Entrenar train
model.fit(X_train, y_train)

In [7]:
# Predicciones del modelo train
y_pred = model.predict(X_train)
y_pred

array([0., 0., 0., ..., 0., 0., 0.])

In [8]:
# calcular la precisión del modelo
from sklearn.metrics import accuracy_score

accuracy_score(y_train, y_pred)

0.902315289546117

In [9]:
# Búsqueda exhaustiva de hiperparámetros
from sklearn.model_selection import GridSearchCV

hyperparams = {
    "penalty": ["l1", None, "l2"],
    "dual": [True, False],
    "C": [0.001, 0.01, 0.1, 1, 10, 100]
}

grid = GridSearchCV(model, hyperparams, cv = 5)
grid

In [10]:
# Ajustar a los datos de entrenamiemto
grid.fit(X_train, y_train)



120 fits failed out of a total of 180.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
60 fits failed with the following error:
Traceback (most recent call last):
  File "/home/codespace/.local/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/codespace/.local/lib/python3.10/site-packages/sklearn/base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/home/codespace/.local/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py", line 1169, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/home/codespace/.local/lib/python3.10/site-packages/sklearn/linear_m

In [11]:
# diccionario con los mejores valores de los hiperparámetros 
grid.best_params_

{'C': 0.001, 'dual': False, 'penalty': None}

In [12]:
# Obtener mejor modelo estimado
grid.best_estimator_

In [13]:
# Acceder al mejor modelo estimado
best_model = grid.best_estimator_
# best_model = LogisticRegression(C = 0.001, penalty = None, random_state = 42)
# best_model.fit(X_train, y_train)

In [14]:
# Predicciones de los datos de entrenamiento
y_pred = best_model.predict(X_train)

In [15]:
# calcular la precisión del modelo en tus datos de entrenamiento
accuracy_score(y_train, y_pred)

0.9024502131793405

In [16]:
# Predicciones en los datos de prueba
y_pred_test = best_model.predict(X_test)

In [17]:
# calcular la precisión del modelo en tus datos de prueba
accuracy_score(y_test,y_pred_test)

0.8994657600777076

#### - Model Optimization

In [18]:
# Guardamos nuestro modelo
from pickle import dump

# Cargar el modelo desde el archivo
dump(model, open("/workspaces/logistic-regression-project-tutorial-pilarzarco/models/optimization/ML-logistic-regression-model-scal.sav", "wb"))