## Logistic Regression model

In [51]:
import pandas as pd

In [52]:
# Cargamos Train y Test
train_data = pd.read_csv("/workspaces/logistic-regression-project-tutorial-pilarzarco/models/model_scal_train.csv")
test_data = pd.read_csv("/workspaces/logistic-regression-project-tutorial-pilarzarco/models/model_scal_test.csv")

train_data.head()

Unnamed: 0,month_n,previous,emp.var.rate,euribor3m,nr.employed
0,0.0,0.0,0.9375,0.957379,0.859735
1,0.222222,0.0,1.0,0.980957,1.0
2,0.222222,0.0,1.0,0.98141,1.0
3,0.0,0.0,0.333333,0.146679,0.512287
4,0.111111,0.0,1.0,0.980957,1.0


In [53]:
# Extraemos las columnas excepto el 'target' para preparar los conjuntos de entrenamiento y prueba
X_train = train_data.drop(["y_n"], axis = 1)
y_train = train_data["y_n"]

X_test = test_data.drop(["y_n"], axis = 1)
y_test = test_data["y_n"]

KeyError: "['y_n'] not found in axis"

In [None]:
# Modelo de regresión logística
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(random_state=71)
model.fit(X_train, y_train)

In [None]:
# Etiquetas predichas por el modelo para cada instancia en el conjunto de prueba
y_pred = model.predict(X_test)
y_pred

ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- duration
- pdays
Feature names seen at fit time, yet now missing:
- month_n
- previous


In [None]:
# Calcular la precisión del modelo de regresión logística
from sklearn.metrics import accuracy_score

accuracy_score(y_test, y_pred)

0.9023797960174842

#### - Model Optimization

In [None]:
from sklearn.model_selection import GridSearchCV

# Definición de Hiperparámetros
hyperparams = {
    "C": [0.001, 0.01, 0.1, 1, 10, 100, 1000],
    "penalty": ["l1", "l2", "elasticnet", None],
    "solver": ["newton-cg", "lbfgs", "liblinear", "sag", "saga"]
}

# La búsqueda en malla 
grid = GridSearchCV(model, hyperparams, scoring = "accuracy", cv = 10)
grid

In [None]:
# Mejores hiperparámetros
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

grid.fit(X_train, y_train)

print(f"Best hyperparameters: {grid.best_params_}")



KeyboardInterrupt: 

In [None]:
# Ajustar a los mejores hiperparámetros
model = LogisticRegression(C = 0.1, penalty = "l1", solver = "saga")
model.fit(X_train, y_train)

In [None]:
# Predicciones en la prueba
y_pred = model.predict(X_test)
y_pred

array([0., 0., 0., ..., 0., 0., 0.])

In [None]:
# Evaluar la precisión del modelo de regresión logística
accuracy_score(y_test, y_pred)

0.8970373967945605

In [None]:
# Guardamos nuestro modelo
from pickle import dump

dump(model, open("/workspaces/logistic-regression-project-tutorial-pilarzarco/models/ML-logistic-regression-model-scal.sav", "wb"))