## Logistic Regression model

In [1]:
import pandas as pd

In [2]:
# Load the cleaned train and test CSV files into DataFrames.
# NOTE(review): both paths are absolute and tied to one workspace layout —
# consider a configurable base directory.
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/workspaces/logistic-regression-project-tutorial-pilarzarco/interini/clean_train.csv",
        "/workspaces/logistic-regression-project-tutorial-pilarzarco/interini/clean_test.csv",
    )
)

# Preview the first rows of the training frame.
train_data.head()

Unnamed: 0,month_n,previous,emp.var.rate,euribor3m,nr.employed,y_n
0,0.222222,0.0,1.0,0.98073,1.0,0.0
1,0.0,0.0,0.333333,0.138291,0.512287,0.0
2,0.0,0.0,0.9375,0.956926,0.859735,0.0
3,0.0,0.0,0.9375,0.957379,0.859735,0.0
4,0.888889,0.0,0.333333,0.175924,0.512287,0.0


In [3]:
# Split each frame into the feature matrix (X) and the target vector (y).
#
# Besides the target "y_n", the frames carry an "Unnamed: 0" column (visible
# in the preview above with values 0, 1, 2, ...). It looks like a row index
# that was written into the CSV when it was saved, not a real predictor, so
# it is excluded from the features. errors="ignore" keeps this cell working
# if the CSVs are later regenerated without that column.
X_train = train_data.drop(columns=["y_n", "Unnamed: 0"], errors="ignore")
y_train = train_data["y_n"]
X_test = test_data.drop(columns=["y_n", "Unnamed: 0"], errors="ignore")
y_test = test_data["y_n"]

In [4]:
# Baseline logistic regression: library defaults plus a fixed seed,
# trained on the training split.
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(random_state=71)
model.fit(X=X_train, y=y_train)

In [5]:
# Class labels predicted by the baseline model for every row of the test set.
y_pred = model.predict(X=X_test)
y_pred

array([0., 0., 0., ..., 0., 0., 0.])

In [6]:
# Accuracy of the baseline model on the test set
# (compares the true labels against the predicted ones).
from sklearn.metrics import accuracy_score

accuracy_score(y_true=y_test, y_pred=y_pred)

0.8853812530354541

#### Model Optimization

In [7]:
from sklearn.model_selection import GridSearchCV

# Hyperparameter search space.
#
# Not every solver supports every penalty, so the grid is given as a list of
# sub-grids that contain only supported (penalty, solver) pairs:
#   - l2   : newton-cg, lbfgs, liblinear, sag, saga
#   - l1   : liblinear, saga
#   - None : newton-cg, lbfgs, sag, saga (C has no effect without a penalty,
#            so it is not searched here)
# A single dict would take the full cartesian product, and every unsupported
# pair would fail to fit and be scored as NaN, wasting most of the search.
#
# "elasticnet" is left out because it only works with solver="saga" and also
# requires l1_ratio; to search it, add a sub-grid such as
#   {"penalty": ["elasticnet"], "solver": ["saga"],
#    "l1_ratio": [0.25, 0.5, 0.75], "C": c_values}
c_values = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
hyperparams = [
    {
        "penalty": ["l2"],
        "solver": ["newton-cg", "lbfgs", "liblinear", "sag", "saga"],
        "C": c_values,
    },
    {
        "penalty": ["l1"],
        "solver": ["liblinear", "saga"],
        "C": c_values,
    },
    {
        "penalty": [None],
        "solver": ["newton-cg", "lbfgs", "sag", "saga"],
    },
]

# Exhaustive grid search scored by accuracy with 10-fold cross-validation.
grid = GridSearchCV(model, hyperparams, scoring = "accuracy", cv = 10)
grid

In [8]:
# Run the grid search and report the best hyperparameter combination.
import warnings

# Silence warnings only while the search is running. The context manager
# restores the previous warning filters on exit, so warnings raised by later
# cells are still shown (replacing warnings.warn would mute them for the
# whole kernel session).
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    grid.fit(X_train, y_train)

print(f"Best hyperparameters: {grid.best_params_}")

Best hyperparameters: {'C': 0.1, 'penalty': 'l1', 'solver': 'saga'}


In [9]:
# Use the tuned model produced by the grid search.
#
# The search was created without a `refit` argument, so GridSearchCV's default
# applies: after the search it refits the best configuration on the full
# training set and exposes it as `best_estimator_`. Taking it from there
# avoids hand-copying the printed parameters (which go stale when the data or
# grid changes) and keeps the seed of the estimator that was searched.
model = grid.best_estimator_
model

In [10]:
# Class labels predicted by the tuned model for every row of the test set.
y_pred = model.predict(X=X_test)
y_pred

array([0., 0., 0., ..., 0., 0., 0.])

In [11]:
# Accuracy of the tuned model on the test set.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.88465274405051

In [12]:
# Persist the trained model to disk with pickle.
from pickle import dump

# Open the target file in a `with` block so the handle is flushed and closed
# even if pickling fails; a bare open(...) passed to dump() is never closed
# explicitly.
# NOTE(review): the destination is an absolute, workspace-specific path —
# consider a configurable output directory.
with open("/workspaces/logistic-regression-project-tutorial-pilarzarco/src/logistic-regression.sav", "wb") as model_file:
    dump(model, model_file)