In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score

# Pregunta #6

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder

# Cargar los datos
df = pd.read_csv("train.csv")  # Cambia el nombre si es diferente

# Variable objetivo
y = df["SalePrice"]

# üî∏ Eliminar columnas no num√©ricas o convertirlas a n√∫meros
df_numeric = df.select_dtypes(include=[np.number])

# üî∏ Opcional: Eliminar la columna Id si no aporta al modelo
if "Id" in df_numeric.columns:
    df_numeric = df_numeric.drop("Id", axis=1)

# üî∏ Separar las caracter√≠sticas (X)
X = df_numeric.drop("SalePrice", axis=1)

# üî∏ Imputar NaN con la media
imputer = SimpleImputer(strategy="mean")
X_imputed = imputer.fit_transform(X)

# üî∏ Binarizar y: por ejemplo, casas caras vs. baratas
# Aqu√≠ hacemos clasificaci√≥n, no regresi√≥n ‚Üí convertimos SalePrice a 0 o 1
y_binary = (y > y.median()).astype(int)  # 1 = caro, 0 = barato

# üî∏ Separar datos
X_train, X_test, y_train, y_test = train_test_split(X_imputed, y_binary, test_size=0.2, random_state=42)

# üî∏ Entrenar modelo de regresi√≥n log√≠stica
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# üî∏ Evaluaci√≥n
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

print(f"Precisi√≥n en entrenamiento: {accuracy_score(y_train, y_pred_train):.2f}")
print(f"Precisi√≥n en prueba: {accuracy_score(y_test, y_pred_test):.2f}")
print("Matriz de confusi√≥n:")
print(confusion_matrix(y_test, y_pred_test))


Precisi√≥n en entrenamiento: 0.90
Precisi√≥n en prueba: 0.92
Matriz de confusi√≥n:
[[147  14]
 [  8 123]]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# Pregunta 7

In [6]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Definir el modelo de regresi√≥n log√≠stica
model = LogisticRegression(solver='liblinear')  # 'liblinear' es un buen solver para peque√±a/muy grande regularizaci√≥n

# Definir el espacio de par√°metros a explorar
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],  # Par√°metro de regularizaci√≥n
    'penalty': ['l1', 'l2'],       # Tipo de regularizaci√≥n
    'max_iter': [100, 200, 500]    # N√∫mero m√°ximo de iteraciones
}

# Realizar la b√∫squeda de los mejores par√°metros utilizando GridSearchCV
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Mostrar los mejores par√°metros encontrados
print(f"Mejores par√°metros: {grid_search.best_params_}")

# Evaluar el modelo con los mejores par√°metros
best_model = grid_search.best_estimator_

# Precisi√≥n en entrenamiento
train_accuracy = accuracy_score(y_train, best_model.predict(X_train))
print(f"Precisi√≥n en entrenamiento: {train_accuracy:.2f}")

# Precisi√≥n en prueba
test_accuracy = accuracy_score(y_test, best_model.predict(X_test))
print(f"Precisi√≥n en prueba: {test_accuracy:.2f}")

# Matriz de confusi√≥n
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, best_model.predict(X_test))
print("Matriz de confusi√≥n:")
print(cm)




Mejores par√°metros: {'C': 10, 'max_iter': 500, 'penalty': 'l1'}
Precisi√≥n en entrenamiento: 0.92
Precisi√≥n en prueba: 0.93
Matriz de confusi√≥n:
[[150  11]
 [  8 123]]
