In [68]:
import pickle

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 1. Load data
# Read the Titanic dataset from a local CSV file; "titanic.csv" must be present
# in the working directory.
#
# Basic preprocessing, done as one chain so `data` is bound exactly once:
#   - keep only the columns used by the model,
#   - drop every row that has a missing value in any of them,
#   - encode Sex numerically (male -> 0, female -> 1); any other label would
#     be mapped to NaN by Series.map.
data = (
    pd.read_csv("titanic.csv")[["Pclass", "Sex", "Age", "Survived"]]
    .dropna()
    .assign(Sex=lambda frame: frame["Sex"].map({"male": 0, "female": 1}))
)

# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() emitted a stray "None" line after the summary.
data.info()
data.head()

<class 'pandas.core.frame.DataFrame'>
Index: 332 entries, 0 to 415
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Pclass    332 non-null    int64  
 1   Sex       332 non-null    int64  
 2   Age       332 non-null    float64
 3   Survived  332 non-null    int64  
dtypes: float64(1), int64(3)
memory usage: 13.0 KB
None


Unnamed: 0,Pclass,Sex,Age,Survived
0,3,0,34.5,0
1,3,1,47.0,1
2,2,0,62.0,0
3,3,0,27.0,0
4,3,1,22.0,1


In [80]:
# 2. Split the data into training and test sets
# The target is the survival flag; the features are the three predictor columns.
y = data["Survived"]
X = data[["Pclass", "Sex", "Age"]]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
for label, part in (
    ("Train features shape (X):", X_train),
    ("Train target shape (y):", y_train),
    ("Test features shape (X):", X_test),
    ("Test target shape (y):", y_test),
):
    print(label, part.shape)

# Standardize the features (z-score). The scaler is fitted on the training split
# only and that training split is transformed in the same call.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
# The same fitted scaler is then applied to the test split.
X_test = scaler.transform(X_test)

Train features shape (X): (265, 3)
Train target shape (y): (265,)
Test features shape (X): (67, 3)
Test target shape (y): (67,)


In [81]:
# 3. Train a simple model
# Fit a logistic regression classifier, with scikit-learn's default settings, on
# the training features and labels. `confusion_matrix` is imported in the
# notebook's import cell rather than here, so every dependency is declared in
# one place.
model = LogisticRegression()
model.fit(X_train, y_train)

In [90]:
# 4. Evaluate the model (optional)
# Predict on the held-out test split and report the confusion matrix and accuracy.
# NOTE(review): the recorded run shows a perfect score (accuracy 1.0, no
# misclassifications). Confirm that "Survived" in titanic.csv is genuine ground
# truth and not a column derived from "Sex" before trusting this number.
y_pred = model.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(accuracy_score(y_test, y_pred))

# Prediction for one new passenger. Columns are given in the same order used for
# training (Pclass, Sex, Age); Sex uses the numeric encoding male=0 / female=1.
X_new = pd.DataFrame({"Pclass": [3], "Sex": [1], "Age": [70]})

# The model was trained on standardized features, so a new row has to go through
# the same fitted scaler before predict(); raw values would be on a different
# scale from the one the coefficients were learned on.
X_new_scaled = scaler.transform(X_new)
model.predict(X_new_scaled)

[[42  0]
 [ 0 25]]
1.0




array([1])

In [92]:
# 5. Persist the trained model to a pickle file
# The file is opened in binary write mode and closed automatically by the
# context manager once the model has been serialized.
with open("titanic_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

print("Modelo guardado como titanic_model.pkl")

Modelo guardado como titanic_model.pkl


In [None]:
# 6. Persist the fitted scaler to a pickle file
# The scaler is saved next to the model because anything fed to the model at
# prediction time must be standardized with the same fitted scaler.
with open("Scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

# The confirmation message names the output file; it previously ended after
# "como " with no file name.
print("Normalización guardada como Scaler.pkl")