## Carga de Librerías

In [51]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split

# Carga de datos Procesados

In [54]:
# Read the preprocessed training set; the trailing expression previews its first rows.
processed_train_csv = '../data/processed/train_processed.csv'
train_data = pd.read_csv(processed_train_csv)
train_data.head()


Unnamed: 0,PassengerId,Survived,Pclass,Name,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked_C,Embarked_Q,Embarked_S,Sex_female,Sex_male
0,1,0,3,"Braund, Mr. Owen Harris",-0.565736,1,0,A/5 21171,7.25,,False,False,True,False,True
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0.663861,1,0,PC 17599,71.2833,C85,True,False,False,True,False
2,3,1,3,"Heikkinen, Miss. Laina",-0.258337,0,0,STON/O2. 3101282,7.925,,False,False,True,True,False
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",0.433312,1,0,113803,53.1,C123,False,False,True,True,False
4,5,0,3,"Allen, Mr. William Henry",0.433312,0,0,373450,8.05,,False,False,True,False,True


# Configuramos nuestras variables

In [57]:
# Split the frame into the target column and the remaining feature columns.
y = train_data['Survived']  # 'Survived' is the target column
X = train_data.drop(columns=['Survived'])


In [59]:
# Drop the free-text columns that are not used for modelling.
# errors='ignore' makes the cell safe to re-run and handles each column
# independently: the previous `if 'Name' in X.columns` guard checked only one of
# the three names, so it could leave 'Ticket'/'Cabin' behind or raise a KeyError
# when the columns were only partially present.
X = X.drop(columns=['Name', 'Ticket', 'Cabin'], errors='ignore')

# NOTE(review): 'PassengerId' remains among the features; it looks like a row
# identifier rather than a predictive variable. Confirm whether it should be
# dropped too (any consumer of the saved model must then use the same columns).

# Show the remaining columns
print("Columnas después de eliminar no numéricas:", X.columns)


Columnas después de eliminar no numéricas: Index(['PassengerId', 'Pclass', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked_C',
       'Embarked_Q', 'Embarked_S', 'Sex_female', 'Sex_male'],
      dtype='object')


# Dividir train y test

In [61]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the shape of each feature partition
print("Dimensiones de X_train:", X_train.shape)
print("Dimensiones de X_test:", X_test.shape)


Dimensiones de X_train: (712, 11)
Dimensiones de X_test: (179, 11)


In [63]:
# Check column dtypes and non-null counts to confirm no non-numeric column is left.
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() emitted a stray "None" after the report.
X_train.info()

# Count missing values per column
print("Valores faltantes en X_train:\n", X_train.isnull().sum())


<class 'pandas.core.frame.DataFrame'>
Index: 712 entries, 331 to 102
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  712 non-null    int64  
 1   Pclass       712 non-null    int64  
 2   Age          712 non-null    float64
 3   SibSp        712 non-null    int64  
 4   Parch        712 non-null    int64  
 5   Fare         712 non-null    float64
 6   Embarked_C   712 non-null    bool   
 7   Embarked_Q   712 non-null    bool   
 8   Embarked_S   712 non-null    bool   
 9   Sex_female   712 non-null    bool   
 10  Sex_male     712 non-null    bool   
dtypes: bool(5), float64(2), int64(4)
memory usage: 42.4 KB
None
Valores faltantes en X_train:
 PassengerId    0
Pclass         0
Age            0
SibSp          0
Parch          0
Fare           0
Embarked_C     0
Embarked_Q     0
Embarked_S     0
Sex_female     0
Sex_male       0
dtype: int64


# Construir y Entrenar Modelos

In [65]:
# Build the logistic-regression classifier and train it on the training split.
# fit() returns the estimator itself, so construction and training are chained.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
print("Modelo entrenado con éxito")


Modelo entrenado con éxito


# Evaluar el Modelo

In [68]:
# Predict labels for the held-out test split
y_pred = model.predict(X_test)

# Score the predictions: confusion matrix and overall accuracy
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report both metrics
print(f'Accuracy del modelo: {accuracy:.2f}')
print("Confusion Matrix:\n", conf_matrix)


Accuracy del modelo: 0.80
Confusion Matrix:
 [[89 16]
 [19 55]]


### Exactitud Global (Accuracy)

In [76]:
# Accuracy = correct predictions (main diagonal of the confusion matrix) divided
# by all predictions. It is derived from `conf_matrix` instead of hard-coded
# counts so the value stays in sync when the data, the split or the model changes.
accuracy = conf_matrix.trace() / conf_matrix.sum()
print(f"Accuracy: {accuracy:.2f}")


Accuracy: 0.80


# Guardar el modelo entrenado

In [70]:
# Destination of the serialized model (single source of truth for dump and message)
MODEL_PATH = '../models/trained_logistic_regression_model.pkl'

# Create the output directory if it is missing; joblib.dump does not create it
# and would fail on a fresh checkout without '../models'.
Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)

# Persist the trained model
joblib.dump(model, MODEL_PATH)
print(f"Modelo guardado exitosamente en '{MODEL_PATH}'")


Modelo guardado exitosamente en '../models/trained_logistic_regression_model.pkl'
