In [1]:
import sys
from pathlib import Path

# Make the parent directory importable so the `titanic_pro` imports in the
# next cell resolve.
# NOTE(review): assumes the kernel's working directory sits one level below
# the directory that contains `titanic_pro` -- confirm.
# The path is resolved to an absolute string once, so the entry keeps working
# if the working directory changes later, and the membership check keeps
# re-running this cell from appending duplicate entries.
PROJECT_ROOT = str(Path("..").resolve())
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [4]:
import logging
import pandas as pd
from titanic_pro.pipeline import pipeline, logger
from titanic_pro.train_utils import tune_model
from titanic_pro.utils import load_data
from sklearn.model_selection import train_test_split
from joblib import dump

In [9]:
# Train, tune, evaluate and persist the model in one pass.
# Tunable settings are named here so they are easy to find and change.
TARGET_COLUMN = "survived"
VALIDATION_FRACTION = 0.2  # share of rows held out for validation
RANDOM_SEED = 42  # fixed so the train/validation split is reproducible
MODEL_PATH = "model.joblib"  # relative to the kernel's working directory

logger.info("Loading data...")
# load_data() returns two values; only the first is used in this cell.
train_df, _ = load_data()
logger.info("%s", train_df.columns.values.tolist())
# NOTE(review): the logged column list includes 'boat' and 'body'; these are
# presumably only known after the outcome -- confirm the pipeline does not
# feed them to the classifier.
y = train_df[TARGET_COLUMN]
X = train_df.drop(columns=[TARGET_COLUMN])

logger.info("Splitting data into train and validation sets...")
# NOTE(review): the split is not stratified on the target; consider passing
# stratify=y if the class balance of the validation set matters.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=VALIDATION_FRACTION, random_state=RANDOM_SEED
)

logger.info("Tuning model...")
# Only the training split is passed to the search, so the validation rows
# are not used until the evaluation step below.
best_params, best_score = tune_model(X_train, y_train)
# Logger arguments are passed separately so formatting is deferred until a
# handler actually emits the record.
logger.info("Best parameters: %s", best_params)
logger.info("Best score: %s", best_score)

logger.info("Training the model with the best parameters...")
# This configures and fits the `pipeline` object imported from
# titanic_pro.pipeline itself (no copy is made), so later cells see the
# fitted estimator.
pipeline.set_params(**best_params)
pipeline.fit(X_train, y_train)

logger.info("Evaluating the model on the validation set...")
score = pipeline.score(X_val, y_val)
logger.info("Validation accuracy: %s", score)

logger.info("Saving the model...")
# Left as the cell's last expression, so the return value of dump() is
# displayed as the cell output.
dump(pipeline, MODEL_PATH)


2023-04-08 12:02:43,865 - INFO - Loading data...
2023-04-08 12:02:43,880 - INFO - ['pclass', 'survived', 'name', 'sex', 'age', 'sibsp', 'parch', 'ticket', 'fare', 'cabin', 'embarked', 'boat', 'body', 'home.dest']
2023-04-08 12:02:43,880 - INFO - Splitting data into train and validation sets...
2023-04-08 12:02:43,880 - INFO - Tuning model...


Fitting 5 folds for each of 27 candidates, totalling 135 fits


2023-04-08 12:02:45,671 - INFO - Best parameters: {'classifier__learning_rate': 0.01, 'classifier__max_depth': 3, 'classifier__n_estimators': 100}
2023-04-08 12:02:45,672 - INFO - Best score: 0.8237982663514579
2023-04-08 12:02:45,673 - INFO - Training the model with the best parameters...
2023-04-08 12:02:45,749 - INFO - Evaluating the model on the validation set...
2023-04-08 12:02:45,764 - INFO - Validation accuracy: 0.7966101694915254
2023-04-08 12:02:45,765 - INFO - Saving the model...


['model.joblib']