In [None]:
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, f1_score

# Select the MLflow experiment that the runs started below are recorded under
mlflow.set_experiment(experiment_name="Titanic_Survival")

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the Titanic passenger table (path is relative to the working directory)
data = pd.read_csv("titanic.csv")

# Drop rows with a missing value in any column used as a feature or as the label
data = data.dropna(subset=["Age", "Embarked", "Fare", "Pclass", "Sex", "Survived"])

# Feature engineering
# Sex -> 1 for "male", 0 for every other value (vectorized instead of a row-wise apply)
data["Sex"] = (data["Sex"] == "male").astype("int64")

# One-hot encode the port of embarkation
data = pd.get_dummies(data, columns=["Embarked"])

# get_dummies only creates an indicator column for ports that actually occur in
# the filtered rows; backfill any absent one with zeros so the fixed feature
# selection below cannot fail with a KeyError.
for port_column in ("Embarked_C", "Embarked_Q", "Embarked_S"):
    if port_column not in data.columns:
        data[port_column] = 0

feature_columns = ["Pclass", "Sex", "Age", "Fare", "Embarked_C", "Embarked_Q", "Embarked_S"]
X = data[feature_columns]
y = data["Survived"]

# Train-test split (80/20, fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features: the scaler is fitted on the training split only and then
# applied to both splits. From here on X_train / X_test hold the scaled values
# returned by the scaler, not the original DataFrame slices.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, f1_score
import mlflow.sklearn

def train_and_log_model(model, model_name):
    """Fit ``model``, evaluate it on the test split and record everything in MLflow.

    The data is not passed in: ``X_train``, ``y_train``, ``X_test`` and
    ``y_test`` are read from the enclosing notebook scope, so the data
    preparation cell must have been executed first.

    Inside one MLflow run named ``model_name`` the function:
      * fits the model and predicts on the test split,
      * logs ``n_estimators``, ``max_depth`` and ``max_iter`` for whichever of
        those attributes the model exposes,
      * logs accuracy, precision and F1 computed on the test split,
      * stores the fitted model via ``mlflow.sklearn.log_model`` under
        ``model_name``,
      * prints a one-line summary of the three metrics.

    Args:
        model: Estimator exposing ``fit`` and ``predict``.
        model_name: Used as the MLflow run name and as the name the model is
            logged under.

    Returns:
        None.
    """
    with mlflow.start_run(run_name=model_name):
        # Train, then score on the held-out split
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)

        # Record only the hyperparameters this particular estimator actually has
        for param_name in ("n_estimators", "max_depth", "max_iter"):
            if hasattr(model, param_name):
                mlflow.log_param(param_name, getattr(model, param_name))

        # Evaluation metrics on the test split
        accuracy = accuracy_score(y_test, predictions)
        precision = precision_score(y_test, predictions)
        f1 = f1_score(y_test, predictions)
        for metric_name, metric_value in (
            ("accuracy", accuracy),
            ("precision", precision),
            ("f1_score", f1),
        ):
            mlflow.log_metric(metric_name, metric_value)

        # Persist the fitted estimator alongside the run
        mlflow.sklearn.log_model(model, model_name)

        print(f"Model '{model_name}' logged with accuracy: {accuracy:.4f}, precision: {precision:.4f}, f1_score: {f1:.4f}")

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression with the iteration cap set to 1000, trained and logged as its own run
logreg = LogisticRegression(max_iter=1000)
train_and_log_model(model=logreg, model_name="Logistic Regression")

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with fixed hyperparameters and a fixed seed, trained and logged as its own run
rf_settings = {"n_estimators": 100, "max_depth": 5, "random_state": 42}
rf = RandomForestClassifier(**rf_settings)
train_and_log_model(model=rf, model_name="Random Forest")

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate values for each random forest hyperparameter (3 x 3 x 3 = 27 combinations)
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[3, 5, 10],
    min_samples_split=[2, 5, 10],
)

# Seeded base estimator whose hyperparameters the search varies
base_forest = RandomForestClassifier(random_state=42)

# Search configuration only; nothing is fitted in this cell.
# Candidates are scored by accuracy with 5-fold cross-validation, n_jobs=-1.
grid_search = GridSearchCV(
    base_forest,
    param_grid,
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
    verbose=1,
)

In [None]:
with mlflow.start_run(run_name="Random Forest - Hyperparameter Tuning") as run:
    # Run the search inside the MLflow run, then score the selected estimator
    # on the held-out test split
    grid_search.fit(X_train, y_train)
    best_rf = grid_search.best_estimator_
    y_pred = best_rf.predict(X_test)

    # Log the winning value of each tuned hyperparameter under a "best_" prefix
    for tuned_name in ("n_estimators", "max_depth", "min_samples_split"):
        mlflow.log_param(f"best_{tuned_name}", grid_search.best_params_[tuned_name])

    # Test-split metrics for the selected estimator
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    for metric_name, metric_value in (
        ("accuracy", accuracy),
        ("precision", precision),
        ("f1_score", f1),
    ):
        mlflow.log_metric(metric_name, metric_value)

    # Persist the selected estimator with the run
    mlflow.sklearn.log_model(best_rf, "Tuned Random Forest Model")
    print(f"Tuned Random Forest Model logged with accuracy: {accuracy:.4f}, precision: {precision:.4f}, f1_score: {f1:.4f}")