In [1]:
import pandas as pd
import numpy as np

import mlflow

import xgboost as xgb

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

In [2]:
# Load the training CSV and index the rows by the 'PassengerId' column.
train = pd.read_csv("../data/train_features.csv").set_index('PassengerId')

# X holds every column except the 'Survived' target; y is the target column.
X = train.loc[:, train.columns != 'Survived'].values
y = train['Survived'].values

In [3]:
 # Hold out 33% of the rows for evaluation; the fixed seed keeps the
 # partition identical across re-runs of the notebook.
 X_train, X_test, y_train, y_test = train_test_split(
     X,
     y,
     test_size=0.33,
     random_state=42,
 )

In [4]:
# Candidate classifiers as (estimator class, constructor kwargs) pairs.
# The kwargs dict is both passed to the constructor and logged to MLflow.
# Every estimator is given the same fixed seed so that repeated runs of the
# notebook produce comparable metrics (previously only the random forest
# was seeded, leaving the other two models' results unpinned).
models = [
    (RandomForestClassifier, {'n_estimators': 100, 'max_depth': 2, 'random_state': 42}),
    (DecisionTreeClassifier, {'random_state': 42}),
    (xgb.XGBClassifier, {'random_state': 42}),
]

In [5]:
# Fit each candidate on the training split, score it on the held-out split,
# and record its parameters and metrics as one MLflow run per model.
for model, params in models:
    model_name = model.__name__

    print("Training {}...".format(model_name))
    m = model(**params)
    m.fit(X_train, y_train)

    y_pred = m.predict(X_test)

    # Precision / recall / F1 under both weighted and macro averaging.
    overall_metrics = {
        "precision_weighted": metrics.precision_score(y_test, y_pred, average='weighted'),
        "recall_weighted": metrics.recall_score(y_test, y_pred, average='weighted'),
        "f1_score_weighted": metrics.f1_score(y_test, y_pred, average='weighted'),
        "precision_macro": metrics.precision_score(y_test, y_pred, average='macro'),
        "recall_macro": metrics.recall_score(y_test, y_pred, average='macro'),
        "f1_score_macro": metrics.f1_score(y_test, y_pred, average='macro'),
    }

    print("Logging {} metrics".format(model_name))
    with mlflow.start_run():
        # Log the class *name*, not the class object: the object would be
        # stored as its repr ("<class '...'>"), which is noisy and awkward
        # to filter on in the tracking UI.
        mlflow.log_param("model", model_name)
        mlflow.log_params(params)

        mlflow.log_metrics(overall_metrics)

Training RandomForestClassifier...
Logging RandomForestClassifier metrics
Training DecisionTreeClassifier...
Logging DecisionTreeClassifier metrics
Training XGBClassifier...
Logging XGBClassifier metrics
