In [5]:
import os
import mlflow
import numpy as np


In [6]:

# Connection settings for the S3-compatible artifact store that MLflow writes to.
# `setdefault` keeps any value already exported in the environment, so real
# credentials supplied from outside the notebook are never overwritten by the
# fallbacks below.
# NOTE(review): "minio"/"minio123" look like local-development defaults — confirm
# they are never used outside a sandbox, and prefer injecting secrets externally.
os.environ.setdefault("AWS_ACCESS_KEY_ID", "minio")
os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "minio123")
os.environ.setdefault("MLFLOW_S3_ENDPOINT_URL", "http://minio:9000")

In [7]:
# Import necessary libraries
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.utils import resample
import mlflow
from mlflow.tracking import MlflowClient



In [8]:
# Build a synthetic binary-classification problem with a 90/10 class weighting.
X, y = make_classification(
    n_samples=10000,
    n_features=10,
    n_classes=2,
    weights=[0.9, 0.1],
    random_state=42,
)

# Hold out 30% of the rows for evaluation before any resampling takes place.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Balance the training split only: draw class-1 rows with replacement until
# there are as many of them as class-0 rows, then stack both groups together.
is_minority = y_train == 1
is_majority = y_train == 0
X_train_upsampled, y_train_upsampled = resample(
    X_train[is_minority],
    y_train[is_minority],
    replace=True,
    n_samples=int(is_majority.sum()),
    random_state=42,
)
X_train = np.concatenate((X_train[is_majority], X_train_upsampled))
y_train = np.concatenate((y_train[is_majority], y_train_upsampled))

# Two-step estimator: standardise the features, then logistic regression.
steps = [
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression(random_state=42)),
]
pipeline = Pipeline(steps)




In [9]:
# Echo the pipeline object as the cell's last expression so the notebook
# renders its configured steps.
pipeline

In [10]:
# Point MLflow at the tracking server and select the experiment that runs are
# logged into. The client is created only after the tracking URI has been set.
tracking_uri = "http://mlflow_server:5000"
experiment_name = "my_experiment"

mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)
client = MlflowClient()



In [11]:
# Train the baseline pipeline inside a single MLflow run, recording its
# parameter, test-set metrics and the fitted model artifact.
with mlflow.start_run() as run:
    print(mlflow.get_artifact_uri())

    # Record that the training data was upsampled
    mlflow.log_param("up_sampling", "True")

    # Train the model
    pipeline.fit(X_train, y_train)

    # Predict on the test set
    y_pred = pipeline.predict(X_test)

    # Build the report once and read both class-1 figures from it
    # (the integer label 1 is exposed under the string key '1').
    positive_class = classification_report(y_test, y_pred, output_dict=True)['1']

    # Send all metrics in one call instead of three separate requests
    mlflow.log_metrics({
        "accuracy": pipeline.score(X_test, y_test),
        "precision": positive_class['precision'],
        "recall": positive_class['recall'],
    })

    # Log the trained model
    mlflow.sklearn.log_model(pipeline, "logistic_model")

    # Both identifiers come from the active run itself, so they stay correct
    # even if the experiment selected earlier is renamed or changed.
    run_id = run.info.run_id
    experiment_id = run.info.experiment_id

    # Get the run details as stored by the tracking server
    run_details = client.get_run(run_id).data

    # Print the experiment ID, run ID, and run details
    print(f"Experiment ID: {experiment_id}")
    print(f"Run ID: {run_id}")
    print(f"Run Details: {run_details}")

s3://mlflow/1/d827e52cc5e04859b1aa0fd9111c2090/artifacts




Experiment ID: 1
Run ID: d827e52cc5e04859b1aa0fd9111c2090
Run Details: <RunData: metrics={'accuracy': 0.894,
 'precision': 0.4857142857142857,
 'recall': 0.8415841584158416}, params={'up_sampling': 'True'}, tags={'mlflow.log-model.history': '[{"run_id": "d827e52cc5e04859b1aa0fd9111c2090", '
                             '"artifact_path": "logistic_model", '
                             '"utc_time_created": "2023-04-16 '
                             '20:30:08.801700", "flavors": {"python_function": '
                             '{"model_path": "model.pkl", "predict_fn": '
                             '"predict", "loader_module": "mlflow.sklearn", '
                             '"python_version": "3.10.10", "env": {"conda": '
                             '"conda.yaml", "virtualenv": "python_env.yaml"}}, '
                             '"sklearn": {"pickled_model": "model.pkl", '
                             '"sklearn_version": "1.2.2", '
                             '"serialization_format

In [12]:
!pip install hyperopt
import hyperopt
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import mlflow.sklearn

# Hyperopt search space for the LogisticRegression keyword arguments.
# Every hp label is the same string as its dictionary key.
solver_options = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
space = dict(
    penalty=hp.choice('penalty', ['l2']),
    C=hp.loguniform('C', -10, 10),
    fit_intercept=hp.choice('fit_intercept', [True, False]),
    solver=hp.choice('solver', solver_options),
)

# Define the objective function
def objective(params, random_state=42):
    """Fit and score one hyperparameter configuration for Hyperopt.

    Trains a logistic-regression pipeline on the notebook-level
    ``X_train``/``y_train`` arrays, scores it on ``X_test``/``y_test``, and
    records the parameters, metrics and fitted model in a new MLflow run.

    Args:
        params: Keyword arguments for ``LogisticRegression``, as sampled from
            the search space.
        random_state: Seed passed to ``LogisticRegression`` so that evaluating
            the same parameters twice gives the same model.

    Returns:
        dict: ``{'loss': -accuracy, 'status': STATUS_OK}``. Accuracy is negated
        because ``fmin`` minimises the loss.
    """
    # NOTE(review): the loss is measured on the test split, so the selected
    # hyperparameters are tuned to it — consider a validation split or
    # cross-validation on the training data instead.
    pipeline = Pipeline([
        ('clf', LogisticRegression(random_state=random_state, **params))
    ])
    pipeline.fit(X_train, y_train)

    y_pred = pipeline.predict(X_test)
    accuracy = pipeline.score(X_test, y_test)

    # Build the report once; the integer label 1 is exposed under the key '1'.
    positive_class = classification_report(y_test, y_pred, output_dict=True)['1']

    # One MLflow run per trial: parameters, metrics and the fitted model.
    with mlflow.start_run():
        mlflow.log_params(params)
        mlflow.log_param("random_state", random_state)
        mlflow.log_metrics({
            "accuracy": accuracy,
            "precision": positive_class['precision'],
            "recall": positive_class['recall'],
        })

        # Log the trained model
        mlflow.sklearn.log_model(pipeline, "logistic_model")

    return {'loss': -accuracy, 'status': STATUS_OK}

# Generate mock data
# NOTE: this rebinds X, y and the train/test arrays created by the earlier
# data-preparation cell; `objective` reads these module-level names.
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the trials object that collects the result of every evaluation
trials = Trials()

# Run the hyperparameter search. Seeding `rstate` makes the sequence of
# sampled configurations repeatable across kernel restarts.
# NOTE(review): passing a numpy Generator assumes a recent hyperopt release;
# older releases expected np.random.RandomState — confirm the installed version.
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=50,
            trials=trials,
            rstate=np.random.default_rng(42))

# `best` holds hp.choice selections as indices; space_eval maps them back to
# the actual parameter values.
best_params = hyperopt.space_eval(space, best)

# Refit with the best hyperparameters, seeded like the baseline model so the
# stochastic solvers give a repeatable fit.
best_pipeline = Pipeline([
    ('clf', LogisticRegression(random_state=42, **best_params))
])

best_pipeline.fit(X_train, y_train)

# Log the best model together with its test accuracy, so this run can be
# compared with the others without reloading the model.
with mlflow.start_run():
    mlflow.log_params(best_params)
    mlflow.log_metric("accuracy", best_pipeline.score(X_test, y_test))
    mlflow.sklearn.log_model(best_pipeline, "best_logistic_model")



100%|██████████| 50/50 [02:11<00:00,  2.62s/trial, best loss: -0.84] 
