In [None]:
import os
import mlflow
# Supply the MLflow authentication variables without embedding secrets in the
# notebook. Values already present in the environment (e.g. exported before
# launching Jupyter) are used as-is; otherwise the user is prompted, so the
# credentials are never saved in the .ipynb or committed to version control.
import getpass

if not os.environ.get('MLFLOW_TRACKING_USERNAME'):
    os.environ['MLFLOW_TRACKING_USERNAME'] = input("MLflow username: ")
if not os.environ.get('MLFLOW_TRACKING_PASSWORD'):
    # getpass keeps the typed password out of the rendered cell output
    os.environ['MLFLOW_TRACKING_PASSWORD'] = getpass.getpass("MLflow password: ")

# Set the MLflow tracking URI to the CSIM MLflow server
# NOTE(review): this is a plain-http endpoint, so the credentials above travel
# unencrypted — confirm whether an https endpoint is available.
mlflow.set_tracking_uri("http://mlflow.ml.brain.cs.ait.ac.th")

# Select the experiment that subsequent runs are recorded under
# (MLflow creates it if it does not exist yet)
mlflow.set_experiment("st125159-a3")

In [1]:
import itertools

import mlflow
import mlflow.sklearn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler

# Step 1: Prepare data
# Seed NumPy's global RNG so the shuffled train/test split below is identical
# on every Restart & Run All; without it each execution evaluates the models on
# a different split and the logged metrics are not comparable between sessions.
# NOTE(review): any downstream code that draws from the global NumPy RNG is
# seeded by this as well — confirm that is desired.
SEED = 42
np.random.seed(SEED)

iris = datasets.load_iris()
X = iris.data[:, 2:]  # we only take the last two features for simplicity
y = iris.target  # Target variable with 3 classes

# Split data into training and test datasets:
# shuffle the row indices, then take the first 70% for training and the rest for testing
idx = np.arange(len(X))
np.random.shuffle(idx)
n_train = int(.7 * len(X))
idx_train = idx[:n_train]
idx_test = idx[n_train:]
# Every row must land in exactly one of the two partitions
assert len(idx_train) + len(idx_test) == len(X)

X_train = X[idx_train]
X_test = X[idx_test]
y_train = y[idx_train]
y_test = y[idx_test]

# Feature scaling: the scaler is fitted on the training rows only and then
# applied unchanged to the test rows, so no test statistics leak into training
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Add intercept: prepend a column of ones to each design matrix
intercept = np.ones((X_train.shape[0], 1))
X_train = np.concatenate((intercept, X_train), axis=1)
intercept = np.ones((X_test.shape[0], 1))
X_test = np.concatenate((intercept, X_test), axis=1)

# One-hot encoding the labels
def one_hot_encode(y, num_classes):
    """Return one-hot rows for the integer class labels in ``y``.

    Each label selects the matching row of a ``num_classes`` x ``num_classes``
    identity matrix, so label ``c`` maps to a float vector holding 1.0 at
    position ``c`` and 0.0 everywhere else.

    Args:
        y: Integer label or array of integer labels used as row indices.
        num_classes: Size of the identity matrix (number of distinct classes).

    Returns:
        NumPy array of the selected identity-matrix rows.
    """
    identity = np.eye(num_classes)
    return identity[y]

# Create one-hot encoded labels
# The class count is taken from the full label vector (not only the training
# split) and computed once, since it does not change between runs.
num_classes = len(set(y))
Y_train_encoded = one_hot_encode(y_train, num_classes)

# X_train carries a leading intercept column; it is excluded from the feature
# count handed to the model.
# NOTE(review): assumes the model's `n` means "features without intercept" — confirm.
num_features = X_train.shape[1] - 1

# Define parameter grid
penalties = ['l1', 'l2']  # Different regularization methods
lambdas = [0.1, 0.01, 0.001]  # Different lambda values
methods = ['batch', 'minibatch', 'sto']  # Different optimization methods

best_accuracy = 0  # Track the best accuracy
best_run_details = None  # Store details of the best run

# One MLflow run per (penalty, lambda, method) combination. itertools.product
# iterates in the same order as nesting the three loops (methods vary fastest).
for penalty, lambda_, method in itertools.product(penalties, lambdas, methods):
    # The context manager ends the run when the block exits (also when an
    # exception escapes), so no explicit mlflow.end_run() call is needed.
    with mlflow.start_run():
        # Log parameters for each run in a single call
        mlflow.log_params({
            "penalty": penalty,
            "lambda": lambda_,
            "method": method,
        })

        # LogisticRegression is expected to be a custom class already defined
        # in the kernel (it is not defined or imported in this cell). The
        # variable name is kept as-is even though it also holds 'l1' models.
        log_reg_ridge = LogisticRegression(k=num_classes, n=num_features,
                                           method=method, use_penalty=True,
                                           penalty=penalty, lambda_=lambda_)
        log_reg_ridge.fit(X_train, Y_train_encoded)

        # Predict on the test set
        y_pred = log_reg_ridge.predict(X_test)

        # Evaluation metrics
        accuracy = log_reg_ridge.accuracy(y_test, y_pred)
        macro_precision = log_reg_ridge.macro_precision(y_test, y_pred)
        macro_recall = log_reg_ridge.macro_recall(y_test, y_pred)
        macro_f1 = log_reg_ridge.macro_f1(y_test, y_pred)

        # Log metrics for each run in a single call
        mlflow.log_metrics({
            "accuracy": accuracy,
            "macro_precision": macro_precision,
            "macro_recall": macro_recall,
            "macro_f1_score": macro_f1,
        })

        # Keep the first configuration that reaches the highest accuracy so far
        # (strict '>' means later ties do not replace it)
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_run_details = {
                'penalty': penalty,
                'lambda': lambda_,
                'method': method,
                'accuracy': accuracy,
                'macro_precision': macro_precision,
                'macro_recall': macro_recall,
                'macro_f1': macro_f1
            }

# After all runs, record the winning configuration in its own MLflow run and
# print a short report. Nothing happens if no run was ever selected as best.
if best_run_details:
    # (MLflow name, key in best_run_details)
    best_param_names = (
        ("best_penalty", 'penalty'),
        ("best_lambda", 'lambda'),
        ("best_method", 'method'),
    )
    best_metric_names = (
        ("best_accuracy", 'accuracy'),
        ("best_macro_precision", 'macro_precision'),
        ("best_macro_recall", 'macro_recall'),
        ("best_macro_f1_score", 'macro_f1'),
    )
    # (printed label, key in best_run_details), in display order
    report_rows = (
        ("Penalty", 'penalty'),
        ("Lambda", 'lambda'),
        ("Method", 'method'),
        ("Accuracy", 'accuracy'),
        ("Macro Precision", 'macro_precision'),
        ("Macro Recall", 'macro_recall'),
        ("Macro F1 Score", 'macro_f1'),
    )

    with mlflow.start_run(run_name="Best Model Run"):
        # Log the best parameters
        for mlflow_name, detail_key in best_param_names:
            mlflow.log_param(mlflow_name, best_run_details[detail_key])

        # Log the best metrics
        for mlflow_name, detail_key in best_metric_names:
            mlflow.log_metric(mlflow_name, best_run_details[detail_key])

    print("\nBest Model Details:")
    for label, detail_key in report_rows:
        print(f"{label}: {best_run_details[detail_key]}")


KeyboardInterrupt: 