# In [None]:
import mlflow
import mlflow.sklearn
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np

# Load the iris dataset and pull out the feature matrix and the label vector
data = load_iris()
X, y = data.data, data.target

# Hold out 20% of the samples as a test split (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Preprocessing: standardize the four columns, selected by position
numeric_features = list(range(4))
numeric_transformer = StandardScaler()
preprocessor = ColumnTransformer(
    transformers=[("num", numeric_transformer, numeric_features)],
)

# Model pipeline: preprocessing followed by an XGBoost classifier.
# NOTE(review): `use_label_encoder` is reportedly deprecated in newer XGBoost
# releases — confirm against the version this project pins.
pipeline = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        (
            "classifier",
            XGBClassifier(use_label_encoder=False, eval_metric="mlogloss"),
        ),
    ]
)

# Hyperparameter grid; the `classifier__` prefix routes each entry to the
# pipeline step named "classifier" (3 x 3 x 3 = 27 candidates)
param_grid = {
    "classifier__n_estimators": [50, 100, 200],
    "classifier__max_depth": [3, 4, 5],
    "classifier__learning_rate": [0.01, 0.1, 0.2],
}

# Exhaustive search with 3-fold cross-validation, using all available cores
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
    verbose=2,
)

# Start MLflow run
with mlflow.start_run():
    # Run the cross-validated grid search on the training split
    grid_search.fit(X_train, y_train)

    # Read the winning configuration once and reuse it for logging and printing
    best_params = grid_search.best_params_
    best_score = grid_search.best_score_

    # Record the best hyperparameters, their cross-validation score, and the
    # best estimator found by the search in the active MLflow run
    mlflow.log_params(best_params)
    mlflow.log_metric("best_score", best_score)
    mlflow.sklearn.log_model(grid_search.best_estimator_, "model")

    # Echo the same results to stdout
    print("Best parameters found: ", best_params)
    print("Best cross-validation score: ", best_score)
    # Custom feature engineering
    def add_custom_features(X):
        """Append per-row summary statistics as three extra columns.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).

        Returns:
            numpy.ndarray of shape (n_samples, n_features + 3): the original
            columns followed by each row's mean, median and standard
            deviation (``np.std`` with its default arguments), in that order.
        """
        # Each reducer collapses axis 1, yielding one value per row.
        row_stats = [reducer(X, axis=1) for reducer in (np.mean, np.median, np.std)]
        # column_stack turns every 1-D statistic into a column next to X.
        return np.column_stack((X, *row_stats))

    # Extend both splits with the per-row statistics columns.
    # NOTE(review): this runs after grid_search.fit(...) earlier in this block,
    # so the fitted and logged model never sees these columns — confirm whether
    # the feature engineering was meant to happen before fitting.
    X_train, X_test = (
        add_custom_features(split) for split in (X_train, X_test)
    )
    # Add a new dimension with the value 1
    def add_constant_feature(X):
        """Append a column of ones to a 2-D feature matrix.

        Args:
            X: 2-D array-like of shape (n_samples, n_features). Lists and
                other array-likes are converted with ``np.asarray``.

        Returns:
            numpy.ndarray of shape (n_samples, n_features + 1) whose last
            column is all ones.

        Raises:
            ValueError: If ``X`` is not 2-D.
        """
        # Convert first so plain lists work: they have no `.shape` attribute.
        X = np.asarray(X)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features); got {X.ndim}-D input"
            )
        # np.ones defaults to float64, so integer inputs are promoted by hstack.
        constant_feature = np.ones((X.shape[0], 1))
        return np.hstack((X, constant_feature))

    # Append the all-ones column to both splits.
    # NOTE(review): after this the splits are wider than the data the grid
    # search was fitted on; presumably X_test can no longer be passed to the
    # fitted search as-is — verify before using it for evaluation.
    X_train, X_test = (
        add_constant_feature(split) for split in (X_train, X_test)
    )