In [1]:
# Use the %pip magic so the package is installed into the environment of the
# running kernel, and pin the version so a fresh run resolves the same release.
%pip install hydra-core==1.3.2

Collecting hydra-core
  Downloading hydra_core-1.3.2-py3-none-any.whl.metadata (5.5 kB)
Collecting omegaconf<2.4,>=2.2 (from hydra-core)
  Downloading omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB)
Collecting antlr4-python3-runtime==4.9.* (from hydra-core)
  Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)
     ---------------------------------------- 0.0/117.0 kB ? eta -:--:--
     ---------------------------------------- 0.0/117.0 kB ? eta -:--:--
     --- ------------------------------------ 10.2/117.0 kB ? eta -:--:--
     --------- --------------------------- 30.7/117.0 kB 262.6 kB/s eta 0:00:01
     ------------------- ----------------- 61.4/117.0 kB 363.1 kB/s eta 0:00:01
     ---------------------------------- - 112.6/117.0 kB 544.7 kB/s eta 0:00:01
     ------------------------------------ 117.0/117.0 kB 526.1 kB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Downloading hydra_core-1.3.2-py3-none-

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
qpd 0.4.4 requires antlr4-python3-runtime<4.12,>=4.11.1, but you have antlr4-python3-runtime 4.9.3 which is incompatible.


In [6]:
import pandas as pd
import numpy as np
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pycaret.classification import *
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import LabelEncoder

import mlflow
import mlflow.sklearn

import os
import hydra
from omegaconf import DictConfig
from hydra.experimental import compose, initialize_config_dir

In [7]:
# Clear the process-wide GlobalHydra singleton.
# NOTE(review): presumably needed so that initialize_config_dir() can be called
# again in a kernel where Hydra was already initialized by an earlier run of the
# notebook — confirm.
hydra.core.global_hydra.GlobalHydra.instance().clear()

In [8]:
def _holdout_metrics(predictions, target):
    """Compute classification metrics from a PyCaret prediction frame.

    Args:
        predictions: DataFrame holding the true labels in column ``target`` and
            the predicted labels in column ``'prediction_label'``.
        target: Name of the column with the true labels.

    Returns:
        dict mapping the metric name used in MLflow to its value.
    """
    # Encode labels as integers so the binary sklearn metrics can be applied.
    encoder = LabelEncoder()
    y_true = encoder.fit_transform(predictions[target])
    y_pred = encoder.transform(predictions['prediction_label'])
    return {
        'Accuracy': accuracy_score(y_true, y_pred),
        # NOTE(review): AUC is computed from hard labels, not probabilities,
        # exactly as the original code did — confirm this is intended.
        'AUC': roc_auc_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred),
        'Recall': recall_score(y_true, y_pred),
        'F1-Score': f1_score(y_true, y_pred),
    }


def _log_mlflow_run(setup_cfg, metrics, model):
    """Log setup parameters, metrics and the model in a fresh MLflow run.

    Args:
        setup_cfg: The ``setup`` section of the Hydra config.
        metrics: dict of metric name -> value to log.
        model: Fitted estimator to log and register as ``'mushroom_model'``.
    """
    # Close any run that is still active before starting our own.
    mlflow.end_run()
    # The experiment must be selected before the run is started; selecting it
    # inside an already-open run does not move that run.
    mlflow.set_experiment('mushroom')

    with mlflow.start_run():
        # Log the values that were actually passed to setup(), not literals.
        mlflow.log_param('train_size', setup_cfg.trainsize)
        mlflow.log_param('transformation', setup_cfg.transform)
        mlflow.log_param('normalize', setup_cfg.norm)
        mlflow.log_param('remove_outliers', setup_cfg.rmoutlier)
        mlflow.log_param('fold', setup_cfg.fold)
        mlflow.log_param('experiment_name', 'mushroom')

        for metric_name, metric_value in metrics.items():
            mlflow.log_metric(metric_name, metric_value)

        mlflow.sklearn.log_model(
            sk_model=model,
            registered_model_name='mushroom_model',
            artifact_path='sk_model',
        )


def process_train(root_directory="C:/IT3385 - MLOPS/MLOPS",
                  processed_file_name="mushroom_transformed.csv"):
    """Process the mushroom data, train/tune a classifier and log it to MLflow.

    Steps: load ``mushroom_process.yaml`` through Hydra, read and clean the raw
    CSV, run PyCaret ``setup``, write the transformed dataset to disk, compare
    and tune models, save the finalized pipeline, score the hold-out
    predictions and log parameters, metrics and the tuned model to MLflow.

    Args:
        root_directory: Project root that contains ``config/process``. The
            default is the location the notebook was originally written for.
        processed_file_name: File name for the transformed dataset written
            under ``config.data.ppath``. The original code referenced an
            undefined ``file_name`` here, so this default is a placeholder —
            TODO confirm the intended name.

    Raises:
        hydra.errors.MissingConfigException: if the config directory or the
            config file cannot be found.
    """
    # initialize_config_dir requires an absolute path.
    config_dir = os.path.abspath(os.path.join(root_directory, "config", "process"))

    # Used as a context manager so the global Hydra state is restored on exit
    # and the function can be called again in the same kernel.
    with initialize_config_dir(config_dir=config_dir):
        config = compose(config_name="mushroom_process")
    print('All parameters in mushroom_process.yaml: ' + str(config))

    mushroom = pd.read_csv(config.data.raw).dropna()

    experiment = setup(
        data=mushroom,
        target=config.setup.target,
        train_size=config.setup.trainsize,
        transformation=config.setup.transform,
        normalize=config.setup.norm,
        session_id=config.setup.session,
        log_experiment=config.setup.logexp,
        experiment_name=config.setup.expname,
        remove_outliers=config.setup.rmoutlier,
        fold=config.setup.fold
    )

    print(f'Ordinal features: {experiment._fxs["Ordinal"]}')
    print(f'Categorical features: {experiment._fxs["Categorical"]}')

    # Persist the dataset as transformed by the PyCaret preprocessing pipeline.
    mushroom_trans = experiment.get_config('dataset_transformed')
    os.makedirs(config.data.ppath, exist_ok=True)
    full_file = os.path.join(config.data.ppath, processed_file_name)
    mushroom_trans.to_csv(full_file, index=False)

    print(f'Data was processed and written at {full_file}.')

    # modelling
    best = compare_models()
    # return_tuner=True makes tune_model return (tuned_model, tuner).
    tuned_model, _tuner = tune_model(best, return_tuner=True)
    print(tuned_model)
    plot_model(tuned_model, plot='feature')
    evaluate_model(tuned_model)

    # Score the tuned model on the hold-out split before it is finalized, so
    # the metrics are computed on data the model was not trained on.
    holdout_pred = predict_model(tuned_model)
    metrics = _holdout_metrics(holdout_pred, config.setup.target)

    # save pipeline
    final_model = finalize_model(tuned_model)
    save_model(final_model, 'mushroom_pipeline')
    load_pipeline = load_model('mushroom_pipeline')
    print(load_pipeline)

    _log_mlflow_run(config.setup, metrics, tuned_model)
# Run the whole process-and-train pipeline. In the recorded output below this
# call failed with MissingConfigException because the config directory was not
# found on the machine that executed the notebook.
process_train()

MissingConfigException: Primary config directory not found.
Check that the config directory 'C:\IT3385 - MLOPS\MLOPS\config\process' exists and readable