# Parse MLflow Results

## Setup

In [2]:
import sys
import os
import json
import warnings

from tqdm.notebook import (tqdm, trange)

# Make the project's source folders importable from this notebook: resolve
# the parent of the working directory and append each code directory to
# sys.path unless it is already listed there.
module_path = os.path.abspath(os.pardir)
code_dirs = ["src", "src/profile_utils"]

for d in code_dirs:
    code_path = os.path.join(module_path, d)
    if code_path in sys.path:
        continue  # already registered, e.g. when this cell is re-run
    sys.path.append(code_path)

In [3]:
import json
import os
import sys

import mlflow
import ray
import yaml

from mlflow_loader import (_get_best_run_info,
                           load_model,
                           load_dataset,
                           load_dataset_from_str,
                           load_model_from_str)

import typing as t
import pandas as pd
import numpy as np

import lightgbm
import xgboost
import catboost
import sklearn.ensemble
import pickle

from multiprocessing import Process
from pathlib import Path

from mlflow.utils.file_utils import local_file_uri_to_path

from ray import tune
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.suggest.hyperopt import HyperOptSearch
from ray.tune.trial import Trial

In [4]:
# Load IPython's autoreload extension; it provides the %autoreload magic used
# in the next cell.
%load_ext autoreload

In [5]:
# Mode 2: reload all modules (except those excluded with %aimport) before
# running each cell, so edits to imported modules are picked up without a
# kernel restart.
%autoreload 2

## Loading MLflow Model Information

In [5]:
# MLflow run URIs, written as groups of four separated by blank lines.
# NOTE(review): presumably a superseded run set (per the `old_` prefix) -- no
# other cell shown in this notebook reads this list. Its last four entries are
# identical to the last four entries of `numerical_mlflow_runs`.
old_mlflow_runs = ["mlflow:///776fa4318c834d8290c2eb4fcd98f3ac",
                   "mlflow:///2d30f287626044fb8ec0c1749021e381",
                   "mlflow:///5f4d0ed98ce544c482bc7dc408aeb93b",
                   "mlflow:///7893fb6f508d417ea5b30f0b91884b93",

                   "mlflow:///1ed703aec2a643dd99782d1a604cedc2",
                   "mlflow:///5e3dcb49d2054974a223bc44eb807ca3",
                   "mlflow:///a0ac847e0b0b4dc3b073a7cbeeda8208",
                   "mlflow:///2c0dabc46f7947509698931814ae6d37",

                   "mlflow:///405b08cf567f4d9489fac03cb60fb22b",
                   "mlflow:///3e69c4453cdb4001afedb6e865d6f4de",
                   "mlflow:///0260743d01e3458783597f4e2a1d2c90",
                   "mlflow:///398a4f86c696412396e8f13080ffa3aa",

                   "mlflow:///7ec2fea2f36b4fc9bcd9e8f21e47f87e",
                   "mlflow:///c76a20f3c91f420f9c3acc1ba1f5eae7",
                   "mlflow:///613ac70fb5d446b4a74f42f8c4a9f667",
                   "mlflow:///13654d2844104dfdbc2c5afcc70fff9d",

                   "mlflow:///9f95a043b84f47d6a947e5d6ce728ff8",
                   "mlflow:///d5278921411f47acab4c2c9a024fd92b",
                   "mlflow:///811758f752d74e5f8c642fbb7232ab3d",
                   "mlflow:///215be13a50804da0ac2637b7d51691ea",

                   "mlflow:///957115499f3a437994024102ce5d6a7b",
                   "mlflow:///b98f62d4796e4098b12c7ceae6ccd344",
                   "mlflow:///5cafe13f6a7e4788ac99514c0f3c3d89",
                   "mlflow:///4c89f2540439493d9333c22a4a0b3d05",

                   "mlflow:///5cc5993a95ce422e8e27e630eba20c60",
                   "mlflow:///b94372d7701b47939f2087e15d29ea25",
                   "mlflow:///6d61b0034c6c48d39fdaab3dfc6242b1",
                   "mlflow:///1a4057dc85534a54a3a42073a707e4f9"]

In [23]:
                         # MLflow run URIs grouped by dataset. Each group lists four
                         # runs in the order RF, XGBoost, CatBoost, LightGBM (see the
                         # trailing comment on every entry).
                         # NOTE(review): no other cell shown in this notebook reads
                         # this list; `mlflow_runs` is the one passed on for parsing.
                         #
                         # Churn modelling
numerical_mlflow_runs = ["mlflow:///9a5be2a42818460fad79771dcbc31f7d", # RF
                         "mlflow:///f86e1496c4c54d3c848060d5d71e907a", # XGBoost
                         "mlflow:///c8ac001b4cb14ce895193fbf4c2b66c3", # CatBoost
                         "mlflow:///646a911f42054eaeb4bdf2451336ae4f", # LightGBM
                         # Eye movements
                         "mlflow:///ec48e63b856e4ee48bc0a0b275dac82c", # RF
                         "mlflow:///0dbcbda035614d3c84ea3c6a123a9488", # XGBoost
                         "mlflow:///da33915e97b246a59c0e7d45a6927a44", # CatBoost
                         "mlflow:///4369c9faa001416ba02a231b3103ada0", # LightGBM
                         # Forest cover
                         "mlflow:///a0213340533f460c9a31f00a2c05171a", # RF
                         "mlflow:///8f1422bd63284437b767c16ca2201b29", # XGBoost
                         "mlflow:///89802782db2b44428c8945b5a429d0bc", # CatBoost
                         "mlflow:///33fae9e93c114cce81d62c9e44a1240b", # LightGBM
                         # Gas concentration
                         "mlflow:///419e2001aa7c463c932c574d7af6880f", # RF
                         "mlflow:///fdbacb22e28342d69e1dc336afac1eac", # XGBoost
                         "mlflow:///2e36c6bd012e45049fb963da227e4750", # CatBoost
                         "mlflow:///171f98fdfded42b89ea4dbdf1e52ddf0", # LightGBM
                         # Gesture phase
                         "mlflow:///f079a7552c6047bf9208b34c1b28a4ef", # RF
                         "mlflow:///8871a63a748a44a3b93bc6108de8ea79", # XGBoost
                         "mlflow:///a79f26e89ced4df4b81bb13b6cd39b72", # CatBoost
                         "mlflow:///6140eac47d7541f69989255d7b335925", # LightGBM
                         # Telco
                         "mlflow:///783fd070683a417ea86868bdc161bc03", # RF
                         "mlflow:///ba8c1a01653a418e8435a9f6f0a5666b", # XGBoost
                         "mlflow:///6545751be3bc4bdb9a0acfae34f3f13f", # CatBoost
                         "mlflow:///8916bd24de524f4b9d1297e5a8622b76", # LightGBM
                         # Rossmann
                         "mlflow:///5cc5993a95ce422e8e27e630eba20c60", # RF
                         "mlflow:///b94372d7701b47939f2087e15d29ea25", # XGBoost
                         "mlflow:///6d61b0034c6c48d39fdaab3dfc6242b1", # CatBoost
                         "mlflow:///1a4057dc85534a54a3a42073a707e4f9"] # LightGBM

In [1]:
               # Runs selected for parsing: one MLflow run URI per dataset. This is
               # the list the `_get_best_run_info` cell iterates over.
               # NOTE(review): "(Round N)" presumably identifies the tuning round
               # the run came from -- confirm.
               #
               # Churn modelling (numerical)
mlflow_runs = ["mlflow:///c8ac001b4cb14ce895193fbf4c2b66c3", # CatBoost (Round 5)
               #
               # Eye movements (numerical)
               "mlflow:///0dbcbda035614d3c84ea3c6a123a9488", # XGBoost (Round 5)
               #
               # Forest cover (numerical)
               "mlflow:///165346fb809b4ccfabe39ce9617393f1", # XGBoost (Round 6)
               #
               # Gas concentration (numerical)
               "mlflow:///56bc305886f240588ce1a32f8deadd09", # RF (Round 6)
               #
               # Gesture phase (numerical)
               "mlflow:///8871a63a748a44a3b93bc6108de8ea79", # XGBoost (Round 5)
               #
               # Telco (numerical)
               "mlflow:///f1630f43b74447d58a317aa4b58868a5", # XGBoost (Round 6)
               #
               # Rossmann (numerical)
               "mlflow:///a4a41de730bc49339c694c0a3576a5d5", # XGBoost (Round 6)
              ]

In [6]:
# Call `_get_best_run_info` on every URI in `mlflow_runs`, keeping list order.
model_info_list = list(map(_get_best_run_info, mlflow_runs))



## Parsing Model Information, Generating Table

In [7]:
# Run `load_model` on each run-info entry, in order; the results become the
# rows of the summary table built in the next cell.
parsed_model_list = list(map(load_model, model_info_list))

Loading model of type catboost, dataset churn_modelling:numerical
Loading model of type xgboost, dataset eye_movements:numerical
Loading model of type xgboost, dataset forest_cover_type:numerical
Loading model of type rf, dataset gas_concentrations:numerical
Loading model of type xgboost, dataset gesture_phase_segmentation:numerical
Loading model of type xgboost, dataset telco_customer_churn:numerical
Loading model of type xgboost, dataset rossmann_store_sales:numerical


In [8]:
# Sizing constants for the summary table, named here instead of being inlined.
CAM_ROWS = 256      # value stored in the `CAM_rows` column for every model
MAX_ARRAYS = 3000   # a model is flagged valid when it uses at most this many arrays

# Build the per-model summary table in one chain (no in-place mutation):
#   CAM_rows    -- constant CAM_ROWS on every row
#   used_arrays -- ceil(n_trees / (CAM_rows / max_leaves))
#   is_valid    -- used_arrays <= MAX_ARRAYS
# and then drop the columns that are not wanted in the exported table.
# NOTE(review): CAM_rows / max_leaves is used as a possibly fractional
# "trees per array" figure; confirm that no floor is intended there.
parsed_model_df = (
    pd.DataFrame(parsed_model_list)
    .assign(
        CAM_rows=CAM_ROWS,
        used_arrays=lambda df: np.ceil(
            df["n_trees"] / (df["CAM_rows"] / df["max_leaves"])
        ),
        is_valid=lambda df: df["used_arrays"] <= MAX_ARRAYS,
    )
    .drop(columns=["params_path", "dataset_info", "trained_model"])
)

In [9]:
# Write the summary table to the working directory, omitting the index column.
parsed_model_df.to_csv(path_or_buf="parsed_models.csv", index=False)