In [3]:
import mlflow
import mlflow.sklearn
import mlflow.xgboost
import mlflow.pyfunc
from mlflow.client import MlflowClient
client = MlflowClient(tracking_uri="https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow",
                      registry_uri="https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow")
# client = MlflowClient()
import dagshub
import pandas as pd
import numpy as np
import os
import yaml
import json
import pickle
import optuna
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK, space_eval
from hyperopt.pyll.base import scope
from pathlib import Path
from box import ConfigBox
from pprint import pprint

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder

from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression, Ridge,Lasso
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import RandomizedSearchCV
from catboost import CatBoostRegressor
from xgboost import XGBRegressor

import warnings as w
w.filterwarnings("ignore")

from typing import NewType #type: ignore
ML_Model = NewType('Machine_Learning_Model', object)
os.chdir("F:\iNeuron\End2End\ML Project - KrishNaik - Gemstone Price Prediction\Studentmlprojectregression")

In [4]:
client._registry_uri

'https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow'

In [5]:
pwd

'F:\\iNeuron\\End2End\\ML Project - KrishNaik - Gemstone Price Prediction\\Studentmlprojectregression'

In [6]:
data_path = os.path.join("F:\iNeuron\End2End\ML Project - KrishNaik - Gemstone Price Prediction\Studentmlprojectregression\\notebook\data","gemstone.csv")
df = pd.read_csv(data_path)
df.head()

Unnamed: 0,id,carat,cut,color,clarity,depth,table,x,y,z,price
0,0,1.52,Premium,F,VS2,62.2,58.0,7.27,7.33,4.55,13619
1,1,2.03,Very Good,J,SI2,62.0,58.0,8.06,8.12,5.05,13387
2,2,0.7,Ideal,G,VS1,61.2,57.0,5.69,5.73,3.5,2772
3,3,0.32,Ideal,G,VS1,61.6,56.0,4.38,4.41,2.71,666
4,4,1.7,Premium,G,VS2,62.6,59.0,7.65,7.61,4.77,14453


In [7]:
X = df.drop(labels=['id','price'],axis=1)
Y = df[['price']]

In [8]:
xtrain, xtest, ytrain, ytest = train_test_split(X,Y,test_size=0.2,random_state=42)

In [9]:
# KNeighborsRegressor().fit(xtrain,ytrain).predict(xtest)

In [10]:
# KNN = KNeighborsRegressor()
# KNN.fit(xtrain,ytrain)
# ypred = KNN.predict(xtest)

In [11]:
# pd.DataFrame(KNeighborsRegressor().fit(xtrain,ytrain).predict(xtest) == ypred)[0].unique()

In [12]:
# Define which columns should be ordinal-encoded and which should be scaled
categorical_cols = X.select_dtypes(include='object').columns
numerical_cols = X.select_dtypes(exclude='object').columns
            
# Define the custom ranking for each ordinal variable
cut_categories = ['Fair', 'Good', 'Very Good','Premium','Ideal']
color_categories = ['D', 'E', 'F', 'G', 'H', 'I', 'J']
clarity_categories = ['I1','SI2','SI1','VS2','VS1','VVS2','VVS1','IF']

num_pipeline = Pipeline(
                steps = [
                ('imputer',SimpleImputer(strategy='median')),
                ('scaler',StandardScaler())                
                ]
            )

# Categorical Pipeline
cat_pipeline = Pipeline(
                steps=[
                ('imputer',SimpleImputer(strategy='most_frequent')),
                ('ordinal_encoder',OrdinalEncoder(categories=[cut_categories,color_categories,clarity_categories])),
                ('scaler',StandardScaler())
                ]
            )

preprocessor = ColumnTransformer(
                [
                ('num_pipeline',num_pipeline,numerical_cols),
                ('cat_pipeline',cat_pipeline,categorical_cols)
                ]
            )

In [13]:
xtrain = pd.DataFrame(preprocessor.fit_transform(xtrain),columns=preprocessor.get_feature_names_out())
xtest = pd.DataFrame(preprocessor.transform(xtest),columns=preprocessor.get_feature_names_out())

In [14]:
preprocessor.get_feature_names_out()

array(['num_pipeline__carat', 'num_pipeline__depth',
       'num_pipeline__table', 'num_pipeline__x', 'num_pipeline__y',
       'num_pipeline__z', 'cat_pipeline__cut', 'cat_pipeline__color',
       'cat_pipeline__clarity'], dtype=object)

In [15]:
def load_yaml(filepath:Path):
    try:
        filepath_,filename = os.path.split(filepath)
        with open(filepath) as yaml_file:
            config = yaml.load(yaml_file,
                               Loader = yaml.CLoader)
            # logger.info(f"{filename} yaml_file is loaded")
            return ConfigBox(config)
    except Exception as e:
        raise e

In [16]:
def eval_metrics(true , predicted):
    mae = mean_absolute_error(true, predicted)
    mse = mean_squared_error(true, predicted)
    rmse = np.sqrt(mean_squared_error(true, predicted))
    r2_square = r2_score(true, predicted)
    # return mae, rmse, r2_square
    # tn, fp, fn, tp = confusion_matrix(y_true=y_true, y_pred=y_pred).ravel()      
    return {"MAE" : mae, 
            "MSE" : mse, 
            "RMSE": rmse, 
            "R2_Squared": r2_square}

In [None]:
# TOKEN = dagshub.auth.get_token()
# REPO = 'test_student_performance'
# USER_NAME = 'Raj-Narayanan-B'

In [None]:
# from dagshub.upload import create_repo
# repo_ = create_repo(repo_name = 'test_student_performance', private=False)

In [None]:
# repo_.upload_files

In [None]:
# !git clone https://{USER_NAME}:{TOKEN}@dagshub.com/{USER_NAME}/{REPO}.git

In [None]:
# %cd {REPO}

In [None]:
# !dvc get https://dagshub.com/Dean/Walkthrough requirements.txt
# !dvc get https://dagshub.com/Dean/Walkthrough src
# !dvc get https://dagshub.com/NirBarazida/hello-world-files data/

In [None]:
# dagshub.upload_files(local_path="data/", remote_path="data/",
#             commit_message="Added Raw Data",versioning="dvc",
#                repo=f"{USER_NAME}/{REPO}")

In [None]:
# !dvc remote add origin https://dagshub.com/Raj-Narayanan-B/test_student_performance.dvc -f

In [None]:
# !dvc remote modify origin --local auth basic 
# !dvc remote modify origin --local user Raj-Narayanan-B 
# !dvc remote modify origin --local password 8af4cc66be8aec751397fd525e47ae395fa67442

In [None]:
# dagshub.upload_files(repo = Raj-Narayanan-B/test_student_performance,
#     local_path = 'artifacts\data.csv',
#     commit_message = "adding data.csv",
#     remote_path = 'https://dagshub.com/Raj-Narayanan-B/test_student_performance/data',
# )

In [17]:
os.environ['MLFLOW_TRACKING_URI'] = "https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow"
os.environ['MLFLOW_TRACKING_USERNAME'] = "Raj-Narayanan-B"
os.environ['MLFLOW_TRACKING_PASSWORD'] = "8af4cc66be8aec751397fd525e47ae395fa67442"

In [18]:
# mlflow.set_tracking_uri("https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow")

In [19]:
client._registry_uri

'https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow'

In [20]:
mlflow.get_registry_uri()

'https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow'

In [21]:
mlflow.get_tracking_uri()

'https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow'

In [19]:
# mlflow.set_tracking_uri("http://127.0.0.1:5000")

In [22]:
eval_metrics(np.linspace(1,5,10) , np.linspace(1,8,10))

{'MAE': 1.5,
 'MSE': 3.166666666666667,
 'RMSE': 1.7795130420052185,
 'R2_Squared': -0.9431818181818188}

In [21]:
# client = MlflowClient("http://127.0.0.1:5000")

In [22]:
# client.delete_registered_model("Challenger_Optuna_Linear Regression")
# client.delete_registered_model("HyperOpt_Trial_2_Linear_Regression")

In [23]:
# import numpy as np
# # Example lists
# names = np.arange(1,5,1)
# ages = np.arange(5,10,1)
# scores = np.arange(10,15,1)

# # Using zip() to combine lists element-wise
# combined_data = zip(names, ages, scores)

# # Converting the iterator to a list of tuples
# result_list = list(combined_data)

# # Displaying the result
# # print(result_list)
# a = 1
# b = 2
# c = 3
# d = 4
# # data = (a,b,c,d)
# # print (data)

# def sample(data_):
#     a1, b1, c1, d1 = data_
#     print ("a1: ",a1, 
#            "\nb1: ",b1,
#            "\nc1: ",c1,
#            "\nd1 :",d1)
# sample((a,b,c,d))

In [24]:
# should_register_model = True
# if should_register_model == True:
#     print (should_register_model)

In [25]:
# artifact_path = None
# if not artifact_path:
#     print(artifact_path)

# else:
#     print("There is no artifact_path")

In [26]:
# xgb = XGBRegressor()
# xgb.get_params()

In [27]:
# xgb.get_params()

In [28]:
# filtered_dict = {key: value for key, value in xgb.get_params().items() if value is not None}
# filtered_dict

In [None]:
# artifact_path = np.nan # or None
# if artifact_path:
#     print ("There is a value for artifact path")

# else:
#     print ("There is no value for artifact path")
#  1f4929da0cc54d2d8d4cf0e2e9062503
#  e7f87658e94f4fc7883ee8b20eb2bfb7

In [None]:
# hp_parent_runID = '0002d41fa67d4c02acc5430e07aaf7c2'
# op_parent_runID = 'afe7ed4cc84345b0a6475f30b43948f8'
# best_run_id = mlflow.search_runs(experiment_ids=[25],
#                     order_by = ['metrics.MSE'],
#                     filter_string = f"tags.run_type ilike 'parent'")[['run_id','artifact_uri','metrics.MSE']]['run_id'][0]
# best_run_id

In [None]:
# model_name = 'XGB_Regressor'
# artifact_path = f'optuna_{model_name}' if model_name == 'XGB_Regressor' else f'optuna_{model_name}'
# artifact_path

In [23]:
client.tracking_uri

'https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow'

In [24]:
client._registry_uri

'https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow'

In [None]:
client.search_registered_models()
# client.delete_registered_model('sample')

In [29]:
# client.delete_registered_model("Challenger_HyperOpt_Lasso")
# client.delete_registered_model("Challenger_HyperOpt_Linear Regression")
# client.delete_registered_model("Challenger_HyperOpt_Ridge")
# client.delete_registered_model("Challenger_Optuna_Lasso")
# client.delete_registered_model("Challenger_Optuna_Linear Regression")
# client.delete_registered_model("Challenger_Optuna_Ridge")
# client.delete_registered_model("Champion_Lasso")
# client.delete_registered_model("Champion_Linear Regression")
# client.delete_registered_model("Champion_Ridge")
# client.delete_registered_model("another_sample1")
# client.delete_registered_model("sample_test")



In [None]:
# client.create_registered_model(name = 'sample')
# # client.create_model_version(name = 'sample')

In [None]:
# mlflow.register_model()

In [None]:
# for i in range(18,29):
#     mlflow.delete_experiment(str(i))

In [None]:
# # data = (1,2,4)
# # def sample_(data):
# #     a,b,c = data
# #     print(a,b,c)
# # sample_(data)   
# # client.create_registered_model(name = 'sample')
# client.delete_registered_model("sample")
# client.delete_registered_model("Champion_Linear Regression")
# client.delete_registered_model("Champion_Ridge")
# client.delete_registered_model("Champion_Lasso")

# for i in range(51, 56):
#     mlflow.delete_experiment(str(i))

In [None]:
# client._registry_uri == client.tracking_uri

In [215]:
# params = client.get_run('7ec03febeb444500a73c2b8511607c82').data.params
# for key,value in params.items():
#     try:
#         params[key] = eval(value)
#     except:
#         params[key] = value
#         if value == 'nan':
#             params[key] = np.nan
# params        

{'objective': 'reg:squarederror',
 'base_score': None,
 'booster': None,
 'callbacks': None,
 'colsample_bylevel': None,
 'colsample_bynode': None,
 'colsample_bytree': None,
 'device': None,
 'early_stopping_rounds': None,
 'enable_categorical': False,
 'eval_metric': None,
 'feature_types': None,
 'gamma': None,
 'grow_policy': None,
 'importance_type': None,
 'interaction_constraints': None,
 'learning_rate': 0.48822719829052513,
 'max_bin': None,
 'max_cat_threshold': None,
 'max_cat_to_onehot': None,
 'max_delta_step': None,
 'max_depth': None,
 'max_leaves': None,
 'min_child_weight': None,
 'missing': nan,
 'monotone_constraints': None,
 'multi_strategy': None,
 'n_estimators': 179,
 'n_jobs': None,
 'num_parallel_tree': None,
 'random_state': None,
 'reg_alpha': None,
 'reg_lambda': None,
 'sampling_method': None,
 'scale_pos_weight': None,
 'subsample': None,
 'tree_method': 'approx',
 'validate_parameters': None,
 'verbosity': None}

In [194]:
# sample_xgb = XGBRegressor(**params)
# sample_xgb.fit(xtrain,ytrain)
# ypred = sample_xgb.predict(xtest)
# ypred

In [195]:
# xgb_pyfunc.predict(xtest)

In [216]:
def mlflow_logger(artifact_path: str, data = None, model = None, model_name: str = None, 
                  should_log_parent_model: bool = False, should_register_champion_model:bool = False, registered_model_name: str = None, 
                  run_id: str =  None, exp_id: int|list = None):
    import json
    
    if not artifact_path and should_register_champion_model == False:
        # x_train = data
        print("Client_Tracking_URI: ", client.tracking_uri)
        print("Client_Registry_URI: ", client._registry_uri)
        filter_string = f"tags.run_type ilike 'parent'"
        best_run_id = mlflow.search_runs(experiment_ids=[exp_id],
                                         order_by = ['metrics.MSE'],
                                         filter_string = filter_string)[['run_id','artifact_uri','metrics.MSE']]['run_id'][0]
        best_artifact_path = mlflow.search_runs(experiment_ids=[exp_id],
                                                order_by = ['metrics.MSE'],
                                                filter_string = filter_string)[['run_id','artifact_uri','metrics.MSE']]['artifact_uri'][0]
        artifact_path_name = json.loads(mlflow.get_run(f'{best_run_id}').data.tags['mlflow.log-model.history'])[0]['artifact_path']
        print(f"\nBest_Run_ID: {best_run_id}")
        print(f"Best_Model's_Artifact_Path: {best_artifact_path}/{artifact_path_name}")

        client.create_registered_model(name = registered_model_name)
        client.create_model_version(name = registered_model_name,
                                    source = f"{best_artifact_path}/{artifact_path_name}",
                                    run_id = best_run_id)
    
    elif not artifact_path and should_register_champion_model == True:
        parent_runs = mlflow.search_registered_models()
        runs_df = mlflow.search_runs(experiment_ids = exp_id,
                            search_all_experiments = True,
                            filter_string = f"tags.run_type ilike 'parent'")
        runs_list_ = [parent_runs[i].latest_versions[0].run_id for i in range(len(parent_runs))]
        best_run = runs_df[runs_df['run_id'].isin(runs_list_)].sort_values(by = "metrics.MSE").reset_index(drop=True)['run_id'][0]
        best_artifact = runs_df[runs_df['run_id'].isin(runs_list_)].sort_values(by = "metrics.MSE").reset_index(drop=True)['artifact_uri'][0]
        artifact_path_name = json.loads(mlflow.get_run(f'{best_run}').data.tags['mlflow.log-model.history'])[0]['artifact_path']
        model_name = runs_df[runs_df['run_id'].isin(runs_list_)].sort_values(by = "metrics.MSE").reset_index(drop=True)['tags.mlflow.runName'][0]
        model_name = model_name.replace("HyperOpt for ", "").replace("Optuna for ", "")
        client.create_registered_model(name = f"Champion {model_name}",
                                    tags = {"model_type": "champion"},
                                    description = f"{model_name} is the new champion model")
        client.create_model_version(name = f"Champion {model_name}",
                                    source = f"{best_artifact}/{artifact_path_name}",
                                    run_id = best_run,
                                    tags = {"model_type" : "champion",
                                            "model_name" : model_name})

    elif should_log_parent_model == True and should_register_champion_model == False:
        x_train, x_test, y_train, y_test = data
        filter_string=f"tags.mlflow.parentRunId ILIKE '{run_id}'"
        best_run_id = mlflow.search_runs(experiment_ids=[exp_id],
                        filter_string=filter_string,
                        order_by = ['metrics.MSE'])[['run_id','artifact_uri','metrics.MSE']]['run_id'][0]
        best_artifact_path = mlflow.search_runs(experiment_ids=[exp_id],
                        filter_string=filter_string,
                        order_by = ['metrics.MSE'])[['run_id','artifact_uri','metrics.MSE']]['artifact_uri'][0]
        artifact_path_name = json.loads(mlflow.get_run(f'{best_run_id}').data.tags['mlflow.log-model.history'])[0]['artifact_path']
        print(f"Parent_Run_ID: {run_id}")
        print(f"Artifact_Path: {best_artifact_path}/{artifact_path_name}")
        if model_name == 'XGB_Regressor':
            best_model = mlflow.xgboost.load_model(f"{best_artifact_path}/{artifact_path_name}")
            params = client.get_run(best_run_id).data.params
            for key,value in params.items():
                try:
                    params[key] = eval(value)
                except:
                    params[key] = value
                    if value == 'nan':
                        params[key] = np.nan
            print("Best Params:\n",{key: value for key, value in params.items() if value is not None},"\n")
            signature = mlflow.xgboost.infer_signature(model_input = x_train,
                                                        model_output = best_model.predict(x_train),
                                                        params = {key: value for key, value in params.items() if value is not None})
            mlflow.xgboost.log_model(xgb_model = best_model,
                                     artifact_path = artifact_path,
                                     signature = signature)
        else:
            best_model = mlflow.sklearn.load_model(f"{best_artifact_path}/{artifact_path_name}")
            params = client.get_run(best_run_id).data.params
            for key,value in params.items():
                try:
                    params[key] = eval(value)
                except:
                    params[key] = value
                    if value == 'nan':
                        params[key] = np.nan
            print("Best Params:\n",{key: value for key, value in params.items() if value is not None},"\n")
            signature = mlflow.models.infer_signature(model_input = x_train,
                                                        model_output = best_model.predict(x_train),
                                                        params = {key: value for key, value in params.items() if value is not None})
            mlflow.sklearn.log_model(sk_model = best_model,
                                        artifact_path = artifact_path, 
                                        signature = signature)
        mlflow.log_params(params = params)
        mlflow.log_metrics(metrics = eval_metrics(y_test , best_model.set_params(**params).fit(x_train, y_train).predict(x_test)))

    else:
        x_train, y_test, y_pred = data
        mlflow.log_metrics(metrics = eval_metrics(y_test , y_pred)) 
        mlflow.log_params(params = model.get_params()) 
        if model_name == 'XGB_Regressor': 
            signature = mlflow.xgboost.infer_signature(model_input = x_train,
                                                      model_output = model.predict(x_train),
                                                      params = {key: value for key, value in model.get_params().items() if value is not None}) 
            mlflow.xgboost.log_model(xgb_model = model, 
                                        artifact_path = artifact_path,
                                        signature = signature)
        else:            
            signature = mlflow.models.infer_signature(model_input = x_train,
                                                      model_output = model.predict(x_train),
                                                      params = {key: value for key, value in model.get_params().items() if value is not None}) 
            mlflow.sklearn.log_model(sk_model = model, 
                                        artifact_path = artifact_path,
                                        signature = signature)   

In [218]:
PARAMS_PATH = 'params.yaml'
trial_number = 0
def parameter_tuning(model_class : ML_Model, 
                     model_name: str, 
                     x_train: pd.DataFrame, 
                     x_test: pd.DataFrame, 
                     y_train: pd.DataFrame, 
                     y_test: pd.DataFrame,
                     report_: dict,
                     *args):
       
    tuner_report = {}
    tuner_report['Optuna'] = {}
    tuner_report['HyperOpt'] = {}
    params_config = load_yaml(PARAMS_PATH)
    exp_id_list = []

    tags = {"tuner_1": "optuna",
            "tuner_2": "hyperopt",
            "metrics": "['mae', 'mse', 'rmse', 'r2_square']"} 
    exp_id = client.create_experiment(name = f"25_{model_name}_25", tags = tags) 

####################################################### OPTUNA #######################################################
    with mlflow.start_run(experiment_id = exp_id,
                          run_name = f"Optuna for {model_name}",
                          tags = {"tuner" : "optuna",
                                  "run_type": "parent"}) as optuna_parent_run:
        parent_run_id = optuna_parent_run.info.run_id

        def optuna_objective(trial):
            with mlflow.start_run(experiment_id = exp_id,
                                  run_name = f"Trial {(trial.number)+1} for {model_name} (optuna)",
                                  tags = {"run_type": "child"},
                                  nested = True) as child_run:
                space_optuna = {}
                for key,value in params_config['optuna'][model_name].items():
                    space_optuna[key] = eval(value)
                if model_name == 'Stacked_Classifier':
                    model = model_class.set_params(**space_optuna)
                else:
                    model = model_class(**space_optuna)
                # model.set_params(**space_optuna)
                model.fit(x_train, y_train)
                y_pred = model.predict(x_test)
                
                mse = eval_metrics(y_test , y_pred)["MSE"]
                
                data = (x_train, y_test, y_pred)
                mlflow_logger(data = data,
                              model = model,
                              model_name = model_name,
                            #   params = model.get_params(),
                              should_log_parent_model = False,
                              artifact_path = f'optuna_{model_name}' if model_name == 'XGB_Regressor' else f'optuna_{model_name}')           
                print("Artifacts URI of Optuna Child Run: ",mlflow.get_artifact_uri())
                return mse
            
        print("Artifacts URI of Optuna Parent Run: ",mlflow.get_artifact_uri())
        find_param=optuna.create_study(direction = "minimize")
        find_param.optimize(optuna_objective,n_trials=2)

        data = (x_train, x_test, y_train, y_test)
        mlflow_logger(data = data,
                      model_name = model_name,
                      should_log_parent_model = True,
                      run_id = parent_run_id,
                      exp_id = exp_id,
                    #   registered_model_name = f"Challenger_Optuna_{model_name}",
                      artifact_path = f'challenger_optuna_{model_name}' if model_name == 'XGB_Regressor' else f'challenger_optuna_{model_name}')

        tuner_report['Optuna'] = {'MSE':find_param.best_value, 'params': find_param.best_params}
        print (f"Optuna: {model_name} --- {tuner_report['Optuna']}\n\n")

####################################################### HYPEROPT #######################################################
    with mlflow.start_run(experiment_id = exp_id,
                          run_name = f"HyperOpt for {model_name}",
                          tags = {"tuner" : "hyperopt",
                                  "run_type": "parent"}) as hyperopt_parent_run:
        parent_run_id = hyperopt_parent_run.info.run_id
        global trial_number
        # trial_number = 0
        def hp_objective(space):
            global trial_number
            trial_number += 1
            with mlflow.start_run(experiment_id = exp_id,
                                  run_name = f"Trial {trial_number} for {model_name} (hyperopt)",
                                  tags = {"run_type": "child"},
                                  nested = True):
                
                if model_name == 'Stacked_Classifier':
                    model = model_class.set_params(**space)
                else:
                    model = model_class(**space)
                # model.set_params(**space)
                model.fit(x_train, y_train)
                y_pred = model.predict(x_test)

                mse = eval_metrics(y_test , y_pred)["MSE"]
                print ("MSE: ", mse)

                data = (x_train, y_test, y_pred)
                mlflow_logger(data = data,
                              model = model,
                              model_name = model_name,
                            #   params = model.get_params(),
                              should_log_parent_model = False,
                              artifact_path = f'hyperopt_{model_name}' if model_name == 'XGB_Regressor' else f'hyperopt_{model_name}')
                
                print("Artifacts URI of HyperOpt Child Run: ",mlflow.get_artifact_uri())
                return mse
        print("Artifacts URI of HyperOpt Parent Run: ",mlflow.get_artifact_uri())
        trials = Trials()
        space = {}
        for key,value in params_config['hyperopt'][model_name].items():
            space[key] = eval(value)
        best = fmin(fn= hp_objective,
                    space= space,
                    algo= tpe.suggest,
                    max_evals = 2,
                    trials= trials)
        best_params = space_eval(space,best)

        data = (x_train, x_test, y_train, y_test)
        mlflow_logger(data = data,
                      model_name = model_name,
                      should_log_parent_model = True,
                      run_id = parent_run_id,
                      exp_id = exp_id,
                    #   registered_model_name = f"Challenger_HyperOpt_{model_name}",
                      artifact_path = f'challenger_hyperopt_{model_name}' if model_name == 'XGB_Regressor' else f'challenger_hyperopt_{model_name}')

        tuner_report['HyperOpt'] = {'MSE':int(trials.average_best_error()), 'params': best_params}
        print (f"HyperOpt: {model_name} --- {tuner_report['HyperOpt']}\n\n")
        trial_number = 0

####################################################### Best_RMSE & Best_Fittable_Params #######################################################
    min_mse_value = min(tuner_report['Optuna']['MSE'],tuner_report['HyperOpt']['MSE'])
    if min_mse_value == tuner_report['Optuna']['MSE']:
        params = tuner_report['Optuna']['params']
    else:
        params = tuner_report['HyperOpt']['params']
    tuner_report['Fittable_Params'] = params
    tuner_report['Best_MSE'] = min_mse_value

    report_[model_name] = tuner_report
    print (f'\n\n{model_name}\nMin MSE: {min_mse_value}\n{report_[model_name]}\n\n')
    # print(report_.values())
    mses = [value['Best_MSE'] for value in report_.values()]
    min_mse = min(mses)
    best_model_so_far_ = [(i, min_mse, report_[i]['Fittable_Params']) for i in report_.keys() if min_mse == report_[i]['Best_MSE']]

    data = x_train
    mlflow_logger(data = data,
                  model_name = model_name,
                #   should_register_model = True,
                  exp_id = exp_id,
                  registered_model_name = f"Challenger_{model_name}",
                  artifact_path = None)
    exp_id_list.append(exp_id)

    return (tuner_report, report_, best_model_so_far_, exp_id_list)

In [219]:
models = {
    # "Decision_Tree_Regressor": DecisionTreeRegressor,
    # "Random Forest": RandomForestRegressor,
    "XGB_Regressor": XGBRegressor, 
    # # "CatBoosting Regressor": CatBoostRegressor(verbose=False),
    # "Ada_Boost": AdaBoostRegressor,
    "Linear Regression": LinearRegression,
    "Lasso": Lasso,
    "Ridge": Ridge,
}
experiment_names = [name for name in models.keys()]
report = {}
exp_id_list = []

for model_key, model_value in models.items():
    tuning_report,reports, best_model_so_far, exp_id_list_ = parameter_tuning(model_class = model_value,
                                                                model_name = model_key,
                                                                x_train = xtrain,
                                                                x_test = xtest,
                                                                y_train = ytrain,
                                                                y_test = ytest,
                                                                report_ = report)
    for i in exp_id_list_:
        exp_id_list.append(i)
    report[model_key] = reports[model_key]
    best_model_so_far_ = best_model_so_far
    print(f"\nBest model so far: {best_model_so_far_[0]}\n")
    print(f"Model: {model_key}\nReport:\n{tuning_report}\n")
    print("Experiment_ IDs: ",exp_id_list)

mlflow_logger(exp_id = exp_id_list,
              should_register_champion_model=True,
              artifact_path=None)

[I 2024-01-23 23:17:58,715] A new study created in memory with name: no-name-84067bcb-e73a-482c-a941-3d87b05c9309


Artifacts URI of Optuna Parent Run:  mlflow-artifacts:/c06ec037261a4fc4b77cef53b7e1d24c/4c89e089c82643b2bb63193649d7a82a/artifacts
Artifacts URI of Optuna Child Run:  mlflow-artifacts:/c06ec037261a4fc4b77cef53b7e1d24c/8caf8a21673043de89b127e62c3a9cee/artifacts


[I 2024-01-23 23:18:18,537] Trial 0 finished with value: 434695.8889700371 and parameters: {'n_estimators': 153, 'learning_rate': 0.8908728802305644, 'tree_method': 'exact'}. Best is trial 0 with value: 434695.8889700371.


Artifacts URI of Optuna Child Run:  mlflow-artifacts:/c06ec037261a4fc4b77cef53b7e1d24c/2689af18121e4ceba21b13ea32b2d6f3/artifacts


[I 2024-01-23 23:18:34,472] Trial 1 finished with value: 372997.47569661774 and parameters: {'n_estimators': 103, 'learning_rate': 0.6202072641830931, 'tree_method': 'exact'}. Best is trial 1 with value: 372997.47569661774.


Parent_Run_ID: 4c89e089c82643b2bb63193649d7a82a
Artifact_Path: mlflow-artifacts:/c06ec037261a4fc4b77cef53b7e1d24c/2689af18121e4ceba21b13ea32b2d6f3/artifacts/optuna_XGB_Regressor


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

Best Params:
 {'objective': 'reg:squarederror', 'enable_categorical': False, 'learning_rate': 0.6202072641830931, 'missing': nan, 'n_estimators': 103, 'tree_method': 'exact'} 

Optuna: XGB_Regressor --- {'MSE': 372997.47569661774, 'params': {'n_estimators': 103, 'learning_rate': 0.6202072641830931, 'tree_method': 'exact'}}


Artifacts URI of HyperOpt Parent Run:  mlflow-artifacts:/c06ec037261a4fc4b77cef53b7e1d24c/56c7e317b8c84c6592360e273f344d45/artifacts
MSE:                                                 
425747.7510581421                                    
Artifacts URI of HyperOpt Child Run:                 
mlflow-artifacts:/c06ec037261a4fc4b77cef53b7e1d24c/39235bd1cec64298a7c938bcd664aebb/artifacts
MSE:                                                                          
341204.2685284092                                                             
Artifacts URI of HyperOpt Child Run:                                          
mlflow-artifacts:/c06ec037261a4fc4b77cef53b7e1d

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

Best Params:
 {'objective': 'reg:squarederror', 'enable_categorical': False, 'learning_rate': 0.26050011078590574, 'missing': nan, 'n_estimators': 169, 'tree_method': 'hist'} 

HyperOpt: XGB_Regressor --- {'MSE': 341204, 'params': {'learning_rate': 0.26050011078590574, 'n_estimators': 169, 'tree_method': 'hist'}}




XGB_Regressor
Min MSE: 341204
{'Optuna': {'MSE': 372997.47569661774, 'params': {'n_estimators': 103, 'learning_rate': 0.6202072641830931, 'tree_method': 'exact'}}, 'HyperOpt': {'MSE': 341204, 'params': {'learning_rate': 0.26050011078590574, 'n_estimators': 169, 'tree_method': 'hist'}}, 'Fittable_Params': {'learning_rate': 0.26050011078590574, 'n_estimators': 169, 'tree_method': 'hist'}, 'Best_MSE': 341204}


Client_Tracking_URI:  https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow
Client_Registry_URI:  https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow

Best_Run_ID: 56c7e317b8c84c6592360e273f344d45
Best_Model's_Artifact_Path: mlflow-art

2024/01/23 23:20:01 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Challenger_XGB_Regressor, version 1



Best model so far: ('XGB_Regressor', 341204, {'learning_rate': 0.26050011078590574, 'n_estimators': 169, 'tree_method': 'hist'})

Model: XGB_Regressor
Report:
{'Optuna': {'MSE': 372997.47569661774, 'params': {'n_estimators': 103, 'learning_rate': 0.6202072641830931, 'tree_method': 'exact'}}, 'HyperOpt': {'MSE': 341204, 'params': {'learning_rate': 0.26050011078590574, 'n_estimators': 169, 'tree_method': 'hist'}}, 'Fittable_Params': {'learning_rate': 0.26050011078590574, 'n_estimators': 169, 'tree_method': 'hist'}, 'Best_MSE': 341204}

Experiment_ IDs:  ['56']


[I 2024-01-23 23:20:02,948] A new study created in memory with name: no-name-af5cbdc4-5fbc-4926-901c-3eb164a4d73d


Artifacts URI of Optuna Parent Run:  mlflow-artifacts:/0f9b39414fa14a63bab9de3b1da0c4a3/dc39905a13254f36a039e53ad0b3ab76/artifacts
Artifacts URI of Optuna Child Run:  mlflow-artifacts:/0f9b39414fa14a63bab9de3b1da0c4a3/3c19598e651944c39b07638f444e9e4f/artifacts


[I 2024-01-23 23:20:14,224] Trial 0 finished with value: 1013245.5452810809 and parameters: {'fit_intercept': True}. Best is trial 0 with value: 1013245.5452810809.


Artifacts URI of Optuna Child Run:  mlflow-artifacts:/0f9b39414fa14a63bab9de3b1da0c4a3/9918e140f391415cbcd9c322d954e88a/artifacts


[I 2024-01-23 23:20:24,369] Trial 1 finished with value: 16794676.448169537 and parameters: {'fit_intercept': False}. Best is trial 0 with value: 1013245.5452810809.


Parent_Run_ID: dc39905a13254f36a039e53ad0b3ab76
Artifact_Path: mlflow-artifacts:/0f9b39414fa14a63bab9de3b1da0c4a3/3c19598e651944c39b07638f444e9e4f/artifacts/optuna_Linear Regression


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

Best Params:
 {'copy_X': True, 'fit_intercept': True, 'positive': False} 

Optuna: Linear Regression --- {'MSE': 1013245.5452810809, 'params': {'fit_intercept': True}}


Artifacts URI of HyperOpt Parent Run:  mlflow-artifacts:/0f9b39414fa14a63bab9de3b1da0c4a3/8402ab42cbf844b58548a672decdc0c5/artifacts
MSE:                                                 
1013245.5452810809                                   
Artifacts URI of HyperOpt Child Run:                 
mlflow-artifacts:/0f9b39414fa14a63bab9de3b1da0c4a3/d9fa8681bf584ba483978cde42cbafaa/artifacts
MSE:                                                                           
16794676.448169537                                                             
Artifacts URI of HyperOpt Child Run:                                           
mlflow-artifacts:/0f9b39414fa14a63bab9de3b1da0c4a3/60f82664a6844fe29180459e59c3f3d8/artifacts
100%|██████████| 2/2 [00:20<00:00, 10.03s/trial, best loss: 1013245.5452810809]
Parent_Run_ID: 8402ab42cbf8

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

Best Params:
 {'copy_X': True, 'fit_intercept': True, 'positive': False} 

HyperOpt: Linear Regression --- {'MSE': 1013245, 'params': {'fit_intercept': True}}




Linear Regression
Min MSE: 1013245
{'Optuna': {'MSE': 1013245.5452810809, 'params': {'fit_intercept': True}}, 'HyperOpt': {'MSE': 1013245, 'params': {'fit_intercept': True}}, 'Fittable_Params': {'fit_intercept': True}, 'Best_MSE': 1013245}


Client_Tracking_URI:  https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow
Client_Registry_URI:  https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow

Best_Run_ID: 8402ab42cbf844b58548a672decdc0c5
Best_Model's_Artifact_Path: mlflow-artifacts:/0f9b39414fa14a63bab9de3b1da0c4a3/8402ab42cbf844b58548a672decdc0c5/artifacts/challenger_hyperopt_Linear Regression


2024/01/23 23:21:12 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Challenger_Linear Regression, version 1



Best model so far: ('XGB_Regressor', 341204, {'learning_rate': 0.26050011078590574, 'n_estimators': 169, 'tree_method': 'hist'})

Model: Linear Regression
Report:
{'Optuna': {'MSE': 1013245.5452810809, 'params': {'fit_intercept': True}}, 'HyperOpt': {'MSE': 1013245, 'params': {'fit_intercept': True}}, 'Fittable_Params': {'fit_intercept': True}, 'Best_MSE': 1013245}

Experiment_ IDs:  ['56', '57']


[I 2024-01-23 23:21:13,432] A new study created in memory with name: no-name-09e9a49c-0878-46ca-b394-8caec073ef79


Artifacts URI of Optuna Parent Run:  mlflow-artifacts:/851199c1a3fd4315bff545b1e9446c49/34898b9e47344726b1db8656ba72e96d/artifacts
Artifacts URI of Optuna Child Run:  mlflow-artifacts:/851199c1a3fd4315bff545b1e9446c49/b5c17da82573450cbbbe53d2372d0a9c/artifacts


[I 2024-01-23 23:21:24,475] Trial 0 finished with value: 1017342.6949566082 and parameters: {'alpha': 4.519561995433948, 'selection': 'cyclic'}. Best is trial 0 with value: 1017342.6949566082.


Artifacts URI of Optuna Child Run:  mlflow-artifacts:/851199c1a3fd4315bff545b1e9446c49/55fb229ada4f42879ecd0b95dce7f1b0/artifacts


[I 2024-01-23 23:21:37,365] Trial 1 finished with value: 1016510.0459203522 and parameters: {'alpha': 3.629548208149884, 'selection': 'random'}. Best is trial 1 with value: 1016510.0459203522.


Parent_Run_ID: 34898b9e47344726b1db8656ba72e96d
Artifact_Path: mlflow-artifacts:/851199c1a3fd4315bff545b1e9446c49/55fb229ada4f42879ecd0b95dce7f1b0/artifacts/optuna_Lasso


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

Best Params:
 {'alpha': 3.629548208149884, 'copy_X': True, 'fit_intercept': True, 'max_iter': 1000, 'positive': False, 'precompute': False, 'selection': 'random', 'tol': 0.0001, 'warm_start': False} 

Optuna: Lasso --- {'MSE': 1016510.0459203522, 'params': {'alpha': 3.629548208149884, 'selection': 'random'}}


Artifacts URI of HyperOpt Parent Run:  mlflow-artifacts:/851199c1a3fd4315bff545b1e9446c49/34ffe8fc46b048eaa479dbc28c78e310/artifacts
MSE:                                                 
1014226.8631539972                                   
Artifacts URI of HyperOpt Child Run:                 
mlflow-artifacts:/851199c1a3fd4315bff545b1e9446c49/c1382016df0b4590a431f2d4240590c9/artifacts
MSE:                                                                           
1016440.3716683302                                                             
Artifacts URI of HyperOpt Child Run:                                           
mlflow-artifacts:/851199c1a3fd4315bff545b1e9446c49/c0fc9909

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

Best Params:
 {'alpha': 0.56492386755904, 'copy_X': True, 'fit_intercept': True, 'max_iter': 1000, 'positive': False, 'precompute': False, 'selection': 'random', 'tol': 0.0001, 'warm_start': False} 

HyperOpt: Lasso --- {'MSE': 1014226, 'params': {'alpha': 0.56492386755904, 'selection': 'random'}}




Lasso
Min MSE: 1014226
{'Optuna': {'MSE': 1016510.0459203522, 'params': {'alpha': 3.629548208149884, 'selection': 'random'}}, 'HyperOpt': {'MSE': 1014226, 'params': {'alpha': 0.56492386755904, 'selection': 'random'}}, 'Fittable_Params': {'alpha': 0.56492386755904, 'selection': 'random'}, 'Best_MSE': 1014226}


Client_Tracking_URI:  https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow
Client_Registry_URI:  https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow

Best_Run_ID: 34ffe8fc46b048eaa479dbc28c78e310
Best_Model's_Artifact_Path: mlflow-artifacts:/851199c1a3fd4315bff545b1e9446c49/34ffe8fc46b048eaa479dbc28c78e310/artifacts/challenger_hyperopt_Lasso


2024/01/23 23:22:42 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Challenger_Lasso, version 1



Best model so far: ('XGB_Regressor', 341204, {'learning_rate': 0.26050011078590574, 'n_estimators': 169, 'tree_method': 'hist'})

Model: Lasso
Report:
{'Optuna': {'MSE': 1016510.0459203522, 'params': {'alpha': 3.629548208149884, 'selection': 'random'}}, 'HyperOpt': {'MSE': 1014226, 'params': {'alpha': 0.56492386755904, 'selection': 'random'}}, 'Fittable_Params': {'alpha': 0.56492386755904, 'selection': 'random'}, 'Best_MSE': 1014226}

Experiment_ IDs:  ['56', '57', '58']


[I 2024-01-23 23:22:43,908] A new study created in memory with name: no-name-88877eb5-4a8b-4517-b5e3-577addb2803c


Artifacts URI of Optuna Parent Run:  mlflow-artifacts:/4c61d2306f0a4f9a9a9b8ec163aef503/ea696cbfe20d43c2b938525c93de63ad/artifacts
Artifacts URI of Optuna Child Run:  mlflow-artifacts:/4c61d2306f0a4f9a9a9b8ec163aef503/709d2e0adc73426ab8009001c245a6b3/artifacts


[I 2024-01-23 23:22:56,348] Trial 0 finished with value: 1013310.3924545605 and parameters: {'alpha': 5.4379870364260965, 'solver': 'saga'}. Best is trial 0 with value: 1013310.3924545605.


Artifacts URI of Optuna Child Run:  mlflow-artifacts:/4c61d2306f0a4f9a9a9b8ec163aef503/752a6e321ba0436aa9fe067c2a765f2e/artifacts


[I 2024-01-23 23:23:08,998] Trial 1 finished with value: 1013317.005639842 and parameters: {'alpha': 6.017759147738401, 'solver': 'saga'}. Best is trial 0 with value: 1013310.3924545605.


Parent_Run_ID: ea696cbfe20d43c2b938525c93de63ad
Artifact_Path: mlflow-artifacts:/4c61d2306f0a4f9a9a9b8ec163aef503/709d2e0adc73426ab8009001c245a6b3/artifacts/optuna_Ridge


Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

Best Params:
 {'alpha': 5.4379870364260965, 'copy_X': True, 'fit_intercept': True, 'positive': False, 'solver': 'saga', 'tol': 0.0001} 

Optuna: Ridge --- {'MSE': 1013310.3924545605, 'params': {'alpha': 5.4379870364260965, 'solver': 'saga'}}


Artifacts URI of HyperOpt Parent Run:  mlflow-artifacts:/4c61d2306f0a4f9a9a9b8ec163aef503/ba806805f1d54288bc83ebeb3fd1dcb8/artifacts
MSE:                                                 
1013255.6846993136                                   
Artifacts URI of HyperOpt Child Run:                 
mlflow-artifacts:/4c61d2306f0a4f9a9a9b8ec163aef503/cd36823b2991491991b70dfa01286c22/artifacts
MSE:                                                                           
1013314.2260273049                                                             
Artifacts URI of HyperOpt Child Run:                                           
mlflow-artifacts:/4c61d2306f0a4f9a9a9b8ec163aef503/9edc4547208d47218dc3905724d56f40/artifacts
100%|██████████| 2/2 [00:21<00:00

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

Best Params:
 {'alpha': 0.9605197162724851, 'copy_X': True, 'fit_intercept': True, 'positive': False, 'solver': 'auto', 'tol': 0.0001} 

HyperOpt: Ridge --- {'MSE': 1013255, 'params': {'alpha': 0.9605197162724851, 'solver': 'auto'}}




Ridge
Min MSE: 1013255
{'Optuna': {'MSE': 1013310.3924545605, 'params': {'alpha': 5.4379870364260965, 'solver': 'saga'}}, 'HyperOpt': {'MSE': 1013255, 'params': {'alpha': 0.9605197162724851, 'solver': 'auto'}}, 'Fittable_Params': {'alpha': 0.9605197162724851, 'solver': 'auto'}, 'Best_MSE': 1013255}


Client_Tracking_URI:  https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow
Client_Registry_URI:  https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow

Best_Run_ID: ba806805f1d54288bc83ebeb3fd1dcb8
Best_Model's_Artifact_Path: mlflow-artifacts:/4c61d2306f0a4f9a9a9b8ec163aef503/ba806805f1d54288bc83ebeb3fd1dcb8/artifacts/challenger_hyperopt_Ridge


2024/01/23 23:23:58 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Challenger_Ridge, version 1



Best model so far: ('XGB_Regressor', 341204, {'learning_rate': 0.26050011078590574, 'n_estimators': 169, 'tree_method': 'hist'})

Model: Ridge
Report:
{'Optuna': {'MSE': 1013310.3924545605, 'params': {'alpha': 5.4379870364260965, 'solver': 'saga'}}, 'HyperOpt': {'MSE': 1013255, 'params': {'alpha': 0.9605197162724851, 'solver': 'auto'}}, 'Fittable_Params': {'alpha': 0.9605197162724851, 'solver': 'auto'}, 'Best_MSE': 1013255}

Experiment_ IDs:  ['56', '57', '58', '59']


2024/01/23 23:24:01 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Champion XGB_Regressor, version 1


In [31]:
source = mlflow.search_registered_models(filter_string = f"tags.model_type ilike 'champion'")[0].latest_versions[0].source
pyfunc_model = mlflow.pyfunc.load_model(model_uri = source,
                         dst_path = "artifacts\model")

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

In [42]:
pyfunc_model._model_impl.xgb_model.__class__.__name__

'XGBRegressor'

In [33]:
model = mlflow.pyfunc.load_model(f'file:artifacts\model\{pyfunc_model.metadata.artifact_path}')
model

mlflow.pyfunc.loaded_model:
  artifact_path: challenger_hyperopt_XGB_Regressor
  flavor: mlflow.xgboost
  run_id: 56c7e317b8c84c6592360e273f344d45

In [1]:
params = client.get_run(model.metadata.run_id).data.params
for key, value in params.items():
    try:
        params[key] = eval(value)
    except:
        params[key] = value
        if value == 'nan':
            params[key] = np.nan
params

NameError: name 'client' is not defined

In [2]:
ypred = model.predict(data = xtest,
              params = params)
eval_metrics(true = ytest, predicted = ypred)

NameError: name 'model' is not defined

In [34]:
ypred_ = model.predict(data = xtest)
            #   params = params)
eval_metrics(true = ytest, predicted = ypred_)

{'MAE': 295.64565271798546,
 'MSE': 341204.2685284092,
 'RMSE': 584.1269284397092,
 'R2_Squared': 0.9788853201053588}

In [59]:
# xgb = XGBRegressor(**{'n_estimators': 180, 'learning_rate': 2.5, 'booster': 'dart', 'tree_method': 'approx'})
# xgb.fit(xtrain,ytrain)
# ypred = xgb.predict(xtest)
# ypred

array([5.222161e+33, 5.222161e+33, 5.222161e+33, ..., 5.222161e+33,
       5.222161e+33, 5.222161e+33], dtype=float32)

In [81]:
xgb_df = mlflow.search_runs(experiment_ids=[51])

In [84]:
xgb_df['params.n_estimators']

0     164
1     164
2    None
3     161
4     179
5    None
Name: params.n_estimators, dtype: object

In [122]:
# cols = []
# for i in xgb_df.columns:
    

xgboost.sklearn.XGBRegressor

In [210]:

xgb_pyfunc = mlflow.pyfunc.load_model("mlflow-artifacts:/8ca767dc693a4cf0bbdb84a11f4688eb/7ec03febeb444500a73c2b8511607c82/artifacts/optuna_XGB_Regressor")

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

In [214]:
xgb_pyfunc.metadata.run_id

'7ec03febeb444500a73c2b8511607c82'

In [160]:
xgb_pyfunc.metadata.run_id

'7ec03febeb444500a73c2b8511607c82'

In [164]:
client.get_run('7ec03febeb444500a73c2b8511607c82').data.metrics

{'MAE': 302.74589490372,
 'MSE': 367812.623370314,
 'RMSE': 606.475575246287,
 'R2_Squared': 0.9772387202623}

In [165]:
xgb_pyfunc.metadata.run_id
client.get_run('7ec03febeb444500a73c2b8511607c82').data.params

{'objective': 'reg:squarederror',
 'base_score': 'None',
 'booster': 'None',
 'callbacks': 'None',
 'colsample_bylevel': 'None',
 'colsample_bynode': 'None',
 'colsample_bytree': 'None',
 'device': 'None',
 'early_stopping_rounds': 'None',
 'enable_categorical': 'False',
 'eval_metric': 'None',
 'feature_types': 'None',
 'gamma': 'None',
 'grow_policy': 'None',
 'importance_type': 'None',
 'interaction_constraints': 'None',
 'learning_rate': '0.48822719829052513',
 'max_bin': 'None',
 'max_cat_threshold': 'None',
 'max_cat_to_onehot': 'None',
 'max_delta_step': 'None',
 'max_depth': 'None',
 'max_leaves': 'None',
 'min_child_weight': 'None',
 'missing': 'nan',
 'monotone_constraints': 'None',
 'multi_strategy': 'None',
 'n_estimators': '179',
 'n_jobs': 'None',
 'num_parallel_tree': 'None',
 'random_state': 'None',
 'reg_alpha': 'None',
 'reg_lambda': 'None',
 'sampling_method': 'None',
 'scale_pos_weight': 'None',
 'subsample': 'None',
 'tree_method': 'approx',
 'validate_parameters':

In [199]:
xgb_model_ = mlflow.xgboost.load_model("mlflow-artifacts:/8ca767dc693a4cf0bbdb84a11f4688eb/7ec03febeb444500a73c2b8511607c82/artifacts/optuna_XGB_Regressor")

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

In [209]:
xgb_model_.

AttributeError: 'XGBRegressor' object has no attribute 'metadata'

In [152]:
xgb_model = mlflow.xgboost.load_model("mlflow-artifacts:/8ca767dc693a4cf0bbdb84a11f4688eb/925ac1fb7019490ebb96212c7f38d26b/artifacts/challenger_hyperopt_XGB_Regressor")

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

In [217]:
# for i in range(51, 56):
#     mlflow.delete_experiment(str(i))

In [44]:
ytest.shape

(38715, 1)

In [34]:
ytest.isna().sum()

price    0
dtype: int64

In [35]:
xtrain.isna().sum()

num_pipeline__carat      0
num_pipeline__depth      0
num_pipeline__table      0
num_pipeline__x          0
num_pipeline__y          0
num_pipeline__z          0
cat_pipeline__cut        0
cat_pipeline__color      0
cat_pipeline__clarity    0
dtype: int64

In [36]:
ytrain.isna().sum()

price    0
dtype: int64

In [37]:
xtest.isna().sum()

num_pipeline__carat      0
num_pipeline__depth      0
num_pipeline__table      0
num_pipeline__x          0
num_pipeline__y          0
num_pipeline__z          0
cat_pipeline__cut        0
cat_pipeline__color      0
cat_pipeline__clarity    0
dtype: int64

In [None]:
mlflow.get_registry_uri()

In [None]:
# for i in range(29,36):
#     mlflow.delete_experiment(str(i))

In [None]:
# client.get_experiment(657234830053300607)

In [None]:
# client.get_experiment(0)

In [None]:
# mlflow.set_tracking_uri("http://127.0.0.1:5000")

In [None]:
mlflow.get_tracking_uri()

In [None]:
# from mlflow.tracking.fluent import delete_experiment


# mlflow.delete_experiment('4')

In [None]:
import mlflow.pyfunc
model = mlflow.pyfunc.load_model('mlflow-artifacts:/a49703d315d345439b5d5b93f39980ca/4f7e9b02c89b42eeb401c9717452d5db/artifacts/challenger_hyperopt_Lasso')
print(model._model_meta._signature)

# model_detail = client.get_registered_model("Champion_Lasso")
# model_detail.from_proto.__getattribute__.

In [None]:
parent_runs = mlflow.search_registered_models()

In [None]:
for i in range(len(parent_runs)):
    print(parent_runs[i].latest_versions[0].run_id)

In [None]:
parent_runs

In [None]:
parent_runs[0].latest_versions[0].run_id

In [None]:
parent_runs[1].latest_versions[0].run_id

In [None]:
parent_runs[2].latest_versions[0].run_id

In [None]:
mlflow.search_runs(filter_string = "run_id ilike '8df0a2d34329402195dbdf87cc1efa92'")

In [None]:
# client.delete_experiment('14')
# client.delete_experiment('15')
# client.delete_experiment('16')
# client.delete_experiment('17')

In [None]:
mlflow.search_runs(experiment_ids = [13,14,15],
                    search_all_experiments = True,
                    order_by = ['metrics.RMSE ASC'])['artifact_uri'][0]

In [None]:
mlflow.get_run('8a40d1fee8d2423695067348b81b4421').data.tags['mlflow.log-model.history']

In [None]:
import json


In [None]:
# for i in models.keys():
#     print(mlflow.get_experiment_by_name(f"_____{i}_____").experiment_id)
runs_df = mlflow.search_runs(experiment_ids = [36,37,38],
                    search_all_experiments = True,
                    filter_string = f"tags.run_type ilike 'parent'")#['artifact_uri']
# mlflow.sklearn.load_model()

In [None]:
runs_list_ = [parent_runs[i].latest_versions[0].run_id for i in range(len(parent_runs))]
runs_list_

In [None]:
best_run = runs_df[runs_df['run_id'].isin(runs_list_)].sort_values(by = "metrics.MSE").reset_index(drop=True)['run_id'][0]
best_artifact = runs_df[runs_df['run_id'].isin(runs_list_)].sort_values(by = "metrics.MSE").reset_index(drop=True)['artifact_uri'][0]

In [None]:
best_run = runs_df[runs_df['run_id'].isin(runs_list_)].sort_values(by = "metrics.MSE").reset_index(drop=True)['run_id'][0]
best_artifact = runs_df[runs_df['run_id'].isin(runs_list_)].sort_values(by = "metrics.MSE").reset_index(drop=True)['artifact_uri'][0]
print(best_run)
print(best_artifact)

In [None]:
runs_df

In [None]:
client._registry_uri

In [None]:
parent_runs = mlflow.search_registered_models()
runs_df = mlflow.search_runs(experiment_ids = [36,37,38],
                    search_all_experiments = True,
                    filter_string = f"tags.run_type ilike 'parent'")
runs_list_ = [parent_runs[i].latest_versions[0].run_id for i in range(len(parent_runs))]
best_run = runs_df[runs_df['run_id'].isin(runs_list_)].sort_values(by = "metrics.MSE").reset_index(drop=True)['run_id'][0]
best_artifact = runs_df[runs_df['run_id'].isin(runs_list_)].sort_values(by = "metrics.MSE").reset_index(drop=True)['artifact_uri'][0]
artifact_path_name = json.loads(mlflow.get_run(f'{best_run}').data.tags['mlflow.log-model.history'])[0]['artifact_path']
model_name = runs_df[runs_df['run_id'].isin(runs_list_)].sort_values(by = "metrics.MSE").reset_index(drop=True)['tags.mlflow.runName'][0]
model_name = model_name.replace("HyperOpt for ", "").replace("Optuna for ", "")

# print(best_run)
# print(best_artifact)
# print(model_name)
client.create_registered_model(name = f"Champion {model_name}",
                               tags = {"model_type": "champion"},
                               description = f"{model_name} is the new champion model")
client.create_model_version(name = f"Champion {model_name}",
                            source = f"{best_artifact}/{artifact_path_name}",
                            run_id = best_run,
                            tags = {"model_type" : "champion",
                                    "model_name" : model_name})

In [None]:
artifact_path_name

In [None]:
from mlflow import track

In [None]:
# model_name = 'HyperOpt for Voting_Regressor'
# model_name = model_name.replace("HyperOpt for ", "").replace("Optuna for ", "")
# model_name

In [None]:
# from mlflow.tracking import MlflowClient

# client_ = MlflowClient(tracking_uri="https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow",
#                       registry_uri="https://dagshub.com/Raj-Narayanan-B/StudentMLProjectRegression.mlflow")

# client.search_runs()

In [None]:
mlflow.search_runs(#experiment_ids = [36],
                    #search_all_experiments = True,
                    order_by = ['metrics.RMSE ASC'])

In [None]:
client.create_registered_model(name = "HyperOpt_Trial_2_Linear_Regression")

In [None]:
client.create_model_version()

In [None]:
run_id = "b3b9527dd9a24d7a9fa169c6419a3a70"
filter_string_ = f"run_id='{run_id}'"


client.search_model_versions(filter_string = filter_string_)

In [None]:
artifact_path_name

In [None]:
mlflow.arti

In [None]:
artifact_path

In [None]:
mlflow.sklearn.load_model(model_uri = f"{artifact_path}/{artifact_path_name}")

In [None]:
filter_string=f"tags.mlflow.parentRunId ILIKE '0ec519647e624d45a80679789b45abf6'"
mlflow.search_runs(experiment_ids=[14],
                   filter_string=filter_string,
                   order_by = ['metrics.RMSE'])[['run_id','artifact_uri','metrics.RMSE']]

In [None]:
filter_string=f"tags.mlflow.parentRunId ILIKE '0ec519647e624d45a80679789b45abf6'"
best_run_id = mlflow.search_runs(experiment_ids=[14],
                   filter_string=filter_string,
                   order_by = ['metrics.RMSE'])[['run_id','artifact_uri','metrics.RMSE']]['run_id'][0]
best_run_id

In [None]:
filter_string=f"tags.mlflow.parentRunId ILIKE '0ec519647e624d45a80679789b45abf6'"
best_artifact_path = mlflow.search_runs(experiment_ids=[14],
                   filter_string=filter_string,
                   order_by = ['metrics.RMSE'])[['run_id','artifact_uri','metrics.RMSE']]['artifact_uri'][0]
best_artifact_path

In [None]:
artifact_path_name = json.loads(mlflow.get_run(f'{best_run_id}').data.tags['mlflow.log-model.history'])[0]['artifact_path']
artifact_path_name

In [None]:
f"{best_artifact_path}/{artifact_path_name}"

In [None]:
mlflow.sklearn.load_model(f"{best_artifact_path}/{artifact_path_name}")

In [None]:
mlflow.search_runs(experiment_ids=[13]).columns

In [None]:
filter_string=f"tags.mlflow.parentRunId ILIKE '0ec519647e624d45a80679789b45abf6'"
best_run_id = mlflow.search_runs(experiment_ids=[14],
                   filter_string=filter_string,
                   order_by = ['metrics.RMSE'])[['run_id','artifact_uri','metrics.RMSE']]['run_id'][0]
best_artifact_path = mlflow.search_runs(experiment_ids=[14],
                   filter_string=filter_string,
                   order_by = ['metrics.RMSE'])[['run_id','artifact_uri','metrics.RMSE']]['artifact_uri'][0]
artifact_path_name = json.loads(mlflow.get_run(f'{best_run_id}').data.tags['mlflow.log-model.history'])[0]['artifact_path']
print(artifact_path_name)
mlflow.sklearn.load_model(f"{best_artifact_path}/{artifact_path_name}")

In [None]:
filter_string=f"tags.mlflow.parentRunId ILIKE '1a39c3f0de20420aababdaa0d7535956'"
best_run_id = mlflow.search_runs(experiment_ids=[14],
                   filter_string=filter_string,
                   order_by = ['metrics.RMSE'])[['run_id','artifact_uri','metrics.RMSE']]['run_id'][0]
best_artifact_path = mlflow.search_runs(experiment_ids=[14],
                   filter_string=filter_string,
                   order_by = ['metrics.RMSE'])[['run_id','artifact_uri','metrics.RMSE']]['artifact_uri'][0]
artifact_path_name = json.loads(mlflow.get_run(f'{best_run_id}').data.tags['mlflow.log-model.history'])[0]['artifact_path']
print(artifact_path_name)
mlflow.sklearn.load_model(f"{best_artifact_path}/{artifact_path_name}")

In [None]:
mlflow.search_runs(experiment_ids=[14],
                #    filter_string=filter_string,
                   order_by = ['metrics.RMSE'])[['run_id','artifact_uri','metrics.RMSE']]

In [None]:
# filter_string=f"tags.mlflow.parentRunId ILIKE '{hyper_opt_parent_run_id}'"
best_run_id = mlflow.search_runs(experiment_ids=[14],
                   order_by = ['metrics.RMSE'])[['run_id','artifact_uri','metrics.RMSE']]['run_id'][0]
best_artifact_path = mlflow.search_runs(experiment_ids=[14],
                   order_by = ['metrics.RMSE'])[['run_id','artifact_uri','metrics.RMSE']]['artifact_uri'][0]
artifact_path_name = json.loads(mlflow.get_run(f'{best_run_id}').data.tags['mlflow.log-model.history'])[0]['artifact_path']
# print(hyper_opt_parent_run_id)
print(best_run_id)
print(artifact_path_name)
mlflow.sklearn.load_model(f"{best_artifact_path}/{artifact_path_name}")

In [None]:
filter_string=f"tags.mlflow.runName LIKE 'HyperOpt for Lasso'"
hyper_opt_parent_run_id = mlflow.search_runs(experiment_ids=[14],
                   filter_string=filter_string,
                   order_by = ['metrics.RMSE'])['run_id'][0]

In [None]:
hyper_opt_parent_run_id

In [55]:
client.list_artifacts('7ec03febeb444500a73c2b8511607c82')[0].path

'optuna_XGB_Regressor'

In [48]:
mlflow.get_run(f'7ec03febeb444500a73c2b8511607c82').data.tags

{'mlflow.log-model.history': '[{"run_id": "7ec03febeb444500a73c2b8511607c82", "artifact_path": "optuna_XGB_Regressor", "utc_time_created": "2024-01-23 14:13:49.515310", "flavors": {"python_function": {"loader_module": "mlflow.xgboost", "python_version": "3.10.13", "data": "model.xgb", "env": {"conda": "conda.yaml", "virtualenv": "python_env.yaml"}}, "xgboost": {"xgb_version": "2.0.3", "data": "model.xgb", "model_class": "xgboost.sklearn.XGBRegressor", "model_format": "xgb", "code": null}}, "model_uuid": "883ebde1f79c4477bb0425075fe1a2fd", "mlflow_version": "2.9.2", "model_size_bytes": 699936, "signature": {"inputs": "[{\\"type\\": \\"double\\", \\"name\\": \\"num_pipeline__carat\\"}, {\\"type\\": \\"double\\", \\"name\\": \\"num_pipeline__depth\\"}, {\\"type\\": \\"double\\", \\"name\\": \\"num_pipeline__table\\"}, {\\"type\\": \\"double\\", \\"name\\": \\"num_pipeline__x\\"}, {\\"type\\": \\"double\\", \\"name\\": \\"num_pipeline__y\\"}, {\\"type\\": \\"double\\", \\"name\\": \\"num_p

In [49]:
mlflow.get_artifact_uri()

RestException: INVALID_PARAMETER_VALUE: Response: {'error_code': 'INVALID_PARAMETER_VALUE'}

In [51]:
mlflow.get_artifact_uri()

RestException: INVALID_PARAMETER_VALUE: Response: {'error_code': 'INVALID_PARAMETER_VALUE'}