### Imports

In [11]:
import os 
import mlflow
import mlflow.sklearn
from dotenv import load_dotenv

from datetime import datetime
from mlflow.tracking import MlflowClient
import random
from random import randint
from sklearn.ensemble import RandomForestRegressor

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, recall_score

### MLFlow init

In [12]:
mlflow_tracking_uri = "http://20.19.180.163:5000"
mlflow_exp = "robin_experiment"
#file_path = "artifacts/"

load_dotenv()
MLFLOW_S3_ENDPOINT_URL = os.getenv('MLFLOW_S3_ENDPOINT_URL')
AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')

print(AWS_ACCESS_KEY_ID)
print(AWS_SECRET_ACCESS_KEY)

4ystUvWaFRjD7vtl
f99NoVvnJ6xJYg0e4Id7td0Nvdobt3pA


### Data Preprocessing 

In [3]:
train_data = pd.read_csv('data/train.csv')
test_data = pd.read_csv('data/test.csv')

train_data = train_data.drop(columns=['PassengerId', 'HomePlanet', 'CryoSleep','Cabin', 'Destination', 'VIP', 'Name', 'Spa', 'VRDeck'])
train_data = train_data.dropna()

test_data = test_data.drop(columns=['PassengerId', 'HomePlanet', 'CryoSleep','Cabin', 'Destination', 'VIP', 'Name','Spa', 'VRDeck'])
test_data = test_data.dropna()

X = train_data.drop(['Transported'], axis=1)
y = train_data['Transported']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Mlflow Run

#### Config Run :

In [4]:
mlflow.set_tracking_uri(mlflow_tracking_uri)
mlflow.set_experiment(mlflow_exp)

model_name = "robi-clf-run5"

run_name = "{year}_{month}_{day}_{hour}_{minute}_{model_name}_run".format(
        year=datetime.today().year,
        month=datetime.today().month,
        day=datetime.today().day,
        hour=datetime.today().hour,
        minute=datetime.today().minute,
        model_name=model_name
    )

#### Start Run

In [5]:
with mlflow.start_run(run_name=run_name) as run:  
    #generate random parameters and log them  
    max_depth = randint(2, 10)
    mlflow.log_param("max_depth", max_depth)

    min_samples_split = randint(4, 20)
    mlflow.log_param("min_samples_split", min_samples_split)
    
    criterion = random.choice(["entropy", "log_loss", "gini"])
    mlflow.log_param("criterion", criterion)

    params = {"max_depth": max_depth, "min_samples_split": min_samples_split, "criterion":criterion}
    # initiate model with previous chosen params
    clf = DecisionTreeClassifier(**params)
    clf.fit(X_train.values, y_train)
    y_pred = clf.predict(X_test.values)

    # log accuracy and recall_score as metrics
    accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred, average='weighted')
    mlflow.log_metrics({"accuracy": accuracy, "recall_score": recall})
    
    mlflow.sklearn.log_model(sk_model=clf, artifact_path="model", registered_model_name="clf-titanic-robin")

    mlflow.end_run()

Successfully registered model 'clf-titanic-robin'.
2023/03/24 16:48:15 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: clf-titanic-robin, version 1
Created version '1' of model 'clf-titanic-robin'.


In [6]:
experiment_name = "robin_experiment"
metric_name = "accuracy"
search_runs = mlflow.search_runs(experiment_ids=[mlflow.get_experiment_by_name(experiment_name).experiment_id])
search_runs

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.accuracy,metrics.recall_score,params.min_samples_split,params.criterion,params.max_depth,tags.mlflow.user,tags.mlflow.source.name,tags.mlflow.runName,tags.mlflow.source.type,tags.mlflow.log-model.history
0,501d3c7bfb4e412a83dd57b3a3e53ee8,7,FINISHED,http://20.19.180.163:9000/robin-mlflow-artifac...,2023-03-24 15:48:13.145000+00:00,2023-03-24 15:48:15.035000+00:00,0.734296,0.734296,13,gini,4,robinfournier,/Users/robinfournier/Desktop/YNOV/IACloud/mlfl...,2023_3_24_16_48_robi-clf-run5_run,LOCAL,"[{""run_id"": ""501d3c7bfb4e412a83dd57b3a3e53ee8""..."
1,4a9125757fc94d5ca8784ed16a3e9ff5,7,FAILED,http://20.19.180.163:9000/robin-mlflow-artifac...,2023-03-24 15:47:57.776000+00:00,2023-03-24 15:47:59.403000+00:00,0.71294,0.71294,16,log_loss,10,robinfournier,/Users/robinfournier/Desktop/YNOV/IACloud/mlfl...,2023_3_24_16_47_robi-clf-run5_run,LOCAL,
2,14af9bbb9f23495583426e1507900de2,7,FAILED,http://20.19.180.163:9000/robin-mlflow-artifac...,2023-03-24 15:47:23.365000+00:00,2023-03-24 15:47:24.895000+00:00,0.715452,0.715452,12,gini,3,robinfournier,/Users/robinfournier/Desktop/YNOV/IACloud/mlfl...,2023_3_24_16_47_robi-clf-run5_run,LOCAL,
3,d4d2150058f843b1bd479d1b9027a299,7,FAILED,http://20.19.180.163:9000/robin-mlflow-artifac...,2023-03-24 15:47:09.466000+00:00,2023-03-24 15:47:11.137000+00:00,0.690955,0.690955,14,gini,2,robinfournier,/Users/robinfournier/Desktop/YNOV/IACloud/mlfl...,2023_3_24_16_39_robi-clf-run5_run,LOCAL,
4,5ff0abb84f8d4949b6c810acf32d6613,7,FAILED,http://20.19.180.163:9000/robin-mlflow-artifac...,2023-03-24 15:45:32.219000+00:00,2023-03-24 15:45:33.858000+00:00,0.717965,0.717965,4,entropy,9,robinfournier,/Users/robinfournier/Desktop/YNOV/IACloud/mlfl...,2023_3_24_16_39_robi-clf-run5_run,LOCAL,
5,81057ce96ee14f13bfdffc5e35e96e4d,7,FAILED,http://20.19.180.163:9000/robin-mlflow-artifac...,2023-03-24 15:44:00.531000+00:00,2023-03-24 15:44:02.242000+00:00,0.715452,0.715452,13,gini,3,robinfournier,/Users/robinfournier/Desktop/YNOV/IACloud/mlfl...,2023_3_24_16_39_robi-clf-run5_run,LOCAL,
6,fa1c7f1a68b14b1f90a371d1b0c8cbb3,7,FINISHED,http://20.19.180.163:9000/robin-mlflow-artifac...,2023-03-24 15:28:49.086000+00:00,2023-03-24 15:28:49.330000+00:00,0.720477,0.720477,8,log_loss,5,robinfournier,/Users/robinfournier/Desktop/YNOV/IACloud/mlfl...,2023_3_24_16_28_robi-clf-run4_run,LOCAL,
7,ec35b2bc3c694152aa44779e79a2fe57,7,FINISHED,http://20.19.180.163:9000/robin-mlflow-artifac...,2023-03-24 15:08:25.210000+00:00,2023-03-24 15:08:25.443000+00:00,0.723618,0.723618,20,gini,7,robinfournier,/Users/robinfournier/Desktop/YNOV/IACloud/mlfl...,2023_3_24_16_8_robi-clf-run3_run,LOCAL,
8,eb10c3b4706a43f2898885c950cf1831,7,FINISHED,http://20.19.180.163:9000/robin-mlflow-artifac...,2023-03-24 15:07:39.066000+00:00,2023-03-24 15:07:39.298000+00:00,0.714196,0.714196,16,entropy,3,robinfournier,/Users/robinfournier/Desktop/YNOV/IACloud/mlfl...,2023_3_24_16_7_robi-clf-run2_run,LOCAL,
9,3505b7783411425cbfc5ae7b192b5390,7,FINISHED,http://20.19.180.163:9000/robin-mlflow-artifac...,2023-03-24 15:02:28.643000+00:00,2023-03-24 15:02:28.870000+00:00,0.718593,0.718593,15,gini,5,robinfournier,/Users/robinfournier/Desktop/YNOV/IACloud/mlfl...,2023_3_24_16_2_robi-clf-run1_run,LOCAL,
