### Initialize MLflow tracking, emulating how Sientia website environment works

In [None]:
import sientia_tracker.regression as regression
import mlflow
import os


# Initialize MLflow tracking
tracking_uri = "file:./tmp/mlruns"
username = "example_user"
password = "example_password"
project_name = "example_project_regression"

tracker = regression.RegressionTracker(tracking_uri,username,password)
tracker.set_project(project_name)



### Set parameters needed to save the model. They are: the experiment name, inputs, training size, a flag to indicate if the data was shuffled 

In [17]:
dataset_name= "California Housing"
inputs= "MedInc, HouseAge, AveRooms, AveOccup, Latitude, Longitude"
train_size = 0.8
shuffle = False


### Load the dataset and create a model using default values of the run parameters

In [18]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score


# Load dataset
data = fetch_california_housing()
X = data.data
y = data.target

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = train_size, random_state=42, shuffle=shuffle)

# Initialize and train model
model = DecisionTreeRegressor()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Calculate metrics
r2 = r2_score(y_test, y_pred)


### Initialize run

In [19]:
# Start a run
mlflow.end_run()

run = tracker.save_experiment(model, dataset_name=dataset_name, inputs=inputs, train_size=train_size, r2=r2,shuffle=shuffle)
run_id = run.info.run_id


Saving experiment example_project_regression


### Log models and metrics

In [20]:
# Log parameters and metrics
tracker.log_params({"max_iter": 1000})
# Log model
artifact_path = "LogisticRegression_for_Iris"
tracker.log_model(model, artifact_path)
# End the run
mlflow.end_run()

### Retrieve information of run

In [21]:
# Retrieve the run using the run ID
retrieved_run = tracker.client.get_run(run_id)

# Access and print metrics and params 
metrics = retrieved_run.data.metrics
params = retrieved_run.data.params
print("Metrics:", metrics)
for key, value in params.items():
    print( key,':' ,value)


model = tracker.client

Metrics: {'r2': 0.44760460965011317}
Dataset : California Housing
Date Column : date
Inputs : MedInc, HouseAge, AveRooms, AveOccup, Latitude, Longitude
max_iter : 1000
Model : Linear Regression
Shuffle : False
Target : target
Train Size : 0.8


# Retrieve the model


In [22]:
# Ensure the destiny path exists
dst_path="./local_model"
os.makedirs(dst_path, exist_ok=True)

# Download the model
local_path =  tracker.client.download_artifacts(run_id, artifact_path, dst_path="./local_model")
model = mlflow.pyfunc.load_model(local_path)

# Make predictions
print(model.predict(X_test))

[0.746 1.168 1.227 ... 0.857 0.857 0.735]
