In [None]:
## Motivation
# 1.1 - Spreadsheets are often used to record parameter configs & data sources
# 1.2 - Inefficient collaboration & the need to standardize practices

## MLflow Components
# 1. Tracking - Experiments (code, data, model, configs and results) & Comparing for model selection
# 2. Projects - Packaging code to make it reproducible on any platform
# 3. Models - Deploying ML models in diverse environments; supports models built with various libraries
# 4. Model Registry - Manage, organize, version and track ML models & their metadata
# 5. Deployments for LLMs - Usage & management of various LLM providers
# 6. LLM Evaluate - Evaluating LLMs and prompts

## MLflow Tracking
# Experiment with models by training them on various hyperparameters, testing new features, adding more data and using different algorithms - hence so many combinations to keep track of
# Reproducibility - By Logging & recording all params, code versions and dependencies used
# An experiment is a collection of runs, where each run represents one model training. It groups related runs together: starting an experiment creates a dedicated space for them, allowing us to track & compare runs within that experiment easily
# A run is a single execution of an ML workflow within a specific experiment. It holds all details of that execution: code, data, params, metrics & artifacts
# MLflow logs various details of a run.


In [2]:
import sqlite3
import pandas as pd

# Open the SQLite database file; the path is relative to the working directory.
# NOTE(review): presumably this is the MLflow tracking store - confirm.
conn = sqlite3.connect('my.db')

# Cursor through which the SQL statements in this notebook are executed.
cursor = conn.cursor()

# sqlite_master is queried for the names of all entries of type 'table'.
table_names = cursor.execute(
    "SELECT name FROM sqlite_master WHERE type='table';"
).fetchall()

# Each fetched row is printed as-is (one row per line).
for name_row in table_names:
    print(name_row)

('experiments',)
('alembic_version',)
('experiment_tags',)
('registered_models',)
('runs',)
('registered_model_tags',)
('model_versions',)
('latest_metrics',)
('metrics',)
('registered_model_aliases',)
('inputs',)
('input_tags',)
('params',)
('trace_info',)
('trace_tags',)
('trace_request_metadata',)
('tags',)
('datasets',)
('logged_models',)
('logged_model_metrics',)
('logged_model_params',)
('logged_model_tags',)
('model_version_tags',)


In [None]:
# Read the whole `metrics` table and display it as a DataFrame.
rows = cursor.execute("Select * from metrics").fetchall()

# The first field of every cursor.description entry is used as the column label.
column_name = [label for label, *_ in cursor.description]

pd.DataFrame(data=rows, columns=column_name)

Unnamed: 0,key,value,timestamp,run_uuid,step,is_nan
0,training_precision_score,0.958627,1753113554784,78d1fc4855c54a08a4cca2c6d0cd0aea,0,0
1,training_recall_score,0.9575,1753113554784,78d1fc4855c54a08a4cca2c6d0cd0aea,0,0
2,training_f1_score,0.957463,1753113554784,78d1fc4855c54a08a4cca2c6d0cd0aea,0,0
3,training_accuracy_score,0.9575,1753113554784,78d1fc4855c54a08a4cca2c6d0cd0aea,0,0
4,training_log_loss,0.179907,1753113554784,78d1fc4855c54a08a4cca2c6d0cd0aea,0,0
5,training_roc_auc,0.995568,1753113554784,78d1fc4855c54a08a4cca2c6d0cd0aea,0,0
6,training_score,0.9575,1753113555932,78d1fc4855c54a08a4cca2c6d0cd0aea,0,0


In [4]:
# Read the whole `params` table and display it as a DataFrame.
rows = cursor.execute("Select * from params").fetchall()

# The first field of every cursor.description entry is used as the column label.
column_name = [label for label, *_ in cursor.description]

pd.DataFrame(data=rows, columns=column_name)

Unnamed: 0,key,value,run_uuid
0,bootstrap,True,78d1fc4855c54a08a4cca2c6d0cd0aea
1,ccp_alpha,0.0,78d1fc4855c54a08a4cca2c6d0cd0aea
2,class_weight,,78d1fc4855c54a08a4cca2c6d0cd0aea
3,criterion,gini,78d1fc4855c54a08a4cca2c6d0cd0aea
4,max_depth,5,78d1fc4855c54a08a4cca2c6d0cd0aea
5,max_features,sqrt,78d1fc4855c54a08a4cca2c6d0cd0aea
6,max_leaf_nodes,,78d1fc4855c54a08a4cca2c6d0cd0aea
7,max_samples,,78d1fc4855c54a08a4cca2c6d0cd0aea
8,min_impurity_decrease,0.0,78d1fc4855c54a08a4cca2c6d0cd0aea
9,min_samples_leaf,1,78d1fc4855c54a08a4cca2c6d0cd0aea
