# Experiment Tracking using MLflow

To use a local tracking server, run `mlflow ui` in a terminal and leave that terminal running.
The terminal output confirms the URL to open in a browser window:

http://127.0.0.1:5000

In [3]:
import mlflow
import mlflow.sklearn

## Data prep

In [4]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Build the path with pathlib so the separator is right on every OS; the
# previous 'data\\...' literal only resolved on Windows.
raw_data = pd.read_csv(Path('data') / 'bank_transactions_data_2.csv')

# Numeric columns that are scaled here and used as clustering features.
numeric_cols = ['TransactionAmount', 'CustomerAge', 'TransactionDuration', 'LoginAttempts', 'AccountBalance']

# StandardScaler (default settings) centres each column and scales it to unit
# variance. fit_transform returns a bare array, so the column names and the
# row index are re-attached to keep numeric_scaled aligned with raw_data.
scaler = StandardScaler()
numeric_scaled = pd.DataFrame(
    scaler.fit_transform(raw_data[numeric_cols]),
    columns=numeric_cols,
    index=raw_data.index,
)

Set up the experiment and the tracking URL

In [5]:
# Point MLflow at the tracking server first: set_experiment() looks up (or
# creates) the experiment in whichever tracking store is configured at the
# moment it is called, so the URI must be set before the experiment is chosen.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("TransactionFraudClustering")

2024/12/03 17:27:45 INFO mlflow.tracking.fluent: Experiment with name 'TransactionFraudClustering' does not exist. Creating a new experiment.


In [6]:
from sklearn import metrics
from sklearn.metrics import pairwise_distances

In [19]:
# Number of clusters passed to KMeans in the run below and logged to MLflow
# as the "Num_Clusters" parameter.
n_clusters = 8

Start a run and log the relevant hyperparameters and metrics so that runs can be compared

In [20]:
with mlflow.start_run():
    # Fit KMeans on the scaled features; once fitted, the estimator's labels_
    # attribute holds the cluster index assigned to each row.
    kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(numeric_scaled)
    kmeans_labels = kmeans.labels_
    raw_data['KMeans_Cluster'] = kmeans_labels

    # Clustering-quality scores computed from the features and the labels.
    sil_score = metrics.silhouette_score(numeric_scaled, kmeans_labels, metric='euclidean')
    calhar_score = metrics.calinski_harabasz_score(numeric_scaled, kmeans_labels)

    # Record the hyperparameter and both scores against the active MLflow run.
    mlflow.log_param("Num_Clusters", n_clusters)
    for metric_name, metric_value in (
        ("Silhouette_score", sil_score),
        ("Calinski_Harabasz_score", calhar_score),
    ):
        mlflow.log_metric(metric_name, metric_value)

🏃 View run unleashed-shrimp-307 at: http://127.0.0.1:5000/#/experiments/170621176408125343/runs/fbe10e418d2c41f18bd677e0355a5be2
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/170621176408125343


