In [15]:
import joblib
import mlflow
import pandas as pd
from mlflow.exceptions import MlflowException
from mlflow.tracking import MlflowClient

# MLflow configuration for this notebook.
# The tracking URI is a relative path, so it depends on the directory the
# kernel was started in.
MLFLOW_TRACKING_URI = '../models/mlruns'
# Name of the experiment whose runs are inspected in the cells below.
MLFLOW_EXPERIMENT_NAME = "churn_prediction"
# Set the tracking URI before creating the client, which is constructed
# without an explicit URI of its own.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
client = MlflowClient()


In [16]:
# Look up the experiment by name; the bare expression on the last line
# displays the returned object as the cell output.
experiment = client.get_experiment_by_name(MLFLOW_EXPERIMENT_NAME)
experiment



<Experiment: artifact_location='file:C:/Users/Abdelhakiem/Documents/CodingLandscape/side_projects/Integrated_Telco_Customer_Churn_Framework/models/mlruns/958570086509240819', creation_time=1749675738468, experiment_id='958570086509240819', last_update_time=1749675738468, lifecycle_stage='active', name='churn_prediction', tags={}>

In [None]:
# Fetch the runs of the experiment through the fluent API and display the
# resulting table as the cell output.
runs = mlflow.search_runs(experiment_ids=[experiment.experiment_id])
runs

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.f1_score,metrics.precision,metrics.accuracy,metrics.recall,...,params.probability,params.random_state,params.n_jobs,params.max_iter,tags.mlflow.source.git.commit,tags.mlflow.source.type,tags.mlflow.user,tags.mlflow.source.name,tags.mlflow.runName,tags.mlflow.log-model.history
0,f81e4e41bdae4724a5f0c30a5abcaf7a,958570086509240819,FINISHED,file:C:/Users/Abdelhakiem/Documents/CodingLand...,2025-06-11 21:22:48.188000+00:00,2025-06-11 21:22:54.404000+00:00,0.557018,0.401899,0.617062,0.907143,...,,,,,212606e22d23b534365a84696fb33203af9154d0,LOCAL,Abdelhakiem,main.py,naive_bayes_20250612_002248,"[{""run_id"": ""f81e4e41bdae4724a5f0c30a5abcaf7a""..."
1,ebdacefe47c94268b9a80fb6c3e7fe7f,958570086509240819,FINISHED,file:C:/Users/Abdelhakiem/Documents/CodingLand...,2025-06-11 21:22:38.865000+00:00,2025-06-11 21:22:44.418000+00:00,0.586751,0.525424,0.751659,0.664286,...,True,42.0,,,212606e22d23b534365a84696fb33203af9154d0,LOCAL,Abdelhakiem,main.py,svm_20250612_002238,"[{""run_id"": ""ebdacefe47c94268b9a80fb6c3e7fe7f""..."
2,89e21c88edba446dbb304326fcb36140,958570086509240819,FINISHED,file:C:/Users/Abdelhakiem/Documents/CodingLand...,2025-06-11 21:16:43.957000+00:00,2025-06-11 21:16:50.419000+00:00,0.57483,0.548701,0.763033,0.603571,...,,42.0,-1.0,,212606e22d23b534365a84696fb33203af9154d0,LOCAL,Abdelhakiem,main.py,random_forest_20250612_001643,"[{""run_id"": ""89e21c88edba446dbb304326fcb36140""..."
3,bcf09256f8784a2cbad5432ae957afd4,958570086509240819,FINISHED,file:C:/Users/Abdelhakiem/Documents/CodingLand...,2025-06-11 21:13:16.816000+00:00,2025-06-11 21:13:21.774000+00:00,0.59845,0.528767,0.754502,0.689286,...,,42.0,,,212606e22d23b534365a84696fb33203af9154d0,LOCAL,Abdelhakiem,main.py,decision_tree_20250612_001316,"[{""run_id"": ""bcf09256f8784a2cbad5432ae957afd4""..."
4,57aaa78d79f04717a834b7d8a67b5fe9,958570086509240819,FINISHED,file:C:/Users/Abdelhakiem/Documents/CodingLand...,2025-06-11 21:13:01.667000+00:00,2025-06-11 21:13:10.464000+00:00,0.605304,0.537396,0.76019,0.692857,...,,42.0,,1000.0,212606e22d23b534365a84696fb33203af9154d0,LOCAL,Abdelhakiem,main.py,logistic_regression_20250612_001300,"[{""run_id"": ""57aaa78d79f04717a834b7d8a67b5fe9""..."
5,c098d2fc26ea4a1c8b6e71b74eaa5279,958570086509240819,FINISHED,file:C:/Users/Abdelhakiem/Documents/CodingLand...,2025-06-11 21:02:34.139000+00:00,2025-06-11 21:02:41.666000+00:00,0.575342,0.501326,0.735545,0.675,...,,42.0,,,212606e22d23b534365a84696fb33203af9154d0,LOCAL,Abdelhakiem,main.py,decision_tree_20250612_000233,"[{""run_id"": ""c098d2fc26ea4a1c8b6e71b74eaa5279""..."


In [None]:

def _format_metric(metrics, name):
    """Return metric ``name`` formatted to four decimals, or 'N/A' if it was not logged."""
    value = metrics.get(name)
    return "N/A" if value is None else f"{value:.4f}"


def find_best_model_by_recall(experiment_name, mlflow_client=None):
    """Find the run with the highest recall in an experiment and print a summary.

    Args:
        experiment_name: Name of the MLflow experiment to inspect.
        mlflow_client: Optional client object providing
            ``get_experiment_by_name`` and ``search_runs``. When omitted, the
            notebook-level ``client`` is used.

    Returns:
        The run with the highest ``recall`` metric, or ``None`` when the
        experiment contains no runs. Runs that did not log ``recall`` are
        ranked with a value of -1.

    Raises:
        ValueError: If no experiment with the given name exists.
    """
    if mlflow_client is None:
        mlflow_client = client

    # Get experiment by name
    experiment = mlflow_client.get_experiment_by_name(experiment_name)
    if not experiment:
        raise ValueError(f"Experiment '{experiment_name}' not found")

    # Get all runs for the experiment
    runs = mlflow_client.search_runs(
        experiment_ids=[experiment.experiment_id],
        order_by=["metrics.recall DESC"],
    )

    if not runs:
        print("No runs found in experiment")
        return None

    # One summary row per run; -1 marks a metric that the run did not log.
    results_df = pd.DataFrame(
        [
            {
                "run_id": run.info.run_id,
                "recall": run.data.metrics.get("recall", -1),
                "f1_score": run.data.metrics.get("f1_score", -1),
                "accuracy": run.data.metrics.get("accuracy", -1),
                "roc_auc": run.data.metrics.get("roc_auc", -1),
                "run_date": run.info.start_time,
            }
            for run in runs
        ]
    )

    # Pick the best run locally instead of relying on the order returned by the store.
    best_run = max(runs, key=lambda r: r.data.metrics.get("recall", -1))
    best_metrics = best_run.data.metrics
    best_params = best_run.data.params

    print("=" * 80)
    print("Best run by recall:")
    print(f"Run ID: {best_run.info.run_id}")
    print(f"Model type: {best_params.get('model_type', 'unknown')}")
    # A missing metric is printed as 'N/A'; applying ':.4f' to that string
    # directly would raise ValueError.
    print(f"Recall: {_format_metric(best_metrics, 'recall')}")
    print(f"F1 Score: {_format_metric(best_metrics, 'f1_score')}")
    print(f"Accuracy: {_format_metric(best_metrics, 'accuracy')}")
    print(f"Preprocessing: {best_params.get('preprocessing', 'unknown')}")
    print("=" * 80)

    print("\nAll runs sorted by recall:")
    print(results_df.sort_values("recall", ascending=False).to_string(index=False))

    return best_run

def load_best_model(best_run):
    """Load the scikit-learn model stored under the ``model`` artifact path of a run.

    Args:
        best_run: Run object exposing ``info.run_id``.

    Returns:
        The object returned by ``mlflow.sklearn.load_model`` for that run.
    """
    print("\nLoading best model for testing...")
    run_id = best_run.info.run_id
    model_uri = f"runs:/{run_id}/model"
    # No prediction is made here; evaluating on test data is left to the caller.
    return mlflow.sklearn.load_model(model_uri)



In [30]:
# NOTE(review): `best_run` is assigned in the cell that follows this one, so on a
# fresh top-to-bottom run this line would likely raise NameError. Consider
# moving this cell below that one.
best_run.info.run_id

'f81e4e41bdae4724a5f0c30a5abcaf7a'

In [26]:
# Select the run with the highest recall, then persist its model to a local file.
# Uses MLFLOW_EXPERIMENT_NAME from the configuration cell; the previous name
# `EXPERIMENT_NAME` is not defined anywhere in this notebook.
best_run = find_best_model_by_recall(MLFLOW_EXPERIMENT_NAME)

if best_run:
    best_model = load_best_model(best_run)

    # Save the best model to file if needed
    joblib.dump(best_model, "best_recall_model.pkl")
    print("Best model saved to best_recall_model.pkl")

Best run by recall:
Run ID: f81e4e41bdae4724a5f0c30a5abcaf7a
Model type: unknown
Recall: 0.9071
F1 Score: 0.5570
Accuracy: 0.6171
Preprocessing: unknown

All runs sorted by recall:
                          run_id   recall  f1_score  accuracy  roc_auc      run_date
f81e4e41bdae4724a5f0c30a5abcaf7a 0.907143  0.557018  0.617062 0.816606 1749676968188
57aaa78d79f04717a834b7d8a67b5fe9 0.692857  0.605304  0.760190 0.820714 1749676381667
bcf09256f8784a2cbad5432ae957afd4 0.689286  0.598450  0.754502 0.779166 1749676396816
c098d2fc26ea4a1c8b6e71b74eaa5279 0.675000  0.575342  0.735545 0.769396 1749675754139
ebdacefe47c94268b9a80fb6c3e7fe7f 0.664286  0.586751  0.751659 0.783196 1749676958865
89e21c88edba446dbb304326fcb36140 0.603571  0.574830  0.763033 0.806756 1749676603957

Loading best model for testing...
Best model saved to best_recall_model.pkl


In [27]:
def get_model_by_run_name(run_name, experiment_name=None, model_path="model", mlflow_client=None):
    """Load the model logged by the most recent run that has the given run name.

    Args:
        run_name: Value of the ``mlflow.runName`` tag to search for. It is
            interpolated verbatim into the filter string, so a name containing
            a single quote will probably yield an invalid filter.
        experiment_name: Optional experiment to restrict the search to. When
            omitted, every experiment returned by ``search_experiments()`` is
            searched.
        model_path: Artifact path of the model inside the run.
        mlflow_client: Optional client object providing
            ``get_experiment_by_name``, ``search_experiments`` and
            ``search_runs``. When omitted, the notebook-level ``client`` is used.

    Returns:
        The model loaded with ``mlflow.pyfunc.load_model``, or with
        ``mlflow.sklearn.load_model`` if the pyfunc load raises
        ``MlflowException``.

    Raises:
        ValueError: If the experiment does not exist, if no run matches, if
            several runs match while no ``experiment_name`` was given, or if
            both loaders fail.
    """
    if mlflow_client is None:
        mlflow_client = client

    # Build search filter
    filter_string = f"tags.mlflow.runName = '{run_name}'"

    # Determine experiment IDs to search
    if experiment_name:
        experiment = mlflow_client.get_experiment_by_name(experiment_name)
        if not experiment:
            raise ValueError(f"Experiment '{experiment_name}' not found")
        experiment_ids = [experiment.experiment_id]
    else:
        # search_runs needs a concrete list of experiment IDs (None is not a
        # valid value), so enumerate the experiments explicitly.
        experiment_ids = [
            exp.experiment_id for exp in mlflow_client.search_experiments()
        ]

    # Search for runs, newest first
    runs = mlflow_client.search_runs(
        experiment_ids=experiment_ids,
        filter_string=filter_string,
        order_by=["start_time DESC"],
    )

    # Handle search results
    if not runs:
        raise ValueError(f"No runs found with name: {run_name}")

    if not experiment_name and len(runs) > 1:
        experiments = {run.info.experiment_id for run in runs}
        raise ValueError(
            f"Multiple runs found ({len(runs)}) across {len(experiments)} experiments. "
            "Please specify experiment_name."
        )

    # Select the most recent run
    selected_run = runs[0]
    model_uri = f"runs:/{selected_run.info.run_id}/{model_path}"

    try:
        # Load model through the generic pyfunc flavor first
        return mlflow.pyfunc.load_model(model_uri)
    except MlflowException as e:
        # Fallback to sklearn flavor
        try:
            return mlflow.sklearn.load_model(model_uri)
        except MlflowException:
            # Report the original pyfunc failure as the cause.
            raise ValueError(f"Error loading model: {str(e)}") from e

# Load the model of the run named "logistic_regression_20250612_001300" from the
# experiment configured in MLFLOW_EXPERIMENT_NAME. The confirmation is printed
# only if the call returns without raising.
model = get_model_by_run_name(
    run_name="logistic_regression_20250612_001300",
    experiment_name=MLFLOW_EXPERIMENT_NAME
)
print("Model loaded successfully!")


Model loaded successfully!


In [32]:
# Load the sklearn-flavor model directly from a fixed run ID and display it.
lr_model_uri = "runs:/57aaa78d79f04717a834b7d8a67b5fe9/model"
model_lr = mlflow.sklearn.load_model(lr_model_uri)
model_lr


In [33]:
# Persist the loaded model to a file in the current working directory; the
# cell output is the list of file names written by joblib.
joblib.dump(model_lr, "logistic_regression_model.pkl")

['logistic_regression_model.pkl']