In [1]:
!pip install mlflow scikit-learn pandas numpy matplotlib



In [13]:
!mkdir -p /home/jupyter/mlflow_store

In [2]:
ls /home/jupyter/mlflow_store

[0m[01;34m486189184932835047[0m/


In [3]:
import numpy as np
import pandas as pd
import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [4]:
def poison_data(X, y, noise_ratio):
    """
    Adds feature + label noise.
    noise_ratio is float between 0 and 1.
    """
    X = X.copy()
    y = y.copy()

    num_samples = X.shape[0]
    num_poison = int(num_samples * noise_ratio)

    # Random sample indices
    indices = np.random.choice(num_samples, num_poison, replace=False)

    # Inject noise into features
    X_noise = np.random.uniform(X.min(), X.max(), (num_poison, X.shape[1]))
    X[indices] = X_noise

    # Inject random label noise
    unique_labels = np.unique(y)
    y[indices] = np.random.choice(unique_labels, num_poison)

    return X, y

In [5]:
mlflow.set_tracking_uri("file:///home/jupyter/mlflow_store")
mlflow.set_experiment("iris_poisoning")

  return FileStore(store_uri, store_uri)


<Experiment: artifact_location='file:///home/jupyter/mlflow_store/486189184932835047', creation_time=1763232044335, experiment_id='486189184932835047', last_update_time=1763232044335, lifecycle_stage='active', name='iris_poisoning', tags={}>

In [6]:
def train_with_noise(noise_level):

    # Load clean data
    data = load_iris()
    X = data.data
    y = data.target

    # Apply poisoning
    X_poison, y_poison = poison_data(X, y, noise_level)

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X_poison, y_poison, test_size=0.2, random_state=42
    )

    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # MLflow logging
    with mlflow.start_run():
        mlflow.log_param("noise_level", noise_level)

        # Signature + input example
        X_sample = X_train[:1]
        y_sample_pred = model.predict(X_sample)
        signature = infer_signature(X_sample, y_sample_pred)

        safe_noise = str(noise_level).replace(".", "_")

        mlflow.sklearn.log_model(
            sk_model=model,
            name=f"rf_model_noise_{safe_noise}",   # ‚Üê FIXED
            signature=signature,
            input_example=X_sample.tolist())

        accuracy = model.score(X_test, y_test)
        mlflow.log_metric("accuracy", accuracy)

    return accuracy

In [8]:
noise_levels = [0.05, 0.10, 0.50]
results = {}

for nl in noise_levels:
    acc = train_with_noise(nl)
    results[nl] = acc

results


{0.05: 0.9333333333333333, 0.1: 0.9666666666666667, 0.5: 0.6}