In [None]:
#Task 1
# Import necessary Libraries
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, f1_score
from mlflow.models.signature import infer_signature


In [None]:
# Read the red-wine quality data from the working directory.
# NOTE(review): assumes a comma-separated file with a 'quality' column — confirm
# against the copy of the dataset being used.
df = pd.read_csv("winequality-red.csv")

# Every column except 'quality' is a feature; 'quality' is the label.
X = df.drop(columns='quality')
y = df['quality']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# All runs logged below are grouped under this MLflow experiment.
mlflow.set_experiment("Wine Quality Experiment-MLFlow22")

In [None]:
# Task 2
# Ensure MLflow tracks each run with proper logs
def train_and_log_model(model, model_name):
    """Fit ``model``, score it on the held-out split, and record the run in MLflow.

    The data is not passed in: the function reads the notebook-level
    ``X_train``, ``X_test``, ``y_train`` and ``y_test`` variables.

    Args:
        model: Estimator exposing ``get_params``, ``fit`` and ``predict``.
        model_name: Used as the MLflow run name and in the printed summary.

    Returns:
        None. Parameters, metrics and the fitted model are logged to MLflow,
        and a one-line confirmation is printed.
    """
    # Close any run that is still active before opening a new one.
    if mlflow.active_run():
        mlflow.end_run()

    with mlflow.start_run(run_name=model_name):
        # Hyperparameters are recorded before the model is fitted.
        mlflow.log_params(model.get_params())

        # Fit on the training split, then predict the held-out rows.
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Precision and F1 are class-weighted averages; zero_division=1 is
        # passed to precision only.
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(
            y_test, y_pred, average='weighted', zero_division=1
        )
        f1 = f1_score(y_test, y_pred, average='weighted')

        metrics = {
            "accuracy": accuracy,
            "precision": precision,
            "f1_score": f1,
        }
        mlflow.log_metrics(metrics)

        # The signature is inferred from the full test features and their
        # predictions; the first test row is stored as the input example.
        mlflow.sklearn.log_model(
            sk_model=model,
            artifact_path="model",
            signature=infer_signature(X_test, y_pred),
            input_example=X_test.iloc[:1],
        )

        # Console confirmation of what was logged.
        print(f"Logged {model_name} model with accuracy: {accuracy:.4f}, precision: {precision:.4f}, f1_score: {f1:.4f}")
        
# Train and log the Random Forest and Gradient Boosting models.
# Both estimators are seeded with the same value used for the train/test split
# (42) so that a fresh "Restart & Run All" logs the same metrics each time;
# without random_state the fitted models differ between executions.
rf_model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
train_and_log_model(rf_model, "Random Forest")

gb_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
train_and_log_model(gb_model, "Gradient Boosting")
                                       