In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_absolute_error
import joblib
import mlflow
import mlflow.sklearn

# Turn on MLflow's scikit-learn autologging before any estimator is fitted.
mlflow.sklearn.autolog()

# Load the dataset; the relative path is resolved against the current
# working directory.
df = pd.read_csv("StudentsPerformance.csv")

# Integer-encode every non-numeric column.
# * Each column gets its own LabelEncoder, stored in `label_encoders`, so the
#   category -> code mapping of every column stays available afterwards.
#   Re-fitting one shared encoder would keep only the last column's mapping.
# * Checking "is not numeric" rather than `dtype == 'object'` also covers text
#   columns that pandas loads with a dedicated string dtype.
# `le` is left bound to the most recently fitted encoder.
label_encoders = {}
for col in df.columns:
    if pd.api.types.is_numeric_dtype(df[col]):
        continue
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Features are every column except the target; the target is the math score.
X = df.drop(columns="math score")
y = df["math score"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit, evaluate and persist the regressor inside one MLflow run so that the
# metrics logged below are attached to that run.
with mlflow.start_run():
    model = DecisionTreeRegressor(random_state=42, max_depth=5)
    model.fit(X_train, y_train)

    # Score the model on the held-out split.
    y_pred = model.predict(X_test)
    r2, mae = r2_score(y_test, y_pred), mean_absolute_error(y_test, y_pred)

    # Record both evaluation metrics on the active run.
    for metric_name, metric_value in (("r2_score", r2), ("mae", mae)):
        mlflow.log_metric(metric_name, metric_value)

    print("R2 Score:", r2)
    print("MAE:", mae)

    # Serialize the fitted estimator with joblib into the working directory.
    joblib.dump(model, "decision_tree.pkl")
    print("Model saved as decision_tree.pkl")


  return FileStore(store_uri, store_uri)
1. Set the MLFLOW_TRACKING_URI environment variable to the desired tracking URI. `export MLFLOW_TRACKING_URI=http://localhost:5000`
2. Set the tracking URI programmatically by calling `mlflow.set_tracking_uri`. `mlflow.set_tracking_uri('http://localhost:5000')`


R2 Score: 0.8242299188020519
MAE: 4.931523210662771
Model saved as decision_tree.pkl
