In [17]:
import os

import dagshub
import joblib
import mlflow
import mlflow.xgboost
import pandas as pd
import xgboost as xgb
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split

In [18]:
# Name of the MLflow experiment that every run in this notebook is filed under.
experiment_name = "HousePricePrediction"

# Connect to the DagsHub repository so that MLflow runs are logged there.
dagshub.init(
    repo_owner="AntoineD01",
    repo_name="House-prediction",
    mlflow=True,
)

# Activate the experiment (created on first use); the returned Experiment
# object is the last expression, so it is displayed as the cell output.
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='mlflow-artifacts:/d44301c7c7484c8e99fc60d12f9f3860', creation_time=1751112538551, experiment_id='0', last_update_time=1751112538551, lifecycle_stage='active', name='HousePricePrediction', tags={}>

In [19]:
# Load processed data
data_path = '../../data/processed/processed_train.csv'

# The processed CSV carries an "Unnamed: 0" column holding 0, 1, 2, ...
# (presumably the DataFrame index written out by an earlier `to_csv` call --
# TODO confirm in the preprocessing step). It is a row counter rather than a
# house attribute, so skip it at read time; otherwise it ends up in X and is
# used by the model as a predictor.
df = pd.read_csv(
    data_path,
    usecols=lambda column_name: not column_name.startswith('Unnamed:'),
)
print(f"Data shape: {df.shape}")
df.head()

Data shape: (1460, 288)


Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,...,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
0,60,65.0,8450,7,5,2003,2003,196.0,706,0,...,False,False,False,True,False,False,False,False,True,False
1,20,80.0,9600,6,8,1976,1976,0.0,978,0,...,False,False,False,True,False,False,False,False,True,False
2,60,68.0,11250,7,5,2001,2002,162.0,486,0,...,False,False,False,True,False,False,False,False,True,False
3,70,60.0,9550,7,5,1915,1970,0.0,216,0,...,False,False,False,True,True,False,False,False,False,False
4,60,84.0,14260,8,5,2000,2000,350.0,655,0,...,False,False,False,True,False,False,False,False,True,False


In [20]:
# Target is the sale price; every remaining column is used as a predictor.
y = df['SalePrice']
X = df.drop(columns='SalePrice')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print(f"Train shape: {X_train.shape}")
print(f"Test shape: {X_test.shape}")


Train shape: (1168, 287)
Test shape: (292, 287)


In [21]:
def train_and_log_model(n_estimators, max_depth, learning_rate):
    """Train an XGBoost regressor, evaluate it, and record the run in MLflow.

    The model is fitted on the notebook-level ``X_train`` / ``y_train`` and
    scored on ``X_test`` / ``y_test``. The hyperparameters, the R2 and RMSE
    scores, and the path of the locally saved model are logged to a new
    MLflow run; the fitted model itself is pickled to disk with joblib.

    Args:
        n_estimators: Forwarded to ``xgb.XGBRegressor(n_estimators=...)``.
        max_depth: Forwarded to ``xgb.XGBRegressor(max_depth=...)``.
        learning_rate: Forwarded to ``xgb.XGBRegressor(learning_rate=...)``.

    Returns:
        The fitted ``xgb.XGBRegressor`` instance.
    """
    with mlflow.start_run():
        model = xgb.XGBRegressor(
            n_estimators=n_estimators,
            max_depth=max_depth,
            learning_rate=learning_rate,
            random_state=42
        )
        model.fit(X_train, y_train)

        # Predictions
        preds = model.predict(X_test)
        r2 = r2_score(y_test, preds)
        # Take the square root of the MSE explicitly: the `squared=False`
        # keyword is deprecated in scikit-learn 1.4 and removed in 1.6, while
        # this form works on every version.
        rmse = float(mean_squared_error(y_test, preds) ** 0.5)

        # Log parameters and metrics
        mlflow.log_params({
            "n_estimators": n_estimators,
            "max_depth": max_depth,
            "learning_rate": learning_rate,
        })
        mlflow.log_metrics({"r2": r2, "rmse": rmse})

        # Save model to file
        model_file = f"../../ml/models/xgb_model_{n_estimators}_{max_depth}_{learning_rate}.pkl"
        # joblib.dump does not create missing directories, so make sure the
        # target folder exists (e.g. on a fresh checkout).
        os.makedirs(os.path.dirname(model_file), exist_ok=True)
        joblib.dump(model, model_file)
        print(f"Saved model locally to {model_file}")

        # Log the path to MLflow as a param or tag
        mlflow.log_param("model_path", model_file)

        print(f"Logged Model - R2: {r2:.4f}, RMSE: {rmse:.2f}")
        return model

In [22]:
# Run 1: n_estimators=100, max_depth=5, learning_rate=0.1.
print("Training and Logging Model 1...")
model_1 = train_and_log_model(n_estimators=100, max_depth=5, learning_rate=0.1)

Training and Logging Model 1...




Saved model locally to ../../ml/models/xgb_model_100_5_0.1.pkl
Logged Model - R2: 0.9196, RMSE: 24836.53
🏃 View run upbeat-stag-404 at: https://dagshub.com/AntoineD01/House-prediction.mlflow/#/experiments/0/runs/06f4dafc670f466d86dd332eae3ebcf1
🧪 View experiment at: https://dagshub.com/AntoineD01/House-prediction.mlflow/#/experiments/0


In [23]:
# Run 2: n_estimators=200, max_depth=8, learning_rate=0.05.
print("Training and Logging Model 2...")
model_2 = train_and_log_model(n_estimators=200, max_depth=8, learning_rate=0.05)

Training and Logging Model 2...




Saved model locally to ../../ml/models/xgb_model_200_8_0.05.pkl
Logged Model - R2: 0.9059, RMSE: 26867.15
🏃 View run serious-stork-184 at: https://dagshub.com/AntoineD01/House-prediction.mlflow/#/experiments/0/runs/b8434f9e4e6c48849ffc8e3d345e9bb1
🧪 View experiment at: https://dagshub.com/AntoineD01/House-prediction.mlflow/#/experiments/0
