# Import necessary libraries

In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import GridSearchCV

import mlflow
import mlflow.sklearn
import mlflow.xgboost 
import xgboost as xgb

In [3]:
# Read the cleaned delivery data into a DataFrame
df = pd.read_csv(
    filepath_or_buffer="C:/Users/Admin/Guvi Mini Project/Amazon delivery price predection/cleaned_data"
)

# Train the models on the dataset

In [4]:
# Separate the prediction target from the predictor columns
y = df["Delivery_Time"]
X = df.drop("Delivery_Time", axis=1)

In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [6]:
# Candidate regressors, keyed by the display name used for tuning and MLflow runs.
# The stochastic estimators get a fixed random_state (same seed as the train/test
# split) so that grid-search results and logged metrics are reproducible when the
# notebook is re-run from a fresh kernel. LinearRegression is deterministic and
# takes no seed.
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(random_state=42),
    "XGBoost": xgb.XGBRegressor(random_state=42),
}


In [7]:
# Search space per model name; an empty grid means the model is used with its defaults
param_grids = {
    "Linear Regression": dict(),
    "Random Forest": dict(
        n_estimators=[50, 100, 200],
        max_depth=[None, 10, 20],
        min_samples_split=[2, 5, 10],
    ),
    "Gradient Boosting": dict(
        n_estimators=[50, 100, 200],
        learning_rate=[0.01, 0.1, 0.2],
        max_depth=[3, 5, 10],
    ),
    "XGBoost": dict(
        n_estimators=[50, 100, 200],
        learning_rate=[0.01, 0.1, 0.2],
        max_depth=[3, 5, 10],
    ),
}


In [9]:
# Tune every model that has a non-empty grid and remember the winning settings
best_params = {}

for name, model in models.items():
    grid = param_grids[name]
    if not grid:
        # Nothing to search over for this model
        best_params[name] = "No tuning required"
        continue

    # 3-fold cross-validated exhaustive search, scored by (negated) MSE
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=grid,
        scoring="neg_mean_squared_error",
        cv=3,
        n_jobs=-1,
    )
    grid_search.fit(X_train, y_train)
    best_params[name] = grid_search.best_params_

# Report the selected hyperparameters for each model
for model_name, params in best_params.items():
    print(f"Best parameters for {model_name}: {params}")


Best parameters for Linear Regression: No tuning required
Best parameters for Random Forest: {'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 100}
Best parameters for Gradient Boosting: {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 100}
Best parameters for XGBoost: {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 100}


# MLflow

In [8]:
# Train each candidate model, evaluate it on the held-out split, and record the
# run (parameters, metrics, fitted model) in MLflow.

# The tracking URI must be set BEFORE the experiment is selected: set_experiment
# looks up / creates the experiment in whichever tracking store is active at the
# time of the call, which on a fresh kernel is not this server.
mlflow.set_tracking_uri('http://127.0.0.1:5000')
# NOTE(review): the experiment name starts with a space. It is kept unchanged so
# that runs keep landing in the experiment that already exists on the server.
mlflow.set_experiment(" Amazon_Delivery_Time_Prediction")

for name, model in models.items():
    with mlflow.start_run(run_name=name):
        # Apply the tuned hyperparameters, if the grid search produced any.
        # Untuned models carry a placeholder string instead of a dict.
        tuned_params = best_params.get(name)
        if isinstance(tuned_params, dict):
            model.set_params(**tuned_params)
            # Record the hyperparameters with the run so the logged metrics
            # can be traced back to the configuration that produced them.
            mlflow.log_params(tuned_params)

        # Train model
        model.fit(X_train, y_train)

        # Make predictions on the held-out split
        y_pred = model.predict(X_test)

        # Calculate metrics
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        # Log metrics in MLflow
        mlflow.log_param("model_type", name)
        mlflow.log_metric("RMSE", rmse)
        mlflow.log_metric("MAE", mae)
        mlflow.log_metric("R-squared", r2)

        # Log the fitted model with the flavor matching its library
        if name == "XGBoost":
            mlflow.xgboost.log_model(model, name)
        else:
            mlflow.sklearn.log_model(model, name)

        print(f"{name}: RMSE={rmse:.3f}, MAE={mae:.3f}, R2={r2:.3f}")

print("model training and MLflow logging complete.")


2025/05/27 10:15:07 INFO mlflow.tracking.fluent: Experiment with name ' Amazon_Delivery_Time_Prediction' does not exist. Creating a new experiment.


The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh()

All git commands will error until this is rectified.

$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - error|e|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet



Linear Regression: RMSE=42.736, MAE=33.026, R2=0.282
🏃 View run Linear Regression at: http://127.0.0.1:5000/#/experiments/667631394996600832/runs/544d3d7e57d94db1b5ba51f549a97eba
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/667631394996600832




Random Forest: RMSE=23.147, MAE=18.068, R2=0.789
🏃 View run Random Forest at: http://127.0.0.1:5000/#/experiments/667631394996600832/runs/2eb3ec7a36074ab99eec3cc9acbd9f01
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/667631394996600832




Gradient Boosting: RMSE=23.894, MAE=19.028, R2=0.776
🏃 View run Gradient Boosting at: http://127.0.0.1:5000/#/experiments/667631394996600832/runs/9bdf943cd9b24987802f16950c4fb4ea
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/667631394996600832


  self.get_booster().save_model(fname)


XGBoost: RMSE=22.725, MAE=17.890, R2=0.797
🏃 View run XGBoost at: http://127.0.0.1:5000/#/experiments/667631394996600832/runs/7f1ab886eede446388dd17bea7c6a7b4
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/667631394996600832
model training and MLflow logging complete.


Model Registration

In [None]:
# Disabled cell: the registration code below is wrapped in a triple-quoted string,
# so it is only evaluated as a string literal and nothing is registered.
# NOTE(review): before re-enabling, confirm two things:
#   * input(...) is given the run id as its prompt text and then waits for typed
#     input; a direct assignment of the run id may have been intended.
#   * 'model_name' at the end of the runs:/ URI looks like a placeholder for the
#     artifact path the model was logged under; verify against the logging cell.
'''name="XGB_Amazon_Delivery"
run_id=input("7f1ab886eede446388dd17bea7c6a7b4")
model_uri=f'runs:/{run_id}/model_name'

with mlflow.start_run(run_id=run_id):
    mlflow.register_model(model_uri=model_uri, model_name=name)'''

In [8]:
import mlflow.pyfunc

# Registered model (and version) to pull from the MLflow Model Registry
name = "XGB_Amazon_Delivery"
model_version = 1

# Load that exact version through the generic pyfunc interface
model = mlflow.pyfunc.load_model(
    model_uri=f'models:/{name}/{model_version}'
)


In [9]:
# Score the full feature matrix with the registry model and display the result
y_pred = model.predict(X)
y_pred

array([121.697586, 168.33238 , 119.99006 , ..., 167.6479  , 128.51352 ,
       175.3937  ], dtype=float32)

Transition the Model

In [10]:
# Source: the version of `name` addressed through the "challenger" alias
current_model_uri = f'models:/{name}@challenger'
# Destination registered model that receives the copied version
production_model_name = "XGB_Amazon_Delivery"

client = mlflow.MlflowClient()
client.copy_model_version(
    src_model_uri=current_model_uri,
    dst_name=production_model_name,
)

Registered model 'XGB_Amazon_Delivery' already exists. Creating a new version of this model...
Copied version '1' of model 'XGB_Amazon_Delivery' to version '2' of model 'XGB_Amazon_Delivery'.


<ModelVersion: aliases=[], creation_timestamp=1748330470215, current_stage='None', description='', last_updated_timestamp=1748330470215, name='XGB_Amazon_Delivery', run_id='7f1ab886eede446388dd17bea7c6a7b4', run_link='', source='models:/XGB_Amazon_Delivery/1', status='READY', status_message=None, tags={}, user_id=None, version=2>

In [12]:
model_version = 1

# Resolve the "challenger" alias on the production model name and load it
production_model_uri = f"models:/{production_model_name}@challenger"
loaded_model = mlflow.pyfunc.load_model(production_model_uri)

# Predict on the held-out split and display the result
y_pred = loaded_model.predict(X_test)
y_pred

array([121.00742,  75.36616,  92.17846, ...,  63.94145, 195.97154,
       186.52649], dtype=float32)

Deployment

In [14]:

import dagshub

# Connect this session to the DagsHub repository with its MLflow integration enabled
dagshub.init(
    repo_owner='Ramya41014',
    repo_name='my-first-repo',
    mlflow=True,
)

Output()



Open the following link in your browser to authorize the client:
https://dagshub.com/login/oauth/authorize?state=6dc5e0da-1204-4a9b-b099-9f21c0f6bd44&client_id=32b60ba385aa7cecf24046d8195a71c07dd345d9657977863b52e7748e0f0f28&middleman_request_id=666b67bd261a89c5870c8c91ecf2ee093768c663dbb32e179898bb4355f21091




INFO:httpx:HTTP Request: POST https://dagshub.com/login/oauth/middleman "HTTP/1.1 200 OK"


INFO:httpx:HTTP Request: POST https://dagshub.com/login/oauth/access_token "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"


INFO:dagshub:Accessing as Ramya41014
INFO:httpx:HTTP Request: GET https://dagshub.com/api/v1/repos/Ramya41014/my-first-repo "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://dagshub.com/api/v1/user "HTTP/1.1 200 OK"


INFO:dagshub:Initialized MLflow to track repo "Ramya41014/my-first-repo"


INFO:dagshub:Repository Ramya41014/my-first-repo initialized!
