In [20]:
import pandas as pd

pricing_df = pd.read_csv("https://full-stack-assets.s3.eu-west-3.amazonaws.com/Deployment/get_around_pricing_project.csv", index_col=0)
pricing_df.head()

Unnamed: 0,model_key,mileage,engine_power,fuel,paint_color,car_type,private_parking_available,has_gps,has_air_conditioning,automatic_car,has_getaround_connect,has_speed_regulator,winter_tires,rental_price_per_day
0,Citroën,140411,100,diesel,black,convertible,True,True,False,False,True,True,True,106
1,Citroën,13929,317,petrol,grey,convertible,True,True,False,False,False,True,True,264
2,Citroën,183297,120,diesel,white,convertible,False,False,False,False,True,False,True,101
3,Citroën,128035,135,diesel,red,convertible,True,True,False,False,True,True,True,158
4,Citroën,97097,160,diesel,silver,convertible,True,True,False,False,False,True,True,183


In [11]:
params_list = [i/2000 for i in range(1, 100)]
params_list[:10]

[0.0005, 0.001, 0.0015, 0.002, 0.0025, 0.003, 0.0035, 0.004, 0.0045, 0.005]

In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import r2_score, make_scorer
from dotenv import load_dotenv
import pandas as pd
import mlflow
import joblib
import os

load_dotenv()

categorical_cols = ["model_key", "fuel", "paint_color", "car_type", "private_parking_available", "has_gps", "has_air_conditioning",
                    "automatic_car", "has_getaround_connect", "has_speed_regulator", "winter_tires"]
numerical_cols = ["mileage", "engine_power"]

X = pricing_df[categorical_cols + numerical_cols]
y = pricing_df["rental_price_per_day"]

bins = pd.qcut(y, q=4, duplicates="drop")
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    train_size=0.75,
    random_state=444719,
    stratify=bins
)

onehot_encoder = OneHotEncoder(drop="first", handle_unknown="ignore")
standard_scaler = StandardScaler()

preprocessor = ColumnTransformer(
    transformers=[
        ("categorical", onehot_encoder, categorical_cols),
        ("numerical", standard_scaler, numerical_cols)
    ]
)

model = Lasso()  # or Ridge()

pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("regressor", model)
])

param_grid = {
    "regressor__alpha": params_list
}

scorer = make_scorer(r2_score)

grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=5,
    scoring=scorer,
    n_jobs=-1
)

EXPERIMENT_NAME="getaround-price-prediction"
mlflow.set_tracking_uri(os.environ["TRACKING_SERVER_URL"])
mlflow.set_experiment(EXPERIMENT_NAME)
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)

with mlflow.start_run(experiment_id = experiment.experiment_id, run_name="Lasso_GridSearchCV_1_casting_a_wide_net"):
    grid_search.fit(X_train, y_train)

    mlflow.log_params(grid_search.best_params_)

    y_pred = grid_search.predict(X_test)
    r2 = r2_score(y_test, y_pred)

    mlflow.log_metric("test_r2", r2)

    print("Best parameters:", grid_search.best_params_)
    print("Best cross-validation R²:", grid_search.best_score_)
    print("Test R²:", r2)

    input_example = X_train.iloc[:1]
    mlflow.sklearn.log_model(
        grid_search.best_estimator_,
        "model",
        input_example=input_example,
        registered_model_name="getaround-price-prediction-model"
    )

    # joblib.dump(grid_search.best_estimator_, "linear_regression_model.pkl")

  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(
  model = cd_fast.sparse_enet_coordinate_descent(


Best parameters: {'regressor__alpha': 0.0085}
Best cross-validation R²: 0.6961471561171957
Test R²: 0.7020558138674361


Registered model 'getaround-price-prediction-model' already exists. Creating a new version of this model...
2025/08/18 19:06:15 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: getaround-price-prediction-model, version 17
Created version '17' of model 'getaround-price-prediction-model'.


🏃 View run Lasso_GridSearchCV_1_casting_a_wide_net at: https://aengusbl-getaround-mlflow.hf.space/#/experiments/2/runs/0d5aa87039d84e7a905702b75b770c27
🧪 View experiment at: https://aengusbl-getaround-mlflow.hf.space/#/experiments/2


Exception ignored in: <function ResourceTracker.__del__ at 0x103251300>
Traceback (most recent call last):
  File "/opt/anaconda3/envs/mlflow_env/lib/python3.12/multiprocessing/resource_tracker.py", line 77, in __del__
  File "/opt/anaconda3/envs/mlflow_env/lib/python3.12/multiprocessing/resource_tracker.py", line 86, in _stop
  File "/opt/anaconda3/envs/mlflow_env/lib/python3.12/multiprocessing/resource_tracker.py", line 111, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x109979300>
Traceback (most recent call last):
  File "/opt/anaconda3/envs/mlflow_env/lib/python3.12/multiprocessing/resource_tracker.py", line 77, in __del__
  File "/opt/anaconda3/envs/mlflow_env/lib/python3.12/multiprocessing/resource_tracker.py", line 86, in _stop
  File "/opt/anaconda3/envs/mlflow_env/lib/python3.12/multiprocessing/resource_tracker.py", line 111, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exceptio

In [14]:
# This cell tests the model, with an input example that was part of the logs sent to the tracking server
import mlflow
from mlflow.models import Model
from dotenv import load_dotenv

load_dotenv()

model_uri = 's3://getaround-mlflow-artifactstore/2/b21d60925615456587e03b92d95843c4/artifacts/model'

pyfunc_model = mlflow.pyfunc.load_model(model_uri)
input_data = pyfunc_model.input_example

mlflow.models.predict(
    model_uri=model_uri,
    input_data=input_data,
    env_manager="conda",
)

2025/08/18 19:16:53 INFO mlflow.models.python_api: It is highly recommended to use `uv` as the environment manager for predicting with MLflow models as its performance is significantly better than other environment managers. Run `pip install uv` to install uv. See https://docs.astral.sh/uv/getting-started/installation for other installation methods.
2025/08/18 19:16:53 INFO mlflow.models.flavor_backend_registry: Selected backend for flavor 'python_function'
2025/08/18 19:16:56 INFO mlflow.utils.conda: === Creating conda environment /var/folders/p_/237vh0m92ws4lbbnrhz3b1rr0000gn/T/tmpbuo87apc/envs/conda_envs/mlflow-c73ae8786f87d613ef17c0fe652494c39f6dff36-8f29c64111ad37fd5ede73b43578e91380b4e0f0 ===


Channels:
 - conda-forge
 - defaults
Platform: osx-64
Collecting package metadata (repodata.json): done
Solving environment: done

python-3.12.11       | 12.9 MB   |            |   0% 
tk-8.6.13            | 3.1 MB    |            |   0% [A

openssl-3.5.2        | 2.6 MB    |            |   0% [A[A


pip-25.1             | 1.2 MB    |            |   0% [A[A[A



libsqlite-3.50.4     | 957 KB    |            |   0% [A[A[A[A




ncurses-6.5          | 803 KB    |            |   0% [A[A[A[A[A





setuptools-80.9.0    | 731 KB    |            |   0% [A[A[A[A[A[A






readline-8.2         | 251 KB    |            |   0% [A[A[A[A[A[A[A







ca-certificates-2025 | 151 KB    |            |   0% [A[A[A[A[A[A[A[A








bzip2-1.0.8          | 131 KB    |            |   0% [A[A[A[A[A[A[A[A[A









tzdata-2025b         | 120 KB    |            |   0% [A[A[A[A[A[A[A[A[A[A










liblzma-5.8.1        | 102 KB    |            |   0% [

2025/08/18 19:19:25 INFO mlflow.utils.environment: === Running command '['bash', '-c', 'source /opt/anaconda3/bin/../etc/profile.d/conda.sh && conda activate mlflow-c73ae8786f87d613ef17c0fe652494c39f6dff36-8f29c64111ad37fd5ede73b43578e91380b4e0f0 1>&2 && python -c ""']'
2025/08/18 19:19:26 INFO mlflow.utils.environment: === Running command '['bash', '-c', 'source /opt/anaconda3/bin/../etc/profile.d/conda.sh && conda activate mlflow-c73ae8786f87d613ef17c0fe652494c39f6dff36-8f29c64111ad37fd5ede73b43578e91380b4e0f0 1>&2 && python /opt/anaconda3/envs/mlflow_env/lib/python3.12/site-packages/mlflow/pyfunc/_mlflow_pyfunc_backend_predict.py --model-uri file:///var/folders/p_/237vh0m92ws4lbbnrhz3b1rr0000gn/T/tmpc3ebtcf6/model --content-type json --input-path /var/folders/p_/237vh0m92ws4lbbnrhz3b1rr0000gn/T/tmpdv8aq5hx/input.json']'


{"predictions": [145.26639040933335]}

In [17]:
import sklearn
print(sklearn.__version__)

1.6.1
