![snap](https://lever-client-logos.s3.amazonaws.com/2bd4cdf9-37f2-497f-9096-c2793296a75f-1568844229943.png)

# Web dashboard

The interactive dashboard is deployed on Hugging Face Spaces: https://huggingface.co/spaces/MaFae/Jedha_Bloc_5_GetAround



# Machine Learning

## Libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import  StandardScaler, OneHotEncoder
from sklearn.metrics import r2_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import r2_score
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
import joblib

## Data

In [2]:
# Location of the raw pricing dataset (CSV file hosted on S3).
PRICING_CSV_URL = 'https://full-stack-assets.s3.eu-west-3.amazonaws.com/Deployment/get_around_pricing_project.csv'

# Load the whole file into a DataFrame and preview the first rows.
data_pricing = pd.read_csv(PRICING_CSV_URL)
data_pricing.head()

Unnamed: 0.1,Unnamed: 0,model_key,mileage,engine_power,fuel,paint_color,car_type,private_parking_available,has_gps,has_air_conditioning,automatic_car,has_getaround_connect,has_speed_regulator,winter_tires,rental_price_per_day
0,0,Citroën,140411,100,diesel,black,convertible,True,True,False,False,True,True,True,106
1,1,Citroën,13929,317,petrol,grey,convertible,True,True,False,False,False,True,True,264
2,2,Citroën,183297,120,diesel,white,convertible,False,False,False,False,True,False,True,101
3,3,Citroën,128035,135,diesel,red,convertible,True,True,False,False,True,True,True,158
4,4,Citroën,97097,160,diesel,silver,convertible,True,True,False,False,False,True,True,183


In [3]:
# Summary statistics for every column, numeric and categorical alike
# (include='all'), to spot implausible values before modelling.
# NOTE(review): the recorded output reports a minimum mileage of -64 and a
# minimum engine_power of 0; no visible cell filters these rows - confirm
# that keeping them is intended.
summary_stats = data_pricing.describe(include='all')
summary_stats

Unnamed: 0.1,Unnamed: 0,model_key,mileage,engine_power,fuel,paint_color,car_type,private_parking_available,has_gps,has_air_conditioning,automatic_car,has_getaround_connect,has_speed_regulator,winter_tires,rental_price_per_day
count,4843.0,4843,4843.0,4843.0,4843,4843,4843,4843,4843,4843,4843,4843,4843,4843,4843.0
unique,,28,,,4,10,8,2,2,2,2,2,2,2,
top,,Citroën,,,diesel,black,estate,True,True,False,False,False,False,True,
freq,,969,,,4641,1633,1606,2662,3839,3865,3881,2613,3674,4514,
mean,2421.0,,140962.8,128.98823,,,,,,,,,,,121.214536
std,1398.198007,,60196.74,38.99336,,,,,,,,,,,33.568268
min,0.0,,-64.0,0.0,,,,,,,,,,,10.0
25%,1210.5,,102913.5,100.0,,,,,,,,,,,104.0
50%,2421.0,,141080.0,120.0,,,,,,,,,,,119.0
75%,3631.5,,175195.5,135.0,,,,,,,,,,,136.0


## Preprocessing

In [4]:
# Drop the 'Unnamed: 0' column (it mirrors the row index in the preview above,
# so it looks like a saved index rather than a feature).
# errors='ignore' keeps this cell idempotent: re-running it once the column is
# already gone no longer raises a KeyError.
data_pricing = data_pricing.drop(columns='Unnamed: 0', errors='ignore')

# X, y split: every remaining column is a feature, the daily rental price is the target.
target_variable = "rental_price_per_day"
X = data_pricing.drop(target_variable, axis=1)
y = data_pricing[target_variable]

# Train / test split: hold out 20% of the rows; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=42)

In [5]:
# Preprocessing
# Column groups are inferred from the dtypes of the training frame:
# strings and booleans are treated as categories, ints and floats as numbers.
categorical_features = X_train.select_dtypes(include=["object", "bool"]).columns
numerical_features = X_train.select_dtypes(include=['int64', 'float64']).columns

# The category lists are taken from the full feature frame X (not only X_train),
# so the one-hot layout does not depend on which rows landed in the training split.
category_levels = [
    sorted(X[name].unique().tolist())
    for name in categorical_features
]

# drop='first' removes one indicator column per categorical feature.
categorical_transformer = OneHotEncoder(categories=category_levels, drop='first')
numerical_transformer = StandardScaler()

# Route each column group to its transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ("categorical_transformer", categorical_transformer, categorical_features),
        ("numerical_transformer", numerical_transformer, numerical_features),
    ]
)

# Full model: preprocessing followed by a seeded XGBoost regressor.
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', XGBRegressor(random_state=42)),
    ]
)

## Model Training

In [6]:
# Perform grid search
# Exhaustive search over the regressor's hyper-parameters; the 'regressor__'
# prefix targets the 'regressor' step of the pipeline.
params = {
    'regressor__n_estimators': [10, 20, 30, 40, 50, 100, 150, 200, 250, 300, 350, 400],
    "regressor__learning_rate": [2, 1.5, 1.0, 0.5, 0.1, 0.05, 0.01],
    'regressor__max_depth': [2, 3, 4, 5],
}

print("Grid search...")
print(params)

# 3-fold cross-validation on the training split only; the test split stays unseen.
gridsearch = GridSearchCV(estimator=pipeline, param_grid=params, cv=3)
gridsearch.fit(X_train, y_train)

print("...Done.")
print("Best hyperparameters : ", gridsearch.best_params_)
print("Best validation R2 : ", gridsearch.best_score_)
print()

# Score the search object on both splits to compare train and test performance.
train_r2 = gridsearch.score(X_train, y_train)
test_r2 = gridsearch.score(X_test, y_test)
print("R2 on training set : ", train_r2)
print("R2 on test set : ", test_r2)

# Evaluate the best estimator explicitly on the held-out test split.
best_model = gridsearch.best_estimator_
y_pred = best_model.predict(X_test)
r2 = r2_score(y_test, y_pred)

print(f"\n--- Évaluation du modèle sur l'ensemble de test ---")
print(f"R² sur l'ensemble de test : {r2:.2f}")

Grid search...
{'regressor__n_estimators': [10, 20, 30, 40, 50, 100, 150, 200, 250, 300, 350, 400], 'regressor__learning_rate': [2, 1.5, 1.0, 0.5, 0.1, 0.05, 0.01], 'regressor__max_depth': [2, 3, 4, 5]}
...Done.
Best hyperparameters :  {'regressor__learning_rate': 0.1, 'regressor__max_depth': 3, 'regressor__n_estimators': 300}
Best validation R2 :  0.7525010704994202

R2 on training set :  0.8252252340316772
R2 on test set :  0.7538735270500183

--- Évaluation du modèle sur l'ensemble de test ---
R² sur l'ensemble de test : 0.75


## Saving model

In [7]:
# Destination of the serialized model artefact (relative to the working directory).
filename = 'modele_GAR.joblib'

# Persist the fitted grid-search object. This call used to be commented out, so
# the confirmation message below was printed although nothing was written.
# NOTE(review): this (over)writes the file on every run; the whole search
# object is saved, as in the original commented-out call - confirm the
# consumer of the file expects that rather than `best_model`.
joblib.dump(gridsearch, filename)

print(f"Modèle enregistré sous : {filename}")

Modèle enregistré sous : modele_GAR.joblib


# API Prediction

The prediction API is deployed on Hugging Face Spaces; its interactive documentation is available at https://mafae-jedha-bloc-5-getaround-api.hf.space/docs#/

## Test requests on API

In [8]:
import requests

# Prediction endpoint of the deployed API.
API_PREDICT_URL = "https://mafae-jedha-bloc-5-getaround-api.hf.space/predict"

# One example car, described with the same feature columns the model was trained on.
data = {"model_key": "Renault", 
        "mileage": 109839, 
        "engine_power": 135, 
        "fuel": "diesel", 
        "paint_color": "black", 
        "car_type": "sedan", 
        "private_parking_available": True, 
        "has_gps": True, 
        "has_air_conditioning": False, 
        "automatic_car": False, 
        "has_getaround_connect": True, 
        "has_speed_regulator": False, 
        "winter_tires": True 
        }

# requests has no default timeout: without one this cell could block forever
# if the service does not answer.
response = requests.post(API_PREDICT_URL, json=data, timeout=30)

# Fail loudly on a 4xx/5xx answer instead of displaying an error body as if it
# were a prediction.
response.raise_for_status()

response.json()

{'prediction': 137.91529846191406}