## Imports

In [130]:
# Mount Google Drive at /content/drive (Colab only); the cells below change
# into a Drive folder and read/write files relative to it.
from google.colab import drive
drive.mount('/content/drive')
import pandas as pd
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score
from joblib import dump, load
from sklearn.preprocessing import LabelEncoder

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [131]:
# Make the Drive notebooks folder the working directory so that the relative
# paths used below (CSV input, joblib model file) resolve against it.
%cd /content/drive/MyDrive/Colab Notebooks/

/content/drive/MyDrive/Colab Notebooks


In [132]:
# Read the pricing dataset from the working directory; the first CSV column
# is used as the DataFrame index rather than as a feature.
data = pd.read_csv(
    'get_around_pricing_project.csv',
    index_col=0,
)

## Cleaning

In [133]:
# Car makes removed from the dataset before modelling.
# NOTE(review): presumably these makes have too few rows to be learned
# reliably — confirm the selection criterion.
# (Named explicitly instead of `list`, which would shadow the builtin for the
# rest of the kernel session.)
excluded_model_keys = ['Mini', 'Honda', 'Mazda', 'Lexus', 'Alfa Romeo']

# Keep every row whose make is not excluded, in a single vectorised pass.
# Assumes the row index is unique (the original label-based drop removed rows
# by index label) — TODO confirm.
data = data[~data['model_key'].isin(excluded_model_keys)]

## Preprocessing

* Separate target variable Y from features X

In [134]:
# Name of the column the model learns to predict.
target_variable = "rental_price_per_day"

# Y is the target column; X is every remaining column.
Y = data[target_variable]
X = data.drop(columns=[target_variable])
print("...Done.")

...Done.


* Classify features as numeric or categorical

In [135]:
# Sort the columns of X into numeric and categorical lists based on dtype:
# any dtype whose name contains 'float' or 'int' counts as numeric, everything
# else (e.g. object, bool) as categorical.
numeric_features = []
categorical_features = []
for column_name, column_dtype in X.dtypes.items():
    dtype_name = str(column_dtype)
    is_numeric = ('float' in dtype_name) or ('int' in dtype_name)
    bucket = numeric_features if is_numeric else categorical_features
    bucket.append(column_name)

* Train/test split (80 % train / 20 % test)

In [136]:
# Feature lists used by the preprocessing ColumnTransformer. They are set
# explicitly here and replace any values assigned earlier.
numeric_features = ['mileage', 'engine_power']
categorical_features = [
    'model_key',
    'fuel',
    'paint_color',
    'car_type',
    'private_parking_available',
    'has_gps',
    'has_air_conditioning',
    'automatic_car',
    'has_getaround_connect',
    'has_speed_regulator',
    'winter_tires',
]

# Hold out 20 % of the rows as a test set; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,
)
print("...Done.")

...Done.


* Pipeline

In [137]:
# Numeric columns are standardised.
numeric_steps = [('scaler', StandardScaler())]
numeric_transformer = Pipeline(steps=numeric_steps)

# Categorical columns are one-hot encoded, dropping the first level of each.
# NOTE(review): no handle_unknown is set, so scikit-learn's default applies —
# confirm that categories unseen during fit are handled as intended at
# prediction time.
categorical_steps = [('encoder', OneHotEncoder(drop='first'))]
categorical_transformer = Pipeline(steps=categorical_steps)

# Route each group of columns to its transformer; only the columns listed in
# numeric_features / categorical_features are passed on.
column_transformers = [
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_transformers)



* XGBoost regressor (gradient-boosted trees):

In [138]:
# Hyperparameters passed to the XGBoost regressor.
xgb_params = {
    "learning_rate": 0.05,
    "max_depth": 8,
    "min_child_weight": 4,
    "n_estimators": 150,
}

# Full pipeline: preprocessing followed by the regressor, so raw feature
# frames can be passed directly to fit/predict.
model = Pipeline(
    steps=[
        ("Preprocessing", preprocessor),
        ("Regressor", XGBRegressor(**xgb_params)),
    ]
)
model.fit(X_train, Y_train)

# Predict on both splits and report R² for each to compare train vs. test fit.
train_pred = model.predict(X_train)
test_pred = model.predict(X_test)

train_r2 = r2_score(Y_train, train_pred)
test_r2 = r2_score(Y_test, test_pred)
print("r2 on train set : ", train_r2)
print("r2 on test set : ", test_r2)

r2 on train set :  0.9070636847816742
r2 on test set :  0.7674115318887376


## Preparing the API

In [139]:
# Serialise the fitted pipeline (preprocessing + regressor) into the api/
# folder, relative to the current working directory.
dump(
    value=model,
    filename='api/model_xg_getaround.joblib',
)

['model_xg_getaround.joblib']

In [140]:
# Example feature payload for a single car, with one entry per model input
# column (two numeric features, four string categories, seven boolean flags).
predi_Features = dict(
    model_key="Toyota",
    mileage=25000,
    engine_power=130,
    fuel="diesel",
    paint_color="red",
    car_type="sedan",
    private_parking_available=True,
    has_gps=True,
    has_air_conditioning=True,
    automatic_car=False,
    has_getaround_connect=True,
    has_speed_regulator=True,
    winter_tires=True,
)

In [141]:
# Wrap the example payload in a one-row DataFrame, since the pipeline selects
# its input columns by name. A dedicated name is used so the training
# DataFrame `data` is not overwritten by this smoke test.
sample_input = pd.DataFrame(predi_Features, index=[0])

# Load the pipeline from the same path it was dumped to (under api/), so the
# check exercises the model trained in this run rather than a stale file left
# in the working directory.
loaded_model = load('api/model_xg_getaround.joblib')

# The pipeline applies preprocessing and regression in one call; predict
# returns an array with one value per input row.
prediction = loaded_model.predict(sample_input)
print('Predicted rental price per day : ', round(prediction.tolist()[0],2),'$')

Predicted rental price per day :  152.81 $
