In [1]:
# Importing essential libraries
import pandas as pd
import numpy as np
import joblib as jb
from sklearn import preprocessing, metrics, impute, model_selection, compose, pipeline, ensemble

In [2]:
# Read the raw data and drop car_ID straight away so it is never used as a feature.
# A chained drop (instead of inplace=True) builds `df` in a single statement.
df = pd.read_csv('data.csv').drop(columns='car_ID')

# Split into predictors and target; CarName and price are excluded from the predictors
features = df.drop(columns=['CarName', 'price'])
target = df['price']

# Names of numerical and categorical features.
# Numeric (and bool) columns are selected explicitly instead of testing
# `dtype != 'O'`: text columns are not guaranteed to be stored as object dtype
# (pandas also has a dedicated string dtype), and such columns would otherwise
# be counted as numerical and never reach the one-hot encoder.
numerical_features = features.select_dtypes(include=['number', 'bool']).columns.tolist()
categorical_features = [col for col in features.columns if col not in numerical_features]

# Train test split (default test size; fixed seed so the split is reproducible)
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    features, target, random_state=32
)

# Nominal features
nominal_features = ['fueltype','aspiration','doornumber','carbody','drivewheel','enginelocation','enginetype','cylindernumber','fuelsystem']

In [3]:
# Transformer used to encode the categorical columns; every other column is
# passed through unchanged.
# NOTE: with drop='first' together with handle_unknown='ignore', a category that
# was not seen during fit is encoded as all zeros, i.e. exactly like the dropped
# first category of that column.
encode = compose.make_column_transformer(
    (preprocessing.OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_features),
    remainder='passthrough',
)

# Random forest regressor with the selected hyperparameters.
# random_state is fixed so that refitting reproduces the same forest and
# therefore the same predictions; without it each run gives different numbers.
model = ensemble.RandomForestRegressor(
    n_estimators=500,
    min_samples_split=5,
    min_samples_leaf=1,
    max_features='sqrt',
    max_depth=20,
    random_state=32,
)

# Full pipeline: encoding followed by the regressor
final_pipeline = pipeline.make_pipeline(encode, model)

In [4]:
# Fit the encoder and the regressor on the training split
final_pipeline.fit(X=x_train, y=y_train)

In [5]:
# Predict prices for the held-out test split (the array is shown as the cell output)
final_pipeline.predict(X=x_test)



array([11524.12822201, 19085.617694  ,  6958.39330718,  7798.88900317,
        8210.38948222,  9302.43982901,  7663.85295509,  7186.58152835,
        9815.02364582, 20486.80714439, 16332.75922949,  8790.78759964,
        7609.70593503,  9990.04817363,  9794.96658889,  9148.07710202,
        8944.78022872,  8312.78111359, 17657.99867202,  8881.00799459,
       16898.53228676,  9954.42066638, 34489.40827358, 10944.92293001,
       17533.97227457,  6980.5204636 ,  6533.3800009 ,  8527.35911082,
        6076.46329228,  6726.186989  , 11421.98717187, 14102.31528547,
       18550.83825678,  8279.1754123 , 10164.7193881 , 12418.4085239 ,
       11263.400164  , 16879.41586747,  7921.58876897, 14887.09043509,
       13033.34918757,  9188.02699495, 11430.79820602,  9142.60390606,
        6760.7394307 , 17631.38747276,  6118.49067958, 15352.03047287,
       37329.86954628, 18236.01015839, 16986.04909906,  6739.53204975])