In [112]:
# Imported necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder 
from sklearn.compose import make_column_transformer 
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import RandomForestRegressor
import joblib

# Load the used-car listings from the CSV file.
used_car = pd.read_csv('used_cars_dataset.csv')

# Split the table into model inputs and target:
#   X - every column except 'Price' (the features the model learns from)
#   y - the 'Price' column (the value the model learns to predict)
# `columns=` already selects the column axis, so no separate `axis` is needed.
X = used_car.drop(columns='Price')
y = used_car['Price']

# Fixed seed shared by the split and the forest so that re-running this cell
# reproduces the same hold-out rows, the same fitted model and the same score.
RANDOM_STATE = 42

# Hold out 15% of the rows for evaluation; the remaining 85% are used to fit.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=RANDOM_STATE
)

# This is a regression problem. RandomForestRegressor was selected after
# comparing R^2 scores of candidate algorithms (that comparison is not part
# of this cell).
model = RandomForestRegressor(random_state=RANDOM_STATE)

# One-hot encode the categorical columns.
# handle_unknown='ignore': a category that was not seen during fit does not
#   raise an error at transform time.
# remainder='passthrough': every column not listed below is forwarded to the
#   model unchanged.
# NOTE(review): this assumes all remaining columns are already numeric -
#   confirm against the dataset.
column_trans = make_column_transformer(
    (OneHotEncoder(handle_unknown='ignore'), ["Brand", "Fuel", "Model", "Registered City"]),
    remainder='passthrough'
)

# Chain the encoder and the regressor so that raw feature rows can be passed
# straight to fit()/predict(); the encoding is applied automatically first.
pipe = make_pipeline(column_trans, model)

# Learn the relationship between the features and the price on the training rows.
pipe.fit(X_train, y_train)

# Evaluate on the held-out rows before persisting, so a badly performing model
# is noticed instead of being silently saved.
test_r2 = r2_score(y_test, pipe.predict(X_test))
print(f"Hold-out R^2: {test_r2:.4f}")

# Persist the whole fitted pipeline (encoder + model) to a .joblib file.
# Kept as the final expression so the notebook still echoes the written path.
joblib.dump(pipe, 'Used_Car.joblib')


['Used_Car.joblib']