In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

In [5]:
# Load the dataset into a DataFrame.
# "cars.csv" is a relative path, so it is resolved against the kernel's current working directory.
df = pd.read_csv("cars.csv")

In [6]:
# Split the frame into the regression target ("Price") and the feature matrix
# (every remaining column). Neither statement modifies `df`.
y = df["Price"]
X = df.drop(columns="Price")

In [8]:
# Identify categorical and numerical columns.
# The two selections must be exact complements: every feature column belongs to
# exactly one group. Excluding only "object" from the numerical group would place
# any "category"-dtype column in BOTH lists, sending it to the scaler as well as
# the one-hot encoder.
categorical_cols = X.select_dtypes(include=["object", "category"]).columns
numerical_cols = X.select_dtypes(exclude=["object", "category"]).columns

In [9]:
# Preprocessing: one-hot encode the categorical columns and standardize the
# numerical ones. handle_unknown="ignore" means a category not seen during fit
# is encoded as all zeros instead of raising at transform time.
preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
    ("num", StandardScaler(), numerical_cols),
])

In [10]:
# KNN regressor model: each prediction is based on the 5 nearest training samples (n_neighbors=5).
knn_model = KNeighborsRegressor(n_neighbors = 5)

In [12]:
# Create the pipeline: preprocessing followed by the KNN regressor, so the same
# fitted transforms are applied automatically at both fit and predict time.
pipeline = Pipeline([("preprocess", preprocessor), ("model", knn_model)])

In [13]:
# Train/test split: hold out 20% of the rows for evaluation.
# A fixed random_state makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
# Train the model: fit the preprocessing steps and the KNN regressor on the training split only.
# The fitted pipeline is the cell's last expression, so the notebook displays its repr below.
pipeline.fit(X_train, y_train)

Pipeline(steps=[('preprocess',
                 ColumnTransformer(transformers=[('cat',
                                                  OneHotEncoder(handle_unknown='ignore'),
                                                  Index(['FuelType', 'Transmission'], dtype='object')),
                                                 ('num', StandardScaler(),
                                                  Index(['EngineSize', 'Year', 'KilometersDriven'], dtype='object'))])),
                ('model', KNeighborsRegressor())])

In [15]:
# Make predictions for the held-out test features.
# The pipeline applies its already-fitted preprocessing to X_test before calling the model.
y_pred = pipeline.predict(X_test)

In [17]:
# Evaluate the model on the test split.
# MSE is in squared target units; its square root (RMSE) is in the same units as "Price".
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error: ", mse)
print("Root Mean Squared Error: ", mse ** 0.5)

Mean Squared Error:  176426000000.0
Root Mean Squared Error:  420030.95124050084
