In [None]:
# https://www.kaggle.com/code/mohammadrahdanmofrad/linear-regression-model/notebook#Improve-model-accuracy
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

Load data

In [None]:
# Read the car-details CSV (the path points into a Colab Google Drive mount).
data = pd.read_csv(
    '/content/drive/MyDrive/Colab Notebooks/ML/CAR DETAILS FROM CAR DEKHO.csv'
)

# Every column except the sale price is a predictor; the sale price is the label.
feature = data.drop(columns=['selling_price'])
target = data['selling_price']



Split data

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across reruns.
x_train, x_test, y_train, y_test = train_test_split(
    feature,
    target,
    test_size=0.2,
    random_state=100,
)

Preprocessing (imputation, one-hot encoding, and standardization)

In [None]:
# Numeric columns: fill missing values with the column mean, then standardize.
num_transform = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Nominal columns: fill missing values with the most frequent value, then
# one-hot encode. handle_unknown='ignore' makes categories that were not seen
# during fit encode as all zeros instead of raising at predict time.
nom_transform = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore')),
])

# Route each column group through its own sub-pipeline; the nominal block is
# listed first, then the numeric block.
Preprocess = ColumnTransformer([
    (
        'nom_transform',
        nom_transform,
        ['name', 'fuel', 'seller_type', 'transmission', 'owner'],
    ),
    (
        'num_transform',
        num_transform,
        ['year', 'km_driven'],
    ),
])


Train model

In [None]:
# Bundle preprocessing and the regressor so the transforms are fitted on the
# training split only and then re-applied unchanged to the test split.
model = Pipeline(steps=[
    ('preprocess', Preprocess),
    ('model', LinearRegression()),
])

# fit() returns the fitted pipeline itself, so training and prediction chain.
y_pred = model.fit(x_train, y_train).predict(x_test)


Results

In [None]:
# Report hold-out error (MSE, MAE) and explained variance (R^2) for the
# test-set predictions.
print(
    'Mean Squared Error:',
    mean_squared_error(y_true=y_test, y_pred=y_pred),
)
print(
    'Mean Absolute Error:',
    mean_absolute_error(y_true=y_test, y_pred=y_pred),
)
print(
    'R^2 Score:',
    r2_score(y_true=y_test, y_pred=y_pred),
)


Mean Squared Error: 41585960452.11047
Mean Absolute Error: 97314.14283182415
R^2 Score: 0.8771581718168928
