In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# Read the cleaned car listings into a DataFrame
df = pd.read_csv('Car_Dataset_final.csv')

# Regression target: the listing price
y = df['price']

# Predictor columns fed to the preprocessing step
X = df[[
    'year', 'mileage', 'make', 'model',
    'engine', 'transmission', 'fuel', 'assembly',
]]

In [3]:
# Identify categorical columns
categorical_cols = ['make', 'model', 'transmission', 'fuel', 'assembly']
numerical_cols = ['year', 'mileage', 'engine']

# Create preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numerical_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ])

# Transform features
X_processed = preprocessor.fit_transform(X)

In [4]:
# Split data (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    X_processed, y, test_size=0.2, random_state=42)

In [5]:
# Ordinary least-squares regressor with default settings
model = LinearRegression()

# Fit on the training split
model.fit(X=X_train, y=y_train)

In [6]:
# Predict the target for the held-out rows
y_pred = model.predict(X_test)

# Score the predictions: mean absolute error, mean squared error and R²,
# each computed against the true held-out targets
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

# Report every metric on its own line, rounded to two decimals
print(
    f"MAE: {mae:.2f}",
    f"MSE: {mse:.2f}",
    f"R² Score: {r2:.2f}",
    sep="\n",
)

MAE: 572913.74
MSE: 644943343090.64
R² Score: 0.83


In [9]:
# Show the fitted regression coefficients, one per column of the encoded
# feature matrix. The ColumnTransformer lists the passthrough numerical
# columns before the one-hot columns, so the first three entries belong to
# year, mileage and engine; the remaining entries are one-hot category columns.
model.coef_

array([ 1.39727061e+05, -5.04407881e+00,  1.89005484e+03, -1.24896730e+02,
        2.95883029e+04,  6.23695600e+03, -6.29268980e+02, -1.11728236e+05,
        2.11174900e+03, -4.49019704e+03,  2.12212562e+04, -1.68763346e+03,
        1.00116751e+03,  2.75418379e+04, -6.21718835e+04, -1.61350112e+02,
        5.70952824e+03,  8.51045738e+04,  1.05190468e+05,  9.06720148e+01,
       -2.63659010e+03, -1.48626616e+03, -4.31915591e+03,  1.18109218e+05,
       -1.82903309e+02, -9.83302337e+02,  1.10472491e+05,  1.53655355e+03,
        1.88741890e+02,  3.29736190e+04, -3.47079822e+04, -5.64540746e+02,
       -1.63743061e+04, -6.97983129e+02, -3.95984835e+04,  9.20751583e+03,
       -8.52198105e+02,  5.44163160e+01, -4.31055282e+05,  1.75231709e+05,
       -1.63955521e+04, -1.74883972e+01, -7.05275389e+02,  1.79163969e+04,
        3.82105118e+03,  4.97486546e+03, -2.95196688e+02, -1.72185937e+03,
       -1.04236455e+03, -4.73994238e+04, -5.11555141e+02, -1.27093996e+02,
       -1.58224637e+03, -