In [1]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Train a random-forest regressor for car selling prices and persist the
# fitted preprocessing + model pipeline to disk with joblib.

# Load the dataset and separate the predictors from the target column.
df = pd.read_csv("clean_car_price.csv")
X = df.drop(columns=["selling_price"])
y = df["selling_price"]

# Columns fed to the model, grouped by the transformation each group gets.
# Any column not listed here is discarded (remainder="drop" below).
numeric_features = [
    'year',
    'km_driven',
    'seats',
    'max_power (in bph)',
    'Mileage(KMPL)',
    'Engine (CC)',
]
categorical_features = ['name', 'fuel', 'owner']

# Hold out 20% of the rows; the fixed random_state makes the split repeatable.
# NOTE(review): X_test / y_test are not used anywhere in this cell -- confirm
# that the held-out evaluation happens elsewhere.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Numeric columns are standardised.
numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

# Categorical columns are one-hot encoded into a dense array; categories not
# seen during fitting are ignored at transform time instead of raising.
categorical_transformer = Pipeline(
    steps=[
        ("onehot", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
    ]
)

# Route each group of columns to its transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ],
    remainder="drop",
)

# Preprocessing and the regressor live in one pipeline, so the saved artifact
# accepts a raw DataFrame with the columns listed above.
model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("regressor", RandomForestRegressor(n_estimators=100, random_state=42)),
    ]
)
model.fit(X_train, y_train)

# Persist the whole fitted pipeline.
# NOTE(review): the message below mentions "the latest scikit-learn version",
# but nothing in this cell checks which version is installed.
joblib.dump(model, "car_price_model.joblib")
print("✅ Model trained and saved successfully with the latest scikit-learn version.")


✅ Model trained and saved successfully with the latest scikit-learn version.
