In [10]:
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder


In [11]:
# Read the CSV and split it into the six predictor columns and the
# `selling_price` target.
# NOTE: the path is an absolute, machine-specific Windows location; it has to
# be adjusted before this cell can run on another machine.
csv_path = r'C:\Users\Admin\Downloads\CAR DETAILS FROM CAR DEKHO.csv'
feature_columns = ['year', 'km_driven', 'fuel', 'seller_type', 'transmission', 'owner']
target_column = 'selling_price'

data = pd.read_csv(csv_path)
X = data[feature_columns]  # predictors, in the order listed above
y = data[target_column]    # regression target


In [12]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [13]:
# Column-wise preprocessing: one-hot encode the four categorical columns and
# pass every other column (here `year` and `km_driven`) through unchanged.
categorical_features = ['fuel', 'seller_type', 'transmission', 'owner']

preprocessor = ColumnTransformer(
    transformers=[
        (
            'cat',
            # drop='first' removes one indicator per feature (the baseline level).
            # handle_unknown='ignore' keeps transform/predict from raising when a
            # category was never seen during fit (e.g. a rare level that ended up
            # only in the held-out rows); such rows are encoded as all zeros for
            # that feature, i.e. the same as the dropped baseline level.
            # Combining `drop` with handle_unknown='ignore' needs scikit-learn >= 1.0.
            OneHotEncoder(drop='first', handle_unknown='ignore'),
            categorical_features,
        )
    ],
    remainder='passthrough',
)


In [14]:
# Random forest pipeline: preprocessing followed by a 100-tree regressor with a
# fixed seed.
# Pipeline.fit fits its steps in place, so the pipeline gets its own unfitted
# copy of `preprocessor` via clone(); fitting this pipeline can then never
# alter the transformer state used by any other pipeline built from the same
# `preprocessor` object.
rf_model = Pipeline(steps=[
    ('preprocessor', clone(preprocessor)),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])


In [15]:
# Gradient boosting pipeline: preprocessing followed by a 100-stage regressor
# with a fixed seed.
# Pipeline.fit fits its steps in place, so the pipeline gets its own unfitted
# copy of `preprocessor` via clone(); fitting this pipeline can then never
# alter the transformer state used by any other pipeline built from the same
# `preprocessor` object.
gb_model = Pipeline(steps=[
    ('preprocessor', clone(preprocessor)),
    ('regressor', GradientBoostingRegressor(n_estimators=100, random_state=42))
])


In [16]:
# Train both pipelines on the training split only; the held-out rows are not
# touched until evaluation.
rf_model.fit(X_train, y=y_train)
gb_model.fit(X_train, y=y_train)


In [17]:
# Evaluate both fitted pipelines on the held-out rows.
# Step 1: predictions (kept as `rf_pred` / `gb_pred` for later cells).
rf_pred = rf_model.predict(X_test)
gb_pred = gb_model.predict(X_test)

# Step 2: mean squared error of each model, in squared units of the target.
rf_mse = mean_squared_error(y_true=y_test, y_pred=rf_pred)
gb_mse = mean_squared_error(y_true=y_test, y_pred=gb_pred)

# Step 3: report, random forest first.
print(f"Random Forest Mean Squared Error: {rf_mse:.2f}")
print(f"Gradient Boosting Mean Squared Error: {gb_mse:.2f}")


Random Forest Mean Squared Error: 153605580818.33
Gradient Boosting Mean Squared Error: 155981499418.46


In [18]:
# Pair each held-out target value with the corresponding prediction. The frames
# keep y_test's index, so rows can be traced back to the original data.
actual_frame = y_test.to_frame(name='Actual')
rf_results = actual_frame.assign(RandomForest_Predicted=rf_pred)
gb_results = actual_frame.assign(GradientBoosting_Predicted=gb_pred)

# Show the first five rows of each comparison table.
rf_preview = rf_results.head()
gb_preview = gb_results.head()
print("Random Forest Predictions vs Actual Values:\n", rf_preview)
print("\nGradient Boosting Predictions vs Actual Values:\n", gb_preview)


Random Forest Predictions vs Actual Values:
       Actual  RandomForest_Predicted
3978  165000            1.562608e+05
1448  250000            4.801276e+05
2664  120000            1.228450e+05
17    450000            4.887250e+05
1634  730000            1.722266e+06

Gradient Boosting Predictions vs Actual Values:
       Actual  GradientBoosting_Predicted
3978  165000                2.805637e+05
1448  250000                4.855637e+05
2664  120000                1.313053e+05
17    450000                4.887846e+05
1634  730000                1.767717e+06
