In [None]:
import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder


In [None]:
# Path to the input CSV, relative to the notebook's working directory.
# Adjust the path as needed.
file_path = 'used_cars_Pakistan.csv'

# Read the whole file into a DataFrame with pandas' default parsing options.
data = pd.read_csv(filepath_or_buffer=file_path)


In [None]:
# Remove the 'Unnamed: 0' and 'Battery' columns. drop() returns a new frame,
# so the freshly loaded `data` is left untouched.
data_cleaned = data.drop(['Unnamed: 0', 'Battery'], axis='columns')

In [None]:
# Impute missing 'Engine_displacement' values with the column median.
#
# The filled column is assigned back explicitly rather than calling
# fillna(inplace=True) on the column selection: that form is chained inplace
# assignment, which emits a FutureWarning on pandas 2.2 and, under
# Copy-on-Write (the pandas 3.0 default), operates on a temporary copy and
# leaves data_cleaned unchanged.
data_cleaned['Engine_displacement'] = data_cleaned['Engine_displacement'].fillna(
    data_cleaned['Engine_displacement'].median()
)


In [None]:
# Work on a random 10% sample of the cleaned rows; the fixed seed makes the
# same rows come back on every run.
data_sampled = data_cleaned.sample(
    frac=0.1,
    random_state=42,
)


In [None]:
# Target: the 'Price_Rs' column. Features: every other column of the sample.
y = data_sampled['Price_Rs']
X = data_sampled.drop('Price_Rs', axis='columns')

In [None]:
# Hold out 20% of the sampled rows for evaluation; the fixed seed keeps the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Columns that are routed through the one-hot encoder.
categorical_features = [
    'make',
    'model',
    'city',
]

# handle_unknown='ignore' lets transform() accept categories that were not
# seen during fit instead of raising an error.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')


In [None]:
# One-hot encode the categorical columns; every other column is passed
# through to the regressor unchanged.
preprocessor = ColumnTransformer(
    [('cat', categorical_transformer, categorical_features)],
    remainder='passthrough',
)

# Chain preprocessing and the regressor into a single estimator so that the
# same transformations are applied at fit time and at predict time.
model = Pipeline([
    ('preprocessor', preprocessor),
    # Reduced number of estimators for faster training.
    ('regressor', RandomForestRegressor(n_estimators=20, random_state=42)),
])

In [None]:
# Fit the full pipeline (preprocessing + regressor) on the training split.
print("Training the model...")
model.fit(X=X_train, y=y_train)

In [None]:
# Predict the target for the held-out rows using the fitted pipeline.
y_pred = model.predict(X=X_test)

In [None]:
# Mean absolute error of the predictions on the held-out split.
mae = mean_absolute_error(y_test, y_pred)

# Root mean squared error, computed as sqrt(MSE). The `squared=False` keyword
# of mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# so taking the square root explicitly works on every version.
rmse = mean_squared_error(y_test, y_pred) ** 0.5

# Report the metrics so the evaluation is visible in the notebook output.
print(f"MAE: {mae:,.2f}")
print(f"RMSE: {rmse:,.2f}")

In [None]:
# Persist the fitted pipeline (preprocessing + regressor) to disk.
# `joblib` is imported with the other dependencies in the first cell.
#
# NOTE: joblib files are pickles. Only load this file from a trusted source,
# and preferably with the same scikit-learn version that produced it.
model_path = 'price_prediction_model.pkl'
joblib.dump(model, model_path)
print(f"Model saved as '{model_path}'")