# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib
import matplotlib.pyplot as plt


# Load the vehicle listings from the local CSV file.
file_path = r"E:\ML\unified mentor\Projects-20240722T093004Z-001\Projects\vehicle_price_prediction\vehicle_data.csv"
data = pd.read_csv(file_path)

# Drop every row that contains at least one missing value.
data = data.dropna()

# Columns whose values are replaced by LabelEncoder integer codes.
categorical_columns = [
    'make',
    'model',
    'fuel',
    'transmission',
    'trim',
    'body',
    'exterior_color',
    'interior_color',
    'drivetrain',
]

# Fit a *separate* encoder per column and keep each one, keyed by column name.
# Re-fitting a single shared LabelEncoder overwrites its classes_ on every
# call, so only the last column's mapping would survive; without the others
# the codes cannot be inverse-transformed, and new rows cannot be encoded the
# same way before being passed to the trained model.
# To score raw data in another session, persist this dict as well
# (e.g. with joblib.dump).
label_encoders = {}
for column in categorical_columns:
    # After the loop `label_encoder` is the encoder fitted on 'drivetrain',
    # the same state the name had when one encoder was reused for all columns.
    label_encoder = LabelEncoder()
    data[column] = label_encoder.fit_transform(data[column])
    label_encoders[column] = label_encoder


# Target is the 'price' column; features are every remaining column except
# 'name' and 'description'.
y = data['price']
X = data.drop(columns=['name', 'description', 'price'])

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a 100-tree random forest with a fixed seed.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Score the model on the held-out rows.
y_pred = model.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.4f}")
print(f"R^2 Score: {r2:.4f}")


# Rank the features by the importance the fitted forest assigned to them,
# largest first.
features = X.columns
importances = model.feature_importances_
ascending_order = np.argsort(importances)
indices = ascending_order[::-1]

# Pre-compute what goes on each axis so the plotting calls stay short.
bar_positions = range(len(indices))
ranked_importances = importances[indices]
ranked_features = features[indices]

# Horizontal bar chart: one bar per feature, labelled with the column name.
plt.figure(figsize=(10, 6))
plt.title("Feature Importance")
plt.barh(bar_positions, ranked_importances, align="center")
plt.yticks(bar_positions, ranked_features)
plt.xlabel("Feature Importance")
plt.show()


# Write the fitted model and scaler to the current working directory, model
# first.
for artifact, artifact_file in (
    (model, 'vehicle_price_prediction_model.pkl'),
    (scaler, 'scaler.pkl'),
):
    joblib.dump(artifact, artifact_file)

# Read both artifacts straight back, in the same order they were written.
loaded_model, loaded_scaler = [
    joblib.load(artifact_file)
    for artifact_file in ('vehicle_price_prediction_model.pkl', 'scaler.pkl')
]


