In [None]:
# Import the libraries used for data handling, modelling, evaluation and plotting
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Build a small in-memory sample of car listings. To work with a real dataset,
# load it here instead, e.g. data = pd.read_csv('car_data.csv').
data = pd.DataFrame(
    {
        'Brand': ['Toyota', 'Honda', 'Ford'] * 3,
        'Horsepower': [100, 120, 150, 110, 130, 160, 105, 125, 155],
        'Mileage': [20000, 30000, 40000, 25000, 35000, 45000, 22000, 32000, 42000],
        'Price': [20000, 25000, 30000, 22000, 27000, 32000, 21000, 26000, 31000],
    }
)

# One-hot encode the categorical column(s); drop_first removes the first level
# of each encoded column so the remaining indicators are not redundant.
data = pd.get_dummies(data, drop_first=True)

# 'Price' is the prediction target (y); every other column is a feature (X).
y = data['Price']
X = data.drop(columns='Price')

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features: the scaler learns its statistics from the training
# rows only, and that same transformation is then applied to the held-out rows.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit a 100-tree random forest regressor (fixed random_state for reproducibility).
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict prices for the held-out rows.
y_pred = model.predict(X_test)

# Report the error and goodness of fit on the held-out rows.
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

r2 = r2_score(y_test, y_pred)
print(f'R-squared: {r2}')

# Scatter the predictions against the true prices.
plt.scatter(y_test, y_pred)
plt.title('Actual vs. Predicted Car Prices')
plt.xlabel('Actual Price')
plt.ylabel('Predicted Price')
plt.show()