In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [3]:
# Location of the used-car CSV (a mounted Google Drive path) and the raw
# frame that every later cell works from.
file_path = "/content/drive/MyDrive/car.csv"
data = pd.read_csv(filepath_or_buffer=file_path)

In [4]:
# Quick look at the first rows to confirm the columns were parsed as expected.
print("First 5 rows:\n", data.head(n=5))

First 5 rows:
   Car_Name  Year  Selling_Price  Present_Price  Kms_Driven Fuel_Type  \
0     ritz  2014           3.35           5.59       27000    Petrol   
1      sx4  2013           4.75           9.54       43000    Diesel   
2     ciaz  2017           7.25           9.85        6900    Petrol   
3  wagon r  2011           2.85           4.15        5200    Petrol   
4    swift  2014           4.60           6.87       42450    Diesel   

  Seller_Type Transmission  Owner  
0      Dealer       Manual      0  
1      Dealer       Manual      0  
2      Dealer       Manual      0  
3      Dealer       Manual      0  
4      Dealer       Manual      0  


In [21]:
# Count the null entries in each column.
print("\nMissing values:\n", data.isna().sum())


Missing values:
 Car_Name         0
Year             0
Selling_Price    0
Present_Price    0
Kms_Driven       0
Fuel_Type        0
Seller_Type      0
Transmission     0
Owner            0
Car_Age          0
dtype: int64


In [22]:
# Age of each car in years, measured against the year in which the notebook
# is executed (so the absolute values shift when it is re-run in a later year).
current_year = datetime.now().year
data['Car_Age'] = data['Year'].rsub(current_year)

In [23]:
# Lookup tables translating each categorical label into its integer code.
fuel_map = dict(Petrol=0, Diesel=1, CNG=2)
seller_map = dict(Dealer=0, Individual=1)
trans_map = dict(Manual=0, Automatic=1)

In [24]:
# Encode the categorical columns as integer codes using the lookup tables.
#
# Series.map() turns every value that is not a key of the mapping into NaN.
# Re-running this cell on columns that were already encoded would therefore
# wipe them out, and an unexpected label would silently become NaN. To keep
# the cell safe to re-run, columns that are already numeric are skipped, and
# labels missing from the mapping raise instead of being converted to NaN.
for column, mapping in (
    ('Fuel_Type', fuel_map),
    ('Seller_Type', seller_map),
    ('Transmission', trans_map),
):
    if pd.api.types.is_numeric_dtype(data[column]):
        continue  # already encoded by an earlier run of this cell
    encoded = data[column].map(mapping)
    # Only non-null source values that failed to map indicate an unknown label.
    unmapped = data.loc[encoded.isna() & data[column].notna(), column].unique()
    if len(unmapped) > 0:
        raise ValueError(f"Unmapped {column} values: {unmapped.tolist()}")
    data[column] = encoded

In [25]:
# Predictor columns, in the order the model will see them, and the target.
features = [
    'Present_Price',
    'Kms_Driven',
    'Car_Age',
    'Fuel_Type',
    'Seller_Type',
    'Transmission',
    'Owner',
]
X = data[features]
y = data['Selling_Price']

In [26]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [27]:
# Random forest of 200 trees, seeded so repeated runs build the same forest,
# trained on the training portion of the split.
rf = RandomForestRegressor(
    n_estimators=200,
    random_state=42,
)
rf.fit(X_train, y_train)

In [28]:
# Predicted selling prices for the held-out rows.
y_pred = rf.predict(X=X_test)

In [29]:
# Compare the held-out targets with the predictions: absolute error, squared
# error (plus its square root), and the R² score.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

In [30]:
# Emit the evaluation report, one metric per line, rounded to two decimals.
print(
    "\n Random Forest Regression Metrics:",
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
    f"R² Score: {r2:.2f}",
    sep="\n",
)


 Random Forest Regression Metrics:
Mean Absolute Error (MAE): 0.69
Mean Squared Error (MSE): 1.09
Root Mean Squared Error (RMSE): 1.04
R² Score: 0.95


In [31]:
# Interactive prediction: read one car's details from stdin, derive its age
# the same way the training data did, and ask the fitted forest for a price.
print("\nEnter car details to predict selling price:")
present_price = float(input("Present Price (in lakhs): "))
kms_driven = float(input("Kilometers Driven: "))
year = int(input("Year of Manufacture: "))
print("Fuel Type (0=Petrol, 1=Diesel, 2=CNG):")
fuel = int(input())
print("Seller Type (0=Dealer, 1=Individual):")
seller = int(input())
print("Transmission Type (0=Manual, 1=Automatic):")
trans = int(input())
owner = int(input("Number of Previous Owners: "))

# Reject codes outside the ranges offered by the prompts above; the model
# would otherwise return a number for a category that does not exist.
if fuel not in (0, 1, 2):
    raise ValueError("Fuel Type must be 0, 1 or 2")
if seller not in (0, 1):
    raise ValueError("Seller Type must be 0 or 1")
if trans not in (0, 1):
    raise ValueError("Transmission Type must be 0 or 1")
# A manufacture year in the future would produce a negative age.
if year > current_year:
    raise ValueError("Year of Manufacture cannot be later than the current year")

car_age = current_year - year

# The model was fitted on a DataFrame, so predict on a DataFrame carrying the
# same column names. Passing a bare nested list makes scikit-learn warn about
# missing feature names and ties correctness to positional order. Selecting
# with `features` guarantees the training column order.
user_input = pd.DataFrame([{
    'Present_Price': present_price,
    'Kms_Driven': kms_driven,
    'Car_Age': car_age,
    'Fuel_Type': fuel,
    'Seller_Type': seller,
    'Transmission': trans,
    'Owner': owner,
}])[features]
pred_price = rf.predict(user_input)
print(f"\nPredicted selling price of the car: {pred_price[0]:.2f} lakhs")


Enter car details to predict selling price:
Present Price (in lakhs): 6.25
Kilometers Driven: 9874
Year of Manufacture: 2022
Fuel Type (0=Petrol, 1=Diesel, 2=CNG):
0
Seller Type (0=Dealer, 1=Individual):
1
Transmission Type (0=Manual, 1=Automatic):
1
Number of Previous Owners: 1

Predicted selling price of the car: 5.66 lakhs


