In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline


In [2]:
# Load the dataset (replace 'your_dataset.csv' with the actual file path)
data = pd.read_csv('car data.csv')

In [3]:
data.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Driven_kms,Fuel_Type,Selling_type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


In [4]:
print(data.columns)

Index(['Car_Name', 'Year', 'Selling_Price', 'Present_Price', 'Driven_kms',
       'Fuel_Type', 'Selling_type', 'Transmission', 'Owner'],
      dtype='object')


In [5]:
# Select features (X) and target variable (y)
X = data.drop(['Selling_Price'], axis=1)
y = data['Selling_Price']

In [6]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [7]:
# Create a column transformer for preprocessing
# Standardize numerical features
# Note: You don't need OneHotEncoder for the dummy variables, as they are already binary
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['Year', 'Present_Price', 'Driven_kms', 'Owner'])
    ])



In [8]:
# Create a pipeline with preprocessing and model training
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', RandomForestRegressor(n_estimators=100, random_state=42))
])

In [9]:
# Train the model using the pipeline
pipeline.fit(X_train, y_train)


In [10]:
# Model evaluation
y_pred = pipeline.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 0.9716568119672128


In [11]:
print("\n--- Car Price Prediction ---")


--- Car Price Prediction ---


In [12]:
# ... (previous code remains unchanged)

# User interaction
print("\n--- Car Price Prediction ---")

# Get user input for car details
year = int(input("Enter the manufacturing year of the car: "))
present_price = float(input("Enter the present price of the car (in lakhs): "))
driven_kms = float(input("Enter the total distance driven by the car (in kms): "))
fuel_type = input("Enter the fuel type of the car (Petrol/Diesel/CNG): ")
transmission = input("Enter the transmission type of the car (Manual/Automatic): ")
owner = int(input("Enter the number of previous owners of the car: "))
brand = input("Enter the brand of the car: ")

# Create a DataFrame with user input
user_data = pd.DataFrame({
    'Year': [year],
    'Present_Price': [present_price],
    'Driven_kms': [driven_kms],
    'Fuel_Type': [fuel_type],
    'Transmission': [transmission],
    'Owner': [owner],
    'Brand_' + brand: [1]  # Set the corresponding brand column to 1
})

# Make predictions using the trained model
predicted_price = pipeline.predict(user_data)[0]

# Display the predicted selling price
print(f"\nPredicted Selling Price: {predicted_price} lakhs")



--- Car Price Prediction ---
Enter the manufacturing year of the car: 2000
Enter the present price of the car (in lakhs): 4000000
Enter the total distance driven by the car (in kms): 485967
Enter the fuel type of the car (Petrol/Diesel/CNG): petrol
Enter the transmission type of the car (Manual/Automatic): manual
Enter the number of previous owners of the car: 2
Enter the brand of the car: tata

Predicted Selling Price: 27.471000000000004 lakhs
