In [37]:
# Car price prediction
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib

# Load and clean data
df = pd.read_csv("/content/car_dataset.csv")

# Discard listings without a numeric price, then any row with a missing field.
df = df[df['Price'] != 'Ask For Price'].dropna()
df['Price'] = df['Price'].str.replace(',', '').astype(int)

# Strip the " kms" suffix and thousands separators; values that end up empty
# are marked as missing.
kms_clean = (
    df['kms_driven']
    .str.replace(' kms', '')
    .str.replace(',', '')
    .replace('', np.nan)
)

# Drop the affected rows from the frame itself before casting. Calling
# dropna() on the column alone and assigning it back re-aligns on the frame's
# index, which reinserts the dropped entries as NaN and leaves a float column.
df = (
    df.assign(kms_driven=kms_clean)
    .dropna(subset=['kms_driven'])
    .astype({'kms_driven': int})
)
df['year'] = df['year'].astype(int)

# Model inputs (two categorical, two numeric columns) and the target column.
X = df.loc[:, ['company', 'year', 'kms_driven', 'fuel_type']]
y = df.loc[:, 'Price']

# One-hot encode the text columns; the remaining (numeric) columns are passed
# through to the regressor unchanged.
categorical = ['company', 'fuel_type']
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical),
    ],
    remainder='passthrough',
)

# Encoding and regression are chained so that the fitted (and saved) object
# accepts the raw feature columns directly.
model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', RandomForestRegressor(random_state=42, n_estimators=200)),
    ]
)

# Fit on an 80% split with a fixed seed, then persist the fitted pipeline.
# X_test / y_test are held out here but not used further in this cell.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
model.fit(X_train, y_train)
joblib.dump(model, 'car_price_model.pkl')

# Adjusted prediction function
def predict_price(company, year, kms, fuel_type):
    """Return an adjusted price estimate for a single car as an int.

    The raw estimate comes from the module-level ``model`` pipeline, which is
    given a one-row frame with the columns ``company``, ``year``,
    ``kms_driven`` and ``fuel_type``. Two hand-tuned multipliers are then
    applied on top of the model output:

    * purchase year: x1.25 from 2022, x1.15 from 2020, x1.05 from 2015,
      x0.85 up to and including 2010, unchanged for 2011-2014;
    * distance driven: x0.85 from 100000 km, x0.90 from 70000 km,
      x0.95 from 50000 km, unchanged below that.

    Args:
        company: Manufacturer name.
        year: Year of purchase.
        kms: Kilometers driven.
        fuel_type: Fuel type label.

    Returns:
        The adjusted estimate, truncated toward zero by ``int()``.
    """
    features = pd.DataFrame(
        [[company, year, kms, fuel_type]],
        columns=['company', 'year', 'kms_driven', 'fuel_type'],
    )
    estimate = model.predict(features)[0]

    # Tiers are checked from the newest threshold down; the first match wins.
    year_factor = 1.0
    for min_year, factor in ((2022, 1.25), (2020, 1.15), (2015, 1.05)):
        if year >= min_year:
            year_factor = factor
            break
    else:
        # No boost tier matched: only cars from 2010 or earlier are discounted.
        if year <= 2010:
            year_factor = 0.85

    # Tiers are checked from the highest mileage down; the first match wins.
    kms_factor = 1.0
    for min_kms, factor in ((100000, 0.85), (70000, 0.90), (50000, 0.95)):
        if kms >= min_kms:
            kms_factor = factor
            break

    # Year adjustment is applied first, then mileage.
    return int(estimate * year_factor * kms_factor)

# User input
company = input("Company of Car: ").strip()
car_name = input("Car Name: ").strip()  # For display only

try:
    year = int(input("Year of Purchase: ").strip())
    kms = int(input("Kilometers Driven: ").strip())
except ValueError:
    # int() raises ValueError on non-numeric text. Catching only that lets
    # Ctrl-C and other unrelated failures propagate instead of being reported
    # as a number-format problem.
    print("Invalid number format for year or kilometers.")
    exit()

fuel_type = input("Fuel Type (Petrol/Diesel/CNG): ").strip()

# Output
try:
    price = predict_price(company, year, kms, fuel_type)
except Exception as exc:
    # Keep the friendly message but surface the cause so the failure can be
    # diagnosed; KeyboardInterrupt/SystemExit are no longer swallowed.
    print("❌ Prediction failed. Please check your inputs.")
    print(f"   Reason: {exc}")
else:
    print(f"\n🚗 Estimated Price for '{car_name}' from {company} ({year}): ₹{price:,}")


Company of Car: Honda
Car Name: City
Year of Purchase: 2023
Kilometers Driven: 10000
Fuel Type (Petrol/Diesel/CNG): Petrol

🚗 Estimated Price for 'City' from Honda (2023): ₹636,524
