In [1]:
# ------------------------------------------
# 1. Import Libraries
# ------------------------------------------
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# ------------------------------------------
# 2. Load Dataset
# ------------------------------------------
df = pd.read_csv("cardekho.csv")

print("Dataset Loaded Successfully!")
print(df.head())

# ------------------------------------------
# 3. Data Cleaning
# ------------------------------------------

# Discard exact duplicate rows, then any row that has a missing value.
df = df.drop_duplicates().dropna()

# Keep only rows with a strictly positive selling price and a
# manufacture year later than 1990.
valid_rows = (df['selling_price'] > 0) & (df['year'] > 1990)
df = df[valid_rows]

print("Rows after cleaning:", len(df))

# ------------------------------------------
# 4. Split features and target
# ------------------------------------------
# Every column except the target is a candidate feature; the
# ColumnTransformer below decides which ones actually reach the model.
X = df.drop("selling_price", axis=1)
y = df["selling_price"]

# ------------------------------------------
# 5. Handle categorical columns
# ------------------------------------------
categorical_cols = ["name", "fuel", "seller_type", "transmission", "owner"]

numeric_cols = ["year", "km_driven"]

# One-hot encode the categorical columns and forward the numeric columns
# unchanged.  The numeric columns are listed explicitly (instead of relying
# on remainder='passthrough') so that any additional column present in the
# CSV is dropped rather than silently fed to the regressor.
# handle_unknown='ignore' encodes categories not seen during fit as all
# zeros instead of raising at prediction time.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
        ('num', 'passthrough', numeric_cols),
    ],
    remainder='drop'
)

# ------------------------------------------
# 6. Create Pipeline (Preprocess + Model)
# ------------------------------------------
# Chain the column preprocessing with a linear regressor so that fitting
# and predicting both apply the same transformations.
model = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("regressor", LinearRegression()),
    ]
)

# ------------------------------------------
# 7. Train-Test Split
# ------------------------------------------
# Hold out 20% of the rows; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ------------------------------------------
# 8. Train the Model
# ------------------------------------------
# Fits the encoder and the regressor on the training portion only.
model.fit(X_train, y_train)
print("Model Trained Successfully!")

# ------------------------------------------
# 9. Prediction Function
# ------------------------------------------

def predict_price():
    """Prompt for one car's details and print the model's price estimate.

    Reads seven values interactively with ``input()``, assembles them into
    a single-row DataFrame using the same column names the pipeline was
    trained on, and prints the prediction of the module-level ``model``.
    Negative predictions are clamped to 0 before printing.

    Raises:
        ValueError: If the year or the kilometres driven cannot be parsed
            as an integer.
    """
    print("\n--- Enter Car Details ---")

    year = int(input("Enter car year: "))
    km_driven = int(input("Enter KM driven: "))

    # Strip surrounding whitespace from the free-text answers: the encoder
    # matches categories by exact string, and with handle_unknown='ignore'
    # a value such as "Petrol " would be treated as unknown and silently
    # contribute nothing to the prediction.
    fuel = input("Enter Fuel Type (Petrol/Diesel/CNG/LPG/Electric): ").strip()
    seller_type = input("Enter Seller Type (Individual/Dealer/Trustmark Dealer): ").strip()
    transmission = input("Enter Transmission (Manual/Automatic): ").strip()
    owner = input("Enter Owner Type (First Owner/Second Owner/etc.): ").strip()
    # NOTE(review): the dataset preview shows full variant names (e.g.
    # "Hyundai Verna 1.6 SX"), so a shorter name like the one suggested in
    # this prompt is likely an unseen category -- confirm the intended
    # granularity of the "name" feature.
    name = input("Enter Car Name (e.g., Maruti Swift, Hyundai i20): ").strip()

    # Column names must match those used when fitting the pipeline.
    new_data = pd.DataFrame({
        "name": [name],
        "year": [year],
        "km_driven": [km_driven],
        "fuel": [fuel],
        "seller_type": [seller_type],
        "transmission": [transmission],
        "owner": [owner]
    })

    predicted_price = model.predict(new_data)[0]

    # A linear model can extrapolate below zero; never report a negative price.
    predicted_price = max(0, predicted_price)

    print(f"\nPredicted Car Price (in INR): ₹ {predicted_price:,.2f}")

# ------------------------------------------
# 10. Run Prediction
# ------------------------------------------
predict_price()

import pickle

# Persist the fitted pipeline (preprocessing + regressor) for later reuse.
# The context manager guarantees the file is flushed and closed even if
# pickling fails part-way through.
# NOTE: only unpickle this file from a trusted source; loading a pickle can
# execute arbitrary code.
with open("car_price_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)




Dataset Loaded Successfully!
                       name  year  selling_price  km_driven    fuel  \
0             Maruti 800 AC  2007          60000      70000  Petrol   
1  Maruti Wagon R LXI Minor  2007         135000      50000  Petrol   
2      Hyundai Verna 1.6 SX  2012         600000     100000  Diesel   
3    Datsun RediGO T Option  2017         250000      46000  Petrol   
4     Honda Amaze VX i-DTEC  2014         450000     141000  Diesel   

  seller_type transmission         owner  
0  Individual       Manual   First Owner  
1  Individual       Manual   First Owner  
2  Individual       Manual   First Owner  
3  Individual       Manual   First Owner  
4  Individual       Manual  Second Owner  
Rows after cleaning: 3577
Model Trained Successfully!

--- Enter Car Details ---

Predicted Car Price (in INR): ₹ 1,020,922.37
