# ðŸš— Car Price Prediction Project
### Random Forest + Streamlit Deployment
This notebook trains a model and generates a Streamlit interface.

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import joblib

In [2]:
# Load Dataset (Make sure CSV file is in same folder)
df = pd.read_csv("cars.csv")  # Change name if needed
df.head()

Unnamed: 0,name,year,km_driven,fuel,seller_type,transmission,owner,selling_price
0,Maruti 800 AC,2007,70000,Petrol,Individual,Manual,First Owner,60000
1,Maruti Wagon R LXI Minor,2007,50000,Petrol,Individual,Manual,First Owner,135000
2,Hyundai Verna 1.6 SX,2012,100000,Diesel,Individual,Manual,First Owner,600000
3,Datsun RediGO T Option,2017,46000,Petrol,Individual,Manual,First Owner,250000
4,Honda Amaze VX i-DTEC,2014,141000,Diesel,Individual,Manual,Second Owner,450000


In [3]:
# Separate features and target
X = df.drop('selling_price', axis=1)
y = df['selling_price']

# Automatically detect categorical columns
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()

# Preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_cols)
    ],
    remainder='passthrough'
)

# Pipeline with Random Forest
model = Pipeline(steps=[
    ('preprocessing', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=300, random_state=42))
])

# Train Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train Model
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)

print("R2:", r2_score(y_test, y_pred))
print("MAE:", mean_absolute_error(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))

R2: 0.5750120012188297
MAE: 118411.86777968876
RMSE: 360130.1281072606




In [4]:
# Save model
joblib.dump(model, "car_price_model.pkl")
print("Model saved as car_price_model.pkl")

Model saved as car_price_model.pkl


In [None]:
# Create Streamlit app file
app_code = '''
import streamlit as st
import pandas as pd
import joblib

model = joblib.load("car_price_model.pkl")

st.title("ðŸš— Car Price Prediction App")

name = st.text_input("Car Name (Example: Maruti Swift Dzire VDI)")
year = st.number_input("Year", min_value=1990, max_value=2025)
km_driven = st.number_input("Kilometers Driven", min_value=0)
fuel = st.selectbox("Fuel Type", ["Petrol", "Diesel", "CNG", "LPG"])
seller_type = st.selectbox("Seller Type", ["Individual", "Dealer", "Trustmark Dealer"])
transmission = st.selectbox("Transmission", ["Manual", "Automatic"])
owner = st.selectbox("Owner Type", ["First Owner", "Second Owner", "Third Owner", "Fourth & Above Owner"])

if st.button("Predict Price"):
    input_data = pd.DataFrame({
        "name": [name],
        "year": [year],
        "km_driven": [km_driven],
        "fuel": [fuel],
        "seller_type": [seller_type],
        "transmission": [transmission],
        "owner": [owner]
    })
    
    prediction = model.predict(input_data)
    st.success(f"Estimated Selling Price: â‚¹ {int(prediction[0])}")
'''

with open("app.py", "w") as f:
    f.write(app_code)

print("Streamlit app saved as app.py")

Streamlit app saved as app.py
