# 🚗 Car Price Prediction Project

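This notebook walks through a complete machine learning workflow for predicting car prices from historical data: loading and inspecting the dataset, exploratory plots, encoding categorical columns, fitting a linear regression model, and evaluating its predictions.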


In [None]:
# Step 1: Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

In [None]:
# Step 2: Load Dataset
# Read the CSV into a DataFrame; change the path below to point at your copy.
df = pd.read_csv(filepath_or_buffer='car_data.csv')
# Preview the first five rows (the default row count for head()).
df.head(n=5)

In [None]:
# Step 3: Data Info
# info() prints its summary itself, so it needs no wrapper.
df.info()
# Only the last bare expression in a cell is echoed (and none when run as a
# script), so print the intermediate summaries explicitly instead of
# letting their results be discarded.
print(df.describe())
# Number of missing values in each column.
print(df.isnull().sum())

In [None]:
# Step 4: Exploratory Data Analysis
# Histogram of the 'Price' column with a KDE curve overlaid.
price_ax = sns.histplot(df['Price'], kde=True)
price_ax.set_title("Car Price Distribution")
plt.show()

# Bar chart counting the rows for each 'Fuel_Type' value.
fuel_ax = sns.countplot(data=df, x='Fuel_Type')
fuel_ax.set_title("Fuel Type Count")
plt.show()

In [None]:
# Step 5: Encode Categorical Variables
# Work on a copy so the raw DataFrame stays available for inspection.
df_encoded = df.copy()

categorical_cols = ['Fuel_Type', 'Seller_Type', 'Transmission']

# Keep one fitted encoder per column. Re-fitting a single shared encoder
# would overwrite its classes on every iteration, leaving only the last
# column's mapping available for inverse_transform or for encoding new data.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df_encoded[col] = le.fit_transform(df_encoded[col])
    label_encoders[col] = le

# NOTE(review): integer label codes impose an arbitrary order on nominal
# categories, which a linear model will treat as numeric magnitude. One-hot
# encoding may be more appropriate; it is not applied here because the
# feature selection step expects these exact column names.

df_encoded.head()

In [None]:
# Step 6: Feature Selection & Splitting
# Predictor columns fed to the model, and the column being predicted.
feature_columns = [
    'Year',
    'Kms_Driven',
    'Fuel_Type',
    'Seller_Type',
    'Transmission',
    'Owner',
]
target_column = 'Price'

X = df_encoded[feature_columns]
y = df_encoded[target_column]

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Step 7: Model Training
# Fit a linear regression on the training split only.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Step 8: Prediction and Evaluation
# Predict prices for the held-out rows.
y_pred = model.predict(X_test)

# Score the predictions against the true test prices.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print("R² Score:", r2)
print("MSE:", mse)

In [None]:
# Step 9: Visualization
# Endpoints of the reference diagonal, taken from the range of actual prices.
price_lo, price_hi = y_test.min(), y_test.max()

# Scatter of predicted against actual prices for the test split.
plt.scatter(y_test, y_pred)
plt.xlabel("Actual Price")
plt.ylabel("Predicted Price")
plt.title("Actual vs Predicted Prices")
# Dashed red y = x line: points on it are perfect predictions.
plt.plot([price_lo, price_hi], [price_lo, price_hi], 'r--')
plt.show()