# step 1: **Data Preprocessing**

**Encoding Categorical Values**


In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load dataset
df = pd.read_csv("car_price_dataset.csv")

# One-Hot Encoding for categorical columns
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
encoded_data = encoder.fit_transform(df[['Brand', 'Model', 'Fuel_Type', 'Transmission']])
encoded_df = pd.DataFrame(encoded_data, columns=encoder.get_feature_names_out())

# Concatenate encoded features and drop original categorical columns
df = pd.concat([df, encoded_df], axis=1).drop(['Brand', 'Fuel_Type', 'Model', 'Transmission'], axis=1)




## Plotting

In [2]:

# Scatter plot of each feature vs Price

# for column in df.columns:
#     if column != 'Price':  
#         plt.figure(figsize=(10, 6))
#         plt.scatter(df[column], df['Price'], alpha=0.5)
#         plt.title(f'Scatter Plot of {column} vs Price')
#         plt.xlabel(column)
#         plt.ylabel('Price')
#         plt.grid(True)
#         plt.show()


# Fitting the model with linear regression

In [5]:

# Train-Test Split
X = df.drop('Price', axis=1)
y = df['Price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Linear Regression Model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate Model
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")


Mean Squared Error: 4213.9224983852
R-squared: 0.9995413573364617


# Taking User Input

In [None]:

# User Input
brand = input("Enter Car Brand: ").lower()
modell = input("Enter Car Model: ").lower()
year = int(input("Enter Year: "))
engine_size = float(input("Enter Engine Size (cc): "))
fuel_type = input("Enter Fuel Type: ").lower()
transmission = input("Enter Transmission Type: ").lower()
mileage = float(input("Enter Mileage (km): "))
doors = int(input("Enter Number of Doors: "))
owner_count = int(input("Enter Owner Count: "))

#Preprocess User Input
input_data = pd.DataFrame([[brand, modell, fuel_type, transmission, year, mileage, engine_size, doors, owner_count]], 
                          columns=['Brand', 'Model' , 'Fuel_Type', 'Transmission', 'Year', 'Mileage', 'Engine_Size', 'Doors', 'Owner_Count'])

# Transform input using the same encoder
encoded_input = encoder.transform(input_data[['Brand', 'Model', 'Fuel_Type', 'Transmission']])
encoded_input_df = pd.DataFrame(encoded_input, columns=encoder.get_feature_names_out())

# Merge with numerical features
final_input = pd.concat([encoded_input_df, input_data[['Year', 'Mileage', 'Engine_Size', 'Doors', 'Owner_Count']]], axis=1)


# --- Predict Price ---
predicted_price = model.predict(final_input)
print(f"\nPredicted Car Price: ${predicted_price[0]:,.2f}")



Predicted Car Price: $9,753.64
