In [2]:
import warnings
warnings.filterwarnings('ignore')


In [3]:
# Import necessary libraries
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Load the dataset
data = pd.read_csv('clean6_car_price.csv')

# Drop columns that are not used as model inputs. 'Unnamed: 0' is presumably
# a leftover saved index and 'car_name' a free-text label — TODO confirm.
# Reassigning (instead of inplace=True) keeps the step a plain expression.
data = data.drop(columns=['Unnamed: 0', 'car_name'])

# Split the data into features and target variable.
# Assumes every feature column is already numerically encoded in the cleaned
# CSV — verify against the cleaning step.
X = data[['engine', 'manufacture', 'Seats', 'kms_driven', 'ownership', 'fuel_type', 'transmission']]
y = data['car_prices_in_rupee']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Linear Regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the test set
predictions = model.predict(X_test)

# Evaluate the model.
# RMSE is taken as the square root of MSE rather than via
# mean_squared_error(..., squared=False): that keyword was deprecated in
# scikit-learn 1.4 and removed in 1.6, so the old call fails on current releases.
mse = mean_squared_error(y_test, predictions)
rmse = mse ** 0.5
mae = mean_absolute_error(y_test, predictions)

print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", rmse)
print("Mean Absolute Error:", mae)

# Get R-squared value (LinearRegression.score returns R^2 on the given data)
r_squared = model.score(X_test, y_test)
print("R-squared:", r_squared)




Mean Squared Error: 8.369012013302035
Root Mean Squared Error: 2.8929244741786873
Mean Absolute Error: 2.1072191665582714
R-squared: 0.5662986352801112


In [4]:
import numpy as np

# Tolerance as a fraction of the actual price: 0.5 means a prediction counts
# as a hit when it is within ±50% of the true value.
tolerance = 0.5

# Calculate the absolute percentage error for each prediction.
# NOTE(review): a zero actual price would yield inf/NaN here; assumes all
# prices in y_test are strictly positive — confirm against the dataset.
absolute_percentage_error = np.abs(predictions - y_test) / y_test

# Calculate the percentage of predictions within the tolerance
percentage_within_tolerance = np.mean(absolute_percentage_error <= tolerance) * 100

# Build the label from `tolerance` so the printed message always matches the
# threshold actually applied.
print("Percentage of predictions within ±{:g}% of actual prices: {:.2f}%".format(
    tolerance * 100, percentage_within_tolerance))


Percentage of predictions within ±5=% of actual prices: 84.42%
