In [None]:
#Task 1
#The price of a car depends on many factors, such as the goodwill of its brand, the
#car's features, its horsepower, the mileage it gives, and many more. Car price
#prediction is one of the major research areas in machine learning. If you want to
#learn how to train a car price prediction model, this project is for you.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

# Load the car listings; the path is relative to the notebook's working directory.
data = pd.read_csv('./Oibsip/car_data.csv')

# Quick sanity checks: the first rows and the per-column count of missing values.
print(data.head())
print(data.isnull().sum())

# Replace each categorical text column with integer codes, in place.
# The single encoder is refitted for every column, so once the loop finishes it
# only remembers the classes of the last column ('Transmission').
le = LabelEncoder()
for categorical_column in ('Fuel_Type', 'Selling_type', 'Transmission'):
    data[categorical_column] = le.fit_transform(data[categorical_column])

# Target is the selling price; every remaining column except the car name is a feature.
y = data['Selling_Price']
X = data.drop(columns=['Car_Name', 'Selling_Price'])

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a linear regression on the training split, then predict the held-out rows.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Score the held-out predictions.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

# Scatter the model's predictions against the true prices of the test split.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, alpha=0.7, color='b')
ax.set_xlabel('Actual Selling Price')
ax.set_ylabel('Predicted Selling Price')
ax.set_title('Actual vs Predicted Selling Price')
plt.show()

# data['Fuel_Type'] holds LabelEncoder codes at this point. LabelEncoder numbers
# its classes in sorted order, so the codes are alphabetical:
# CNG -> 0, Diesel -> 1, Petrol -> 2. (The previous mapping put Diesel at 0 and
# CNG at 2, which attached the wrong name to every bar.)
# NOTE(review): assumes the CSV contains exactly these three fuel types — confirm.
fuel_mapping = {0: 'CNG', 1: 'Diesel', 2: 'Petrol'}
data['Fuel_Type_Label'] = data['Fuel_Type'].map(fuel_mapping)

# Each bar shows the mean selling price for one fuel type (barplot's default estimator).
plt.figure(figsize=(10, 6))
sns.barplot(x='Fuel_Type_Label', y='Selling_Price', data=data, palette='viridis')
plt.title('Average Selling Price by Fuel Type')
plt.xlabel('Fuel Type')
plt.ylabel('Average Selling Price')
plt.show()

# value_counts() orders its rows by frequency (most common first), so the wedge
# labels have to be looked up from each row's encoded value instead of being
# assigned by position; otherwise the names swap whenever the majority class changes.
transmission_counts = data['Transmission'].value_counts()

# data['Transmission'] holds LabelEncoder codes, which follow sorted class order:
# Automatic -> 0, Manual -> 1.
# NOTE(review): assumes the raw column held exactly 'Automatic' and 'Manual' — confirm.
transmission_mapping = {0: 'Automatic', 1: 'Manual'}
transmission_labels = [
    # Fall back to the raw code so an unexpected category is still visible on the chart.
    transmission_mapping.get(code, str(code))
    for code in transmission_counts.index
]

plt.figure(figsize=(8, 8))
plt.pie(transmission_counts, labels=transmission_labels, autopct='%1.1f%%', colors=sns.color_palette('Set3'))
plt.title('Distribution of Transmission Types')
plt.show()
