In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn import metrics

Data Collection and Processing


In [None]:
# read the car listings CSV into a pandas DataFrame
# NOTE(review): the absolute '/content/...' path looks Colab-specific — confirm before running elsewhere
car_dataset = pd.read_csv(filepath_or_buffer='/content/car data.csv')

In [None]:
# preview the top five rows of the dataframe
car_dataset.head(n=5)

In [None]:
# checking total data points: .shape gives (number of rows, number of columns)
car_dataset.shape

In [None]:
# getting information about the car dataset
# (.info() prints a per-column summary: column name, non-null count and dtype)
car_dataset.info()

In [None]:
# count the missing values in every column
# NOTE(review): the original author reports that no values are missing — re-check against your copy of the data
car_dataset.isna().sum()

In [None]:
# show how the entries are distributed over the categories of each categorical column
# e.g. how many of the cars are petrol type and how many are diesel type
for categorical_column in ('Fuel_Type', 'Seller_Type', 'Transmission'):
    print(car_dataset[categorical_column].value_counts())

In [None]:
# replace the text labels of the categorical columns with integer codes,
# one label -> code mapping per column; labels not listed here are left untouched
category_codes = {
    # encoding "Fuel_Type" column
    'Fuel_Type': {'Petrol': 0, 'Diesel': 1, 'CNG': 2},
    # encoding "Seller_Type" column
    'Seller_Type': {'Dealer': 0, 'Individual': 1},
    # encoding "Transmission" column
    'Transmission': {'Manual': 0, 'Automatic': 1},
}

# the frame is modified in place, so every later cell sees the encoded columns
car_dataset.replace(category_codes, inplace=True)

In [None]:
# inspect the first 5 rows again, after the encoding step
car_dataset.head()

In [None]:
# features: every column except the car name and the selling price
X = car_dataset.drop(columns=['Car_Name', 'Selling_Price'])

# target: the selling price column
Y = car_dataset['Selling_Price']

In [None]:
# show the feature columns (everything except Car_Name and Selling_Price)
print(X)

In [None]:
# show the target values (the Selling_Price column)
print(Y)

In [None]:
# hold out 10% of the rows for testing; random_state pins the shuffle so the split is repeatable
# X_train / Y_train: features and prices used to fit the models
# X_test / Y_test: features and prices kept aside to evaluate the models
split_parts = train_test_split(X, Y, test_size=0.1, random_state=2)
X_train, X_test, Y_train, Y_test = split_parts

In [None]:
# loading the linear regression model (unfitted estimator, default settings)
lin_reg_model = LinearRegression()

In [None]:
# fit the linear regression model on the training features and training prices
lin_reg_model.fit(X_train,Y_train)

In [None]:
# prediction on Training data (linear regression model)
training_data_prediction = lin_reg_model.predict(X_train)

In [None]:
# R^2 (coefficient of determination) of the linear model on the training data
# note: r2_score is a goodness-of-fit score (1.0 is a perfect fit), even though the printed label calls it an error
error_score = metrics.r2_score(y_true=Y_train, y_pred=training_data_prediction)
print("R squared Error : ", error_score)

In [None]:
# scatter of actual vs predicted prices on the training data (linear regression)
fig, ax = plt.subplots()
ax.scatter(Y_train, training_data_prediction)
ax.set(
    xlabel="Actual Price",
    ylabel="Predicted Price",
    title=" Actual Prices vs Predicted Prices",
)
plt.show()

In [None]:
# prediction on Test data (linear regression model)
test_data_prediction = lin_reg_model.predict(X_test)

In [None]:
# R^2 (coefficient of determination) of the linear model on the held-out test data
# note: r2_score is a goodness-of-fit score (1.0 is a perfect fit), even though the printed label calls it an error
error_score = metrics.r2_score(y_true=Y_test, y_pred=test_data_prediction)
print("R squared Error : ", error_score)

In [None]:
# Compare actual and predicted prices on the test data (linear regression)
# in a single two-panel figure: scatter plot on the left, grouped bar diagram on the right.
# Both panels are created up front so the bar diagram no longer lands in the right
# half of an otherwise empty figure.
fig, (scatter_ax, bar_ax) = plt.subplots(1, 2, figsize=(12, 5))  # wide enough for two panels

# Scatter plot
scatter_ax.scatter(Y_test, test_data_prediction)
scatter_ax.set_xlabel("Actual Price")
scatter_ax.set_ylabel("Predicted Price")
scatter_ax.set_title(" Actual Prices vs Predicted Prices")

# Bar diagram
bar_width = 0.35
# numeric positions (one per test sample); an array is needed so the
# predicted bars can be shifted by bar_width below
index = np.arange(len(Y_test))
bar_ax.bar(index, Y_test, width=bar_width, label='Actual Price', alpha=0.7)
bar_ax.bar(index + bar_width, test_data_prediction, width=bar_width, label='Predicted Price', alpha=0.7)

bar_ax.set_xlabel("Index")
bar_ax.set_ylabel("Price")
bar_ax.set_title("Bar Diagram: Actual Prices vs Predicted Prices")
bar_ax.legend()

plt.tight_layout()  # Ensures that the subplots do not overlap
plt.show()

In [None]:
# loading the Lasso regression model (unfitted estimator, default settings)
lass_reg_model = Lasso()

In [None]:
# fit the Lasso model on the same training features and training prices
lass_reg_model.fit(X_train,Y_train)

In [None]:
# prediction on Training data (Lasso model); overwrites the earlier linear-regression predictions
training_data_prediction = lass_reg_model.predict(X_train)

In [None]:
# R^2 (coefficient of determination) of the Lasso model on the training data
# note: r2_score is a goodness-of-fit score (1.0 is a perfect fit), even though the printed label calls it an error
error_score = metrics.r2_score(y_true=Y_train, y_pred=training_data_prediction)
print("R squared Error : ", error_score)

In [None]:
# Visualize the actual prices and the predicted prices (Lasso model, training data)
plt.scatter(Y_train, training_data_prediction)
plt.xlabel("Actual Price")
plt.ylabel("Predicted Price")
plt.title(" Actual Prices vs Predicted Prices")
plt.show()


# Bar diagram
bar_width = 0.35
# numeric positions (one per training sample); an array is needed so the
# predicted bars can be shifted by bar_width below
index = np.arange(len(Y_train))
plt.bar(index, Y_train, width=bar_width, label='Actual Price', alpha=0.7)
# shift the predicted bars by one bar width so they sit beside the actual
# bars instead of being drawn on top of them
plt.bar(index + bar_width, training_data_prediction, width=bar_width, label='Predicted Price', alpha=0.7)

plt.xlabel("Index")
plt.ylabel("Price")
plt.title("Bar Diagram: Actual Prices vs Predicted Prices (Training Data)")
plt.legend()

plt.show()

In [None]:
# prediction on Test data (Lasso model); overwrites the earlier linear-regression predictions
test_data_prediction = lass_reg_model.predict(X_test)

In [None]:
# R^2 (coefficient of determination) of the Lasso model on the held-out test data
# note: r2_score is a goodness-of-fit score (1.0 is a perfect fit), even though the printed label calls it an error
error_score = metrics.r2_score(y_true=Y_test, y_pred=test_data_prediction)
print("R squared Error : ", error_score)

In [None]:
# scatter of actual vs predicted prices on the test data (Lasso model)
fig, ax = plt.subplots()
ax.scatter(Y_test, test_data_prediction)
ax.set(
    xlabel="Actual Price",
    ylabel="Predicted Price",
    title=" Actual Prices vs Predicted Prices",
)
plt.show()