# Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler

In [None]:
# Location of the used-car listings CSV.
DATA_URL = 'https://raw.githubusercontent.com/sahilrahman12/Price_prediction_of_used_Cars_-Predictive_Analysis-/master/car_data.csv'

# Load the CSV into the DataFrame that the following cells explore and transform.
df = pd.read_csv(DATA_URL)

In [None]:
# Preview the first five rows of the raw data.
df.head(n=5)

In [None]:
# Preview the last five rows of the raw data.
df.tail(n=5)

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Print a concise summary: column names, non-null counts, dtypes and memory usage.
df.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Size of the dataset as a (rows, columns) tuple.
df.shape

In [None]:
# Number of axes of the frame (2 for a DataFrame).
df.ndim

In [None]:
# Column labels available in the dataset.
df.columns

In [None]:
# Distinct fuel categories present in the data.
df['fuel'].unique()

In [None]:
# Distinct seller categories present in the data.
df['seller_type'].unique()

In [None]:
# Distinct transmission categories present in the data.
df['transmission'].unique()

In [None]:
# Distinct ownership categories present in the data.
df['owner'].unique()

In [None]:
# How many rows fall into each ownership category.
df['owner'].value_counts()

In [None]:
# Distinct values of the `year` column.
df['year'].unique()

In [None]:
pd.set_option('future.no_silent_downcasting', True)

# Feature Engineering
Label-encode the column values manually.

In [None]:
# Work on a copy so the raw frame `df` is left untouched.
df_copy = df.copy()

# Hand-written integer code for each fuel category.
fuel_codes = {"Petrol": 0, "Diesel": 1, "CNG": 2, "LPG": 3, "Electric": 4}

# Insert the coded column at the position `fuel` occupies in `df`, which puts
# it directly in front of the original `fuel` column in `df_copy`.
fuel_position = df.columns.get_loc('fuel')
df_copy.insert(fuel_position, 'new_fuel', df_copy['fuel'].replace(fuel_codes))
df_copy.head()

In [None]:
# Check which fuel codes were actually produced by the manual mapping.
df_copy['new_fuel'].unique()

In [None]:
# Hand-written integer code for each seller category.
seller_codes = {"Individual": 0, "Dealer": 1, "Trustmark Dealer": 2}

# The position is looked up in the original `df` and shifted by one.
# NOTE(review): presumably the +1 compensates for `new_fuel` having been
# inserted into `df_copy` earlier - confirm against the column order.
seller_position = df.columns.get_loc('seller_type') + 1
df_copy.insert(seller_position, 'new_seller_type', df_copy['seller_type'].replace(seller_codes))
df_copy.head()

In [None]:
# Check which seller codes were actually produced by the manual mapping.
df_copy['new_seller_type'].unique()

# Encoding column values with scikit-learn's LabelEncoder

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Independent (deep) copy of `df_copy` for the scikit-learn encoding experiments.
new_df = df_copy.copy(deep=True)

In [None]:
# Replace the text `fuel` column with integer labels learned by LabelEncoder.
fuel_encoder = LabelEncoder()
new_df['fuel'] = fuel_encoder.fit_transform(df_copy['fuel'])

In [None]:
# Integer labels that LabelEncoder assigned to the fuel categories.
pd.unique(new_df['fuel'])

Using one-hot encoding

In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

In [None]:
# One-hot encode only the categorical columns that are still stored as text.
# Fitting the encoder on the whole frame would also treat identifier and
# numeric columns (e.g. `name`, `year`, `selling_price`) as categories, giving
# one output column per distinct value, which is not meaningful.
categorical_cols = ['seller_type', 'transmission', 'owner']
enc = OneHotEncoder(handle_unknown='ignore')
one_hot = enc.fit_transform(new_df[categorical_cols])

# fit_transform returns a sparse matrix; densify it and label the columns so
# the encoded result can be inspected as a regular DataFrame.
pd.DataFrame(
    one_hot.toarray(),
    columns=enc.get_feature_names_out(categorical_cols),
    index=new_df.index,
)

In [None]:
# Overwrite each text category column of `df` with integer labels.
# A fresh LabelEncoder is used per column, so the codes of one column are
# independent of the others.
for column in ('seller_type', 'fuel', 'transmission', 'owner'):
    df[column] = LabelEncoder().fit_transform(df[column])

In [None]:
# Preview the frame after label encoding.
df.head(n=5)

In [None]:
# Derive the age feature as 2024 minus the value in `year`
# (`rsub` computes `2024 - df['year']` element-wise).
df['no_of_years'] = df['year'].rsub(2024)
df.head()

In [None]:
# The selling price becomes the prediction target under the name `current_price`.
df.rename(columns={"selling_price": "current_price"}, inplace=True)

# `name` is no longer needed and `year` has been replaced by `no_of_years`.
df.drop(columns=['name', 'year'], inplace=True)
df.head()

In [None]:
# Re-check column dtypes and non-null counts after encoding and feature engineering.
df.info()

In [None]:
# Pairwise Pearson correlation between all columns (Pearson is the pandas default).
df.corr(method='pearson')

In [None]:
# Visualise the correlation matrix, printing the coefficient inside each cell.
correlation_matrix = df.corr()
plt.figure(figsize=(10, 6))
sns.heatmap(correlation_matrix, annot=True)

In [None]:
# Draw one box plot per column on a shared axis to inspect spread and outliers.
df.boxplot()

# Selecting the features and target

In [None]:
# Split the prepared frame into the feature matrix `x` (every column except
# the target) and the target vector `y`, both as NumPy arrays.
target_column = 'current_price'
x = np.array(df.drop(columns=target_column))
y = np.array(df[target_column])

In [None]:
# Hold out 20% of the rows for evaluation. A fixed random_state makes the
# shuffle - and therefore every metric computed below - reproducible between
# runs of the notebook.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [None]:
# Fit a decision tree on the training split. The tree builder breaks ties
# between equally good splits at random, so random_state is pinned to obtain
# the same fitted tree on every run.
regressor = DecisionTreeRegressor(random_state=42)
regressor.fit(x_train, y_train)

In [None]:
# Predict the target for the held-out test features.
y_pred = regressor.predict(x_test)

# Testing the model

In [None]:
# Side-by-side table of the true test targets and the model's predictions.
comparison = {'Actual': y_test, 'Predicted': y_pred}
target = pd.DataFrame(comparison)
target.head()

In [None]:
# Actual value on the x-axis against the predicted value on the y-axis;
# points near the diagonal are good predictions.
plt.scatter(x=y_test, y=y_pred)
plt.xlabel(xlabel='Actual')
plt.ylabel(ylabel='Predicted')
plt.title(label='Decision Tree Regression')

In [None]:
# Overlay actual and predicted prices for every test sample.
# `x_test` is a 2-D feature matrix, so it cannot serve as the x-coordinate
# for the 1-D targets (plt.scatter requires x and y of the same size); the
# position of each sample in the test split is used instead.
sample_index = np.arange(len(y_test))
plt.scatter(sample_index, y_pred, color='red', label='Predicted')
plt.scatter(sample_index, y_test, color='blue', label='Actual')
plt.xlabel('Test sample index')
plt.ylabel('Price')
plt.title('Decision Tree Regression')
plt.legend()

# Performance Metrics

In [None]:
# Coefficient of determination (R^2) of the predictions on the test split.
r2_score(y_true=y_test, y_pred=y_pred)