# [Presentation](https://tome.app/mh0386/revving-up-the-future-ai-powered-used-car-price-predictions-clgtjqlbx17hp1u41lb7yr442)

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.utils import shuffle
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.preprocessing import PolynomialFeatures

# Data

## Read Data

In [None]:
# Candidate locations of the dataset: the Kaggle input mount first, then a
# copy sitting next to the notebook.
kaggle_csv = '/kaggle/input/cars-dataset-audi-bmw-ford-hyundai-skoda-vw/cars_dataset.csv'
local_csv = 'cars_dataset.csv'
try:
    data = pd.read_csv(kaggle_csv)
except FileNotFoundError:
    # Not running on Kaggle: fall back to the working directory.
    data = pd.read_csv(local_csv)
data

## Info

In [None]:
# Print a concise summary of the DataFrame (column dtypes and non-null counts).
data.info()

## Describe

In [None]:
# Summary statistics of the raw data, before any encoding or scaling.
data.describe()

## Nulls

In [None]:
# Number of missing values in each column.
data.isnull().sum()

## Data Columns

In [None]:
# Column labels of the dataset.
data.columns

# Draw

In [None]:
# Draw the pairwise relationships between the columns. The returned object is
# kept in `a` only so that its type is shown as the cell output.
a = sns.pairplot(data)
type(a)

## Histograms

In [None]:
# One histogram per column; the 'model' column gets a wider canvas than the
# others.
for col in data.columns:
    width = 30 if col == 'model' else 20
    fig, ax = plt.subplots(figsize=(width, 10))
    # Vertical tick labels keep the x-axis labels from overlapping.
    ax.tick_params(axis='x', rotation=90)
    ax.set_title(f'{col} histogram')
    sns.histplot(data=data, x=col, ax=ax)
    plt.show()

## Heatmap

In [None]:
# Correlate only the numeric columns. The categorical columns (model,
# transmission, fuelType, Make) are label-encoded in a later cell, so they are
# presumably still text here, and DataFrame.corr() raises on non-numeric
# columns in pandas >= 2.0 instead of silently skipping them.
sns.heatmap(data.select_dtypes(include='number').corr(), annot=True)

In [None]:
def standardize(x):
    """Return the z-scores of ``x``: ``(x - mean(x)) / std(x)``.

    The mean and standard deviation are taken with ``np.mean`` and ``np.std``.

    When the standard deviation is a scalar zero (every value equals the
    mean), the centred values are returned unscaled, i.e. all zeros, instead
    of dividing by zero and producing NaN/inf. This mirrors how scikit-learn's
    ``StandardScaler`` treats zero-variance features.
    """
    centred = x - np.mean(x)
    spread = np.std(x)
    # Only guard the scalar case; a non-scalar spread keeps the plain division.
    if np.ndim(spread) == 0 and spread == 0:
        return centred
    return centred / spread

In [None]:
# Replace each categorical column with integer codes, using a fresh
# LabelEncoder per column.
for column_name in ('model', 'transmission', 'fuelType', 'Make'):
    encoder = preprocessing.LabelEncoder()
    data[column_name] = encoder.fit_transform(data[column_name])
data

In [None]:
# Rescale mileage with standardize() (subtract the mean, divide by the
# standard deviation).
data['mileage'] = standardize(data['mileage'])
data

In [None]:
# Summary statistics again, now that the categorical columns are encoded and
# mileage is standardized.
data.describe()

# Split Data into X and y

In [None]:
# Shuffle the rows with a fixed seed so every run sees the same order. An
# unseeded shuffle made the metrics below change from run to run even though
# the train/test split itself uses random_state=0.
data = shuffle(data, random_state=0)
# pop() returns the target column and removes it from `data` in place, so
# `data` (aliased as X) holds only the features from here on.
y = data.pop('price')
X = data

## Features

In [None]:
# Display the feature matrix.
X

## Labels

In [None]:
# Display the target (price) values.
y

# Linear Regression Models

In [None]:
# Fit one linear regression per polynomial degree (1 through 7) and record its
# test-set metrics so the degrees can be compared afterwards.
r2 = []
mse = []
mae = []
mape = []
accuracy = []
for i in range(1, 8):
    # Expand the features into all polynomial terms up to degree i.
    poly = PolynomialFeatures(degree=i)
    X_poly = poly.fit_transform(X)
    # 80/20 split with a fixed random_state.
    X_train, X_test, y_train, y_test = train_test_split(X_poly, y, test_size=0.2, random_state=0)
    Model = LinearRegression()
    Model.fit(X_train, y_train)
    y_pred = Model.predict(X_test)

    r2.append(round(r2_score(y_test, y_pred), 2))
    mse.append(round(mean_squared_error(y_test, y_pred), 2))
    mae.append(round(mean_absolute_error(y_test, y_pred), 2))
    mape.append(round(mean_absolute_percentage_error(y_test, y_pred), 2))
    # LinearRegression.score() is the R2 of the model's own predictions on
    # X_test, so reuse the value computed above instead of predicting the
    # whole test matrix a second time.
    accuracy.append(r2[-1])

    print(f'Degree {i}')
    print(f'R2 Score: {r2[-1]}')
    print(f'Mean Squared Error: {mse[-1]}')
    print(f'Mean Absolute Error: {mae[-1]}')
    print(f'Mean Absolute Percentage Error: {mape[-1]}')
    print(f'Accuracy: {accuracy[-1]}')

    # Actual vs. predicted prices, re-indexed 0..n-1 so both curves are drawn
    # against a plain positional x-axis rather than the shuffled row labels.
    Y_Test_Pred = pd.DataFrame({"Y_Test": y_test, "Y_Pred": y_pred}).reset_index(drop=True)
    plt.figure(figsize=(20, 10))
    plt.plot(Y_Test_Pred)
    plt.legend(["Actual", "Predicted"])
    plt.show()
    print()
    print()

# Plot R2 Score, Mean Squared Error, Mean Absolute Error, Mean Absolute Percentage Error and Accuracy

## Plot without standardization

In [None]:
# Turn the per-degree metric lists into arrays, then draw every metric against
# the polynomial degree on one shared axis.
r2, mse, mae, mape = (np.array(metric) for metric in (r2, mse, mae, mape))
degrees = np.linspace(1, 7, 7)
fig, ax = plt.subplots(figsize=(20, 10))
for values, label in (
    (r2, 'R2 Score'),
    (mse, 'Mean Squared Error'),
    (mae, 'Mean Absolute Error'),
    (mape, 'Mean Absolute Percentage Error'),
    (accuracy, 'Accuracy'),
):
    ax.plot(degrees, values, label=label)
ax.set_xlabel('Degree')
ax.set_ylabel('Score')
ax.legend()
plt.show()

## Plot with standardization

In [None]:
# Standardize each metric so curves with very different magnitudes can be
# compared on one axis.
r2 = standardize(r2)
mse = standardize(mse)
mae = standardize(mae)
mape = standardize(mape)
# Accuracy was previously plotted on its raw scale next to the standardized
# metrics; standardize it for the plot too. The `accuracy` list itself is left
# untouched.
accuracy_standardized = standardize(np.array(accuracy))
degrees = np.linspace(1, 7, 7)
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(degrees, r2, label='R2 Score')
ax.plot(degrees, mse, label='Mean Squared Error')
ax.plot(degrees, mae, label='Mean Absolute Error')
ax.plot(degrees, mape, label='Mean Absolute Percentage Error')
ax.plot(degrees, accuracy_standardized, label='Accuracy')
ax.set_xlabel('Degree')
ax.set_ylabel('Score')
plt.legend()
plt.show()