<a href="https://www.kaggle.com/code/tiheli/audi-price-prediction-as-train-test-split?scriptVersionId=205977308" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/audi-a1-listings/Audi_A1_listings.csv


In [2]:
# Reading the Audi A1 listings CSV from the Kaggle input directory into the DataFrame 'df'
df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/audi-a1-listings/Audi_A1_listings.csv',
)

In [3]:
# Previewing the first three rows to check the column layout and a few sample values
df.head(n=3)

Unnamed: 0,index,Year,Type,Mileage(miles),Engine,PS,Transmission,Fuel,Number_of_Owners,Price(£),href,PPY,MileageRank,PriceRank,PPYRank,Score
0,0,2018.0,Hatchback,44000.0,1.6L,114.398422,Manual,Diesel,1,14995.0,https://www.autotrader.co.uk/car-details/20221...,2499.166667,215,163,340,718
1,4,2016.0,Hatchback,42596.0,1.0L,93.688363,Manual,Petrol,3,10755.0,https://www.autotrader.co.uk/car-details/20221...,2688.75,222,330,276,828
2,7,2015.0,Hatchback,42700.0,1.4L,123.274162,Manual,Petrol,2,10799.0,https://www.autotrader.co.uk/car-details/20221...,3599.666667,221,327,94,642


In [4]:
# Removing the columns that are not used for analysis or model training:
# 'index', 'href', 'MileageRank', 'PriceRank', 'PPYRank' and 'Score'
# (note that 'PPY' is not in this list and therefore stays in the DataFrame)
unused_columns = ['index', 'href', 'MileageRank', 'PriceRank', 'PPYRank', 'Score']
df = df.drop(columns=unused_columns)

In [5]:
# Stripping the "L" character from the 'Engine' strings (e.g. "1.6L" -> "1.6")
# so that only the numeric part remains; the values are still strings after this step
engine_labels = df['Engine']
df['Engine'] = engine_labels.str.replace("L", "")

In [6]:
# Parsing the cleaned 'Engine' strings into a numeric column so it can be used in the model
df = df.assign(Engine=pd.to_numeric(df['Engine']))

In [7]:
# One-hot encoding the categorical columns 'Type', 'Transmission' and 'Fuel'
# 'drop_first=True' drops the first category of each column, which avoids perfectly
# collinear indicator columns (a column with a single category yields no indicator at all)
categorical_columns = ['Type', 'Transmission', 'Fuel']
df = pd.get_dummies(df, columns=categorical_columns, drop_first=True)

In [8]:
# Importing LinearRegression for building a regression model and train_test_split for splitting the dataset
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [9]:
# Building the model inputs: 'y' holds the target 'Price(£)' as a single-column DataFrame,
# 'x' holds every remaining column as a feature
# NOTE(review): 'PPY' is still among the features. In the sample rows shown by df.head(3)
# it equals Price(£) / (Year - 2012) (e.g. 14995 / 6 = 2499.17), so it appears to be derived
# from the target - confirm, and consider excluding it to avoid target leakage
y = df.loc[:, ['Price(£)']]
x = df.drop(columns="Price(£)")

In [10]:
# Holding out part of the data for evaluation: 70% of the rows go to the training set
# and the remaining rows to the test set
# A fixed random_state makes the shuffle, and therefore the split, reproducible
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.70,
    random_state=22,
)

In [11]:
# Creating the Linear Regression estimator and fitting it on the training data
# fit() returns the estimator itself, so 'model' and 'lm' refer to the same fitted object
lm = LinearRegression()
lm.fit(x_train, y_train)
model = lm
# Scoring on the held-out test set; for a regressor, score() returns the
# coefficient of determination (R^2), not a classification accuracy
model.score(X=x_test, y=y_test)

0.9563152741632539

In [12]:
# Making a prediction for a new data point using the trained model
# The model was fitted on a DataFrame, so the new row is wrapped in a DataFrame that reuses
# the training column labels instead of being passed as a bare nested list. This ties each
# value to a named feature, raises immediately if the number of values does not match the
# number of features, and avoids scikit-learn's "X does not have valid feature names" warning
# Values must be listed in the order of x_train.columns
# NOTE(review): presumably Year, Mileage(miles), Engine, PS, Number_of_Owners, PPY and then
# the two indicator columns - confirm against x_train.columns. If the sixth value is 'PPY',
# it looks price-derived and would not be known for an unpriced car
new_listing = pd.DataFrame(
    [[2016, 90000, 1.0, 100, 4, 3300, 0, 1]],
    columns=x_train.columns,
)
model.predict(new_listing)



array([[11478.49505317]])

In [13]:
# Making a prediction for a different car using the trained model
# The row is wrapped in a DataFrame carrying the training column labels rather than passed
# as a bare nested list: the model was fitted with feature names, so this keeps every value
# attached to a named feature, fails loudly on a wrong number of values, and avoids
# scikit-learn's "X does not have valid feature names" warning
# Values must be listed in the order of x_train.columns
# NOTE(review): presumably Year, Mileage(miles), Engine, PS, Number_of_Owners, PPY and then
# the two indicator columns - confirm against x_train.columns
another_listing = pd.DataFrame(
    [[2018, 45000, 1.4, 130, 2, 2500, 1, 0]],
    columns=x_train.columns,
)
model.predict(another_listing)



array([[15928.31318164]])