In [1]:
import pandas as pd 
import matplotlib.pyplot as plt

In [2]:
# Load the car price records from carprices.csv (path is relative to the
# notebook's working directory) and show the table size as (rows, columns).
df = pd.read_csv("carprices.csv")
df.shape

(13, 4)

In [3]:
# Count the missing (NaN/None) entries in each column; all zeros means
# no imputation or row dropping is needed before modelling.
df.isna().sum()

Car Model        0
Mileage          0
Sell Price($)    0
Age(yrs)         0
dtype: int64

In [4]:
# Inspect the dtype pandas inferred for each column. 'Car Model' is the
# only non-numeric (object) column, so it has to be encoded before fitting.
df.dtypes

Car Model        object
Mileage           int64
Sell Price($)     int64
Age(yrs)          int64
dtype: object

In [5]:
# Preview the first rows to sanity-check the parsed values.
df.head()

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4


In [10]:
# One-hot encode 'Car Model': get_dummies creates one 0/1 indicator column
# per distinct model, because the regression can only use numeric inputs.
# dtype=int makes the indicators integers.
dummy = pd.get_dummies(df['Car Model'], dtype=int)
dummy.head()

Unnamed: 0,Audi A5,BMW X5,Mercedez Benz C class
0,0,1,0
1,0,1,0
2,0,1,0
3,0,1,0
4,0,1,0


In [12]:
# Append the indicator columns to the original table (rows are matched on
# the shared index) and keep the result in a new DataFrame.
merged = df.join(dummy)
merged.head()

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs),Audi A5,BMW X5,Mercedez Benz C class
0,BMW X5,69000,18000,6,0,1,0
1,BMW X5,35000,34000,3,0,1,0
2,BMW X5,57000,26100,5,0,1,0
3,BMW X5,22500,40000,2,0,1,0
4,BMW X5,46000,31500,4,0,1,0


In [14]:
# Drop one indicator column ('Mercedez Benz C class'): it is fully determined
# by the other two (all zeros there means Mercedez), so keeping it would add
# a redundant, perfectly collinear feature. Also drop the original text column
# 'Car Model', which the indicators now replace.
final = merged.drop(columns=["Mercedez Benz C class", "Car Model"])
final.head()

Unnamed: 0,Mileage,Sell Price($),Age(yrs),Audi A5,BMW X5
0,69000,18000,6,0,1
1,35000,34000,3,0,1
2,57000,26100,5,0,1
3,22500,40000,2,0,1
4,46000,31500,4,0,1


In [16]:
# List the columns that remain after the drop.
final.columns

Index(['Mileage', 'Sell Price($)', 'Age(yrs)', 'Audi A5', 'BMW X5'], dtype='object')

In [19]:
# Separate the dependent variable (target y, the selling price) from the
# independent variables (feature matrix X, as a plain NumPy array).
y = final['Sell Price($)']
X = final[['Mileage', 'Age(yrs)', 'Audi A5', 'BMW X5']].to_numpy()

In [15]:
# import LinearRegression from machine learning library sklearn
from sklearn.linear_model import LinearRegression

In [20]:
# Create an ordinary least-squares linear regression and fit it on the
# features X and target y; fit() returns the fitted estimator itself,
# which is then displayed as the cell output.
lrg = LinearRegression().fit(X, y)
lrg

In [21]:
# In-sample predictions: predicted prices for the same rows used to fit the model.
lrg.predict(X)

array([18705.2723644 , 35286.78445645, 24479.19112468, 41245.76426391,
       29882.98779056, 28023.6135243 , 30614.46818502, 21879.57266964,
       12182.34562104, 26183.72387884, 18929.31674102, 20409.80511857,
       30477.15426156])

In [25]:
# Show the feature order used to build X; rows passed to lrg.predict must
# list their values in this same order.
final[['Mileage', 'Age(yrs)', 'Audi A5', 'BMW X5']].columns

Index(['Mileage', 'Age(yrs)', 'Audi A5', 'BMW X5'], dtype='object')

In [23]:
# Predict the price of a Mercedez Benz C class that is 4 years old with
# mileage 45000. Input order is [Mileage, Age(yrs), Audi A5, BMW X5]; the
# Mercedez indicator was dropped, so it is encoded as both indicators = 0.
lrg.predict([[45000, 4, 0, 0]])

array([36991.31721061])

In [26]:
# Predict the price of a BMW X5 that is 7 years old with mileage 86000.
# Input order is [Mileage, Age(yrs), Audi A5, BMW X5], hence BMW X5 = 1.
lrg.predict([[86000, 7, 0, 1]])

array([11080.74313219])

In [27]:
# R^2 (coefficient of determination) of the fitted model. It is evaluated on
# the same X, y used for fitting, so it measures training fit only and says
# nothing about performance on unseen cars.
lrg.score(X, y)

0.9417050937281083