# Case study: predict best price for VW Passat B6 TDI based on real offers

### Settings

In [20]:
percent_of_test_data = 40
main_url = 'https://www.otomoto.pl/osobowe/volkswagen/passat/b6-2005-2010/?search%5Bfilter_enum_fuel_type%5D%5B0%5D=diesel&search%5Bfilter_enum_damaged%5D=0&search%5Bfilter_enum_registered%5D=1&search%5Bcountry%5D='

### Prepare environment

In [21]:
import sys
sys.path.append('/Users/tomek/OneDrive/Projects/Machine Learning/passat-b6-tdi')

import urllib.request
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
import car_price_prediction

### Count pages with offers

In [22]:
pages = car_price_prediction.count_pages(main_url)
print("Found %d pages" % pages)

Found 28 pages


### Collect samples from those pages

In [23]:
Xy = car_price_prediction.collect_data(main_url,pages)

print("Collected %d samples" % len(Xy))
#print(Xy.shape)
print("\nLast 10 samples:")
print(Xy[-10:])

Parsing page: 1

Parsing page: 2

Parsing page: 3

Parsing page: 4

Parsing page: 5

Parsing page: 6

Parsing page: 7

Parsing page: 8

Parsing page: 9

Parsing page: 10

Parsing page: 11

Parsing page: 12

Parsing page: 13

Parsing page: 14

Parsing page: 15

Parsing page: 16

Parsing page: 17

Parsing page: 18

Parsing page: 19

Parsing page: 20

Parsing page: 21

Parsing page: 22

Parsing page: 23

Parsing page: 24

Parsing page: 25

Parsing page: 26

Parsing page: 27

Parsing page: 28



Collected 869 samples

Last 10 samples:
[[2008.00 167000.00 1968.00 29900.00]
 [2007.00 300000.00 1896.00 14000.00]
 [2007.00 164000.00 1968.00 25900.00]
 [2007.00 236000.00 1896.00 25900.00]
 [2008.00 185600.00 1968.00 22500.00]
 [2010.00 197486.00 1968.00 33700.00]
 [2008.00 164000.00 1968.00 27500.00]
 [2010.00 198700.00 1968.00 28900.00]
 [2005.00 212000.00 1896.00 23000.00]
 [2007.00 230000.00 1968.00 19000.00]]


### Divide samples to train and test

In [24]:
X_train, y_train, X_test, y_test = car_price_prediction.split_data(Xy, percent_of_test_data) 

print('Training samples: %d' % len(X_train))
print('Test samples: %d' % len(X_test))


Training samples: 521
Test samples: 348


### Let's train!

In [29]:
regr = linear_model.LinearRegression()
regr.fit(X_train, y_train)

np.set_printoptions(formatter={'float_kind': '{:f}'.format})
print('Interceptor: ', regr.intercept_)
print('Coefficients: ', regr.coef_)

Interceptor:  [-5001628.499573]
Coefficients:  [[2502.934000 -0.022212 2.995291]]


### Let's test

In [26]:
y_pred = regr.predict(X_test)

#print("Mean squared error: %.2f" % mean_squared_error(y_test, y_pred))
print('Variance score: %.2f' % r2_score(y_test, y_pred))

Variance score: 0.42


## What is the best price for... 

### Passat B6 1.9 TDI 2009 with 188 000 mileage

In [27]:
price_pred = regr.predict([[2010,180000, car_price_prediction.Capacity.cap_1_9.value]])
print('The best price for VW Passat B6 1.9 TDI 2009 with 188 000 mileage is '+str(round(price_pred[0][0],2))+' PLN')

The best price for VW Passat B6 1.9 TDI 2009 with 188 000 mileage is 30949.73 PLN


### Passat B6 2.0 TDI 2006 with 288 000 mileage

In [28]:
price_pred = regr.predict([[2006,288000, car_price_prediction.Capacity.cap_2_0.value]])
print('The best price for VW Passat B6 2.0 TDI 2006 with 288 000 mileage is '+str(round(price_pred[0][0],2))+' PLN')

The best price for VW Passat B6 2.0 TDI 2006 with 288 000 mileage is 18754.75 PLN
