In [1]:
import numpy as np
import seaborn as sns
import pandas as pd
from sklearn.linear_model import LinearRegression, ElasticNet, Ridge, Lasso, RANSACRegressor, TheilSenRegressor, HuberRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, median_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingRegressor
from sklearn.pipeline import Pipeline
import warnings
warnings.filterwarnings('ignore')


In [2]:
df = pd.read_csv("car_prices.csv")
df.head()

Unnamed: 0,id,brand,model,model_year,milage,fuel_type,engine,transmission,ext_col,int_col,accident,clean_title,price
0,0,Ford,F-150 Lariat,2018,74349,Gasoline,375.0HP 3.5L V6 Cylinder Engine Gasoline Fuel,10-Speed A/T,Blue,Gray,None reported,Yes,11000
1,1,BMW,335 i,2007,80000,Gasoline,300.0HP 3.0L Straight 6 Cylinder Engine Gasoli...,6-Speed M/T,Black,Black,None reported,Yes,8250
2,2,Jaguar,XF Luxury,2009,91491,Gasoline,300.0HP 4.2L 8 Cylinder Engine Gasoline Fuel,6-Speed A/T,Purple,Beige,None reported,Yes,15000
3,3,BMW,X7 xDrive40i,2022,2437,Hybrid,335.0HP 3.0L Straight 6 Cylinder Engine Gasoli...,Transmission w/Dual Shift Mode,Gray,Brown,None reported,Yes,63500
4,4,Pontiac,Firebird Base,2001,111000,Gasoline,200.0HP 3.8L V6 Cylinder Engine Gasoline Fuel,A/T,White,Black,None reported,Yes,7850


In [3]:
df = df[~((df["price"] < df["price"].quantile(0.05)) | (df["price"] > df["price"].quantile(0.95)))]

In [4]:
X = df[["milage"]]
y = df['price'].values

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
linear = LinearRegression()

linear.fit(X_train, y_train)
line_pred = linear.predict(X_test)

print(mean_squared_error(line_pred, y_test, squared=False))
print(mean_absolute_error(line_pred, y_test))
print(median_absolute_error(line_pred, y_test))
print(mean_absolute_percentage_error(line_pred, y_test))

15589.430906044827
11911.61973697553
9657.122422216224
0.6676123484732954


In [7]:
rid = Ridge()

rid.fit(X_train, y_train)
rid_pred = rid.predict(X_test)

print(mean_squared_error(rid_pred, y_test, squared=False))
print(mean_absolute_error(rid_pred, y_test))
print(median_absolute_error(rid_pred, y_test))

15589.430906044827
11911.61973697553
9657.122422216049


In [8]:
las = Lasso()

las.fit(X_train, y_train)
las_pred = las.predict(X_test)

print(mean_squared_error(las_pred, y_test, squared=False))
print(mean_absolute_error(las_pred, y_test))
print(median_absolute_error(las_pred, y_test))
print(mean_absolute_percentage_error(las_pred, y_test))

15589.430906129357
11911.61973734881
9657.122381744724
0.6676123265168902


In [9]:
hub = HuberRegressor()

hub.fit(X_train, y_train)
hub_pred = hub.predict(X_test)

print(mean_squared_error(hub_pred, y_test, squared=False))
print(mean_absolute_error(hub_pred, y_test))
print(median_absolute_error(hub_pred, y_test))
print(mean_absolute_percentage_error(hub_pred, y_test))

31790.772783852783
24485.818631238046
18848.96852024395
10.991192093482624


In [10]:
elastic = ElasticNet()

elastic.fit(X_train, y_train)
elastic_pred = elastic.predict(X_test)

print(mean_squared_error(elastic_pred, y_test, squared=False))
print(mean_absolute_error(elastic_pred, y_test))
print(median_absolute_error(elastic_pred, y_test))
print(mean_absolute_percentage_error(elastic_pred, y_test))

15589.430906097603
11911.619737208586
9657.12239694795
0.6676123347648663


In [11]:
theil = TheilSenRegressor()

theil.fit(X_train, y_train)
theil_pred = theil.predict(X_test)

print(mean_squared_error(theil_pred, y_test, squared=False))
print(mean_absolute_error(theil_pred, y_test))
print(median_absolute_error(theil_pred, y_test))
print(mean_absolute_percentage_error(theil_pred, y_test))

16621.58360236339
12056.151739762092
8573.924609170219
1.75022628352549


In [12]:
rancar = RANSACRegressor()

rancar.fit(X_train, y_train)
rancar_pred = rancar.predict(X_test)

print(mean_squared_error(rancar_pred, y_test, squared=False))
print(mean_absolute_error(rancar_pred, y_test))
print(median_absolute_error(rancar_pred, y_test))
print(mean_absolute_percentage_error(rancar_pred, y_test))

17228.151207052924
12209.315460714794
8088.088631740477
1.085114176051718


In [13]:
pred = [line_pred, hub_pred, las_pred, rancar_pred, rid_pred, theil_pred, elastic_pred]
pred_df = pd.DataFrame(np.column_stack(pred), columns=["linear", "huber", "lasso", "RANSAC", "ridge", "theil", "elastic"])

pred_df.head()

Unnamed: 0,linear,huber,lasso,RANSAC,ridge,theil,elastic
0,33170.941779,13257.254199,33170.941777,25907.745218,33170.941779,28164.779343,33170.941778
1,36777.035786,10388.14695,36777.035778,29416.669307,36777.035786,32455.400311,36777.035781
2,3078.709032,37199.459523,3078.709083,-3373.621321,3078.709032,-7639.712874,3078.709063
3,33668.334056,12861.515268,33668.334053,26391.734747,33668.334056,28756.589131,33668.334054
4,27403.927026,17845.649234,27403.927034,20296.128617,27403.927026,21303.040747,27403.927031


In [14]:
pred_df['mean'] = pred_df.mean(axis=1)
pred_df.head()

Unnamed: 0,linear,huber,lasso,RANSAC,ridge,theil,elastic,mean
0,33170.941779,13257.254199,33170.941777,25907.745218,33170.941779,28164.779343,33170.941778,28573.363696
1,36777.035786,10388.14695,36777.035778,29416.669307,36777.035786,32455.400311,36777.035781,31338.3371
2,3078.709032,37199.459523,3078.709083,-3373.621321,3078.709032,-7639.712874,3078.709063,5500.137362
3,33668.334056,12861.515268,33668.334053,26391.734747,33668.334056,28756.589131,33668.334054,28954.739338
4,27403.927026,17845.649234,27403.927034,20296.128617,27403.927026,21303.040747,27403.927031,24151.503816


In [15]:
print(mean_squared_error(pred_df['mean'], y_test, squared=False))
print(mean_absolute_error(pred_df['mean'], y_test))
print(median_absolute_error(pred_df['mean'], y_test))
print(mean_absolute_percentage_error(pred_df['mean'], y_test ))

16441.331041432604
11826.798629412668
8283.833449970882
0.6011009506126183


In [16]:
stack = StackingRegressor(estimators=[
    ("TheilSen", TheilSenRegressor()),
    ("Linear", LinearRegression()),
    ("Ridge", Ridge()),
    ("Lasso", Lasso()),
    ("Elastic", ElasticNet()),
    ("Huber", HuberRegressor()),
    ("Ransacr", RANSACRegressor())
])

stack.fit(X_train, y_train)
stck_pred =  stack.predict(X_test)

In [17]:
print(mean_squared_error(stck_pred, y_test, squared=False))
print(mean_absolute_error(stck_pred, y_test))
print(median_absolute_error(stck_pred, y_test))
print(mean_absolute_percentage_error(stck_pred, y_test ))

15593.774828992837
11863.478973611025
9554.870425273904
1.3930859013215868


'6823'