<a href="https://colab.research.google.com/github/LoloziAnas/deeplearning/blob/master/PredictingThePriceOfUsedCars_SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
%inline matplotlib

In [7]:
# Import the dataset.
dataset = pd.read_csv('toyota.csv')
dataset.head()

Unnamed: 0,model,year,price,transmission,mileage,fuelType,tax,mpg,engineSize
0,GT86,2016,16000,Manual,24089,Petrol,265,36.2,2.0
1,GT86,2017,15995,Manual,18615,Petrol,145,36.2,2.0
2,GT86,2015,13998,Manual,27469,Petrol,265,36.2,2.0
3,GT86,2017,18998,Manual,14736,Petrol,150,36.2,2.0
4,GT86,2017,17498,Manual,36284,Petrol,145,36.2,2.0


In [8]:
# Cleaning the data.

print(dataset['transmission'].unique())
#['Manual' 'Automatic' 'Semi-Auto' 'Other']

#Exclude the case that contains Other.
dataset = dataset[dataset.transmission != 'Other']

print(dataset['fuelType'].unique())
#['Petrol' 'Other' 'Hybrid' 'Diesel']

#Exclude the case that contains Other.
dataset = dataset[dataset.fuelType != 'Other']

# Replace the qualitative with quantitative variables.

#====================Transmission========================
dataset['transmission'].replace('Manual',0,inplace=True)
dataset['transmission'].replace('Automatic',1,inplace=True)
dataset['transmission'].replace('Semi-Auto',2,inplace=True)

#====================Fuel type============================
dataset['fuelType'].replace('Petrol',0,inplace=True)
dataset['fuelType'].replace('Diesel',1,inplace=True)
dataset['fuelType'].replace('Hybrid',2,inplace=True)

# Changing the type of the features fuelType and transmission from Series to integer.
dataset['fuelType']=dataset['fuelType'].astype(int)
dataset['transmission']=dataset['transmission'].astype(int)

# Splitting data to features and target.
feature_cols=['year','transmission','mileage','tax','mpg','engineSize']
x = dataset[feature_cols]
y = dataset['price']

['Manual' 'Automatic' 'Semi-Auto' 'Other']
['Petrol' 'Other' 'Hybrid' 'Diesel']


In [9]:
# Splitting data into training and test data.
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,random_state=42,test_size=0.2)

In [None]:

from sklearn import svm
from sklearn.model_selection import GridSearchCV

clf = svm.SVR()
# defining parameter range 
param_grid = {'C': [ i for i in range(1,10)],'gamma': [1, 0.1, 0.01, 0.001, 0.0001],'epsilon':[i*0.1 for i  in range(1,10)],'kernel': ['rbf']}  

parameters = {'C': [1000],'gamma': [0.001],'epsilon':[0.1],'kernel': ['rbf']}  

paramgrid = {"kernel": ["rbf"],
             "C"     : np.logspace(-9, 9, num=25, base=10),
             "gamma" : np.logspace(-9, 9, num=25, base=10)}

regr = GridSearchCV(clf,parameters, refit = True, verbose = 1)

# fitting the model for grid search 
%time regr.fit(x_train,y_train)


In [None]:
#**Using RandomizedSearchCV**

#Same parameter space, but only test 250 random combinations.
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold

regr = RandomizedSearchCV(clf,
                        param_distributions=paramgrid,
                        n_iter=250,
                        cv=StratifiedKFold(n_splits=2),
                        verbose=1)
regr.fit(x_train,y_train)



Fitting 2 folds for each of 250 candidates, totalling 500 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


In [47]:

# print best parameter after tuning 
print(regr.best_params_) 
  
# print how our model looks after hyper-parameter tuning 
print(regr.best_estimator_) 


{'C': 1000, 'epsilon': 0.1, 'gamma': 0.001, 'kernel': 'rbf'}
SVR(C=1000, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.001,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)


In [48]:
pred_y_ = regr.predict(x_test)
pred_y_
y_test

regr.score(x_train,y_train)

0.13716727372563042

In [50]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from math import sqrt

print("R2-score: %.2f" % r2_score(pred_y_ , y_test) )
print(" (MSE): %.2f" % sqrt(mean_squared_error(y_test, pred_y_))/
print("Mean absolute error: %.2f" % np.mean(np.absolute(pred_y_ - y_test)))


R2-score: -45.69
 (MSE): 41437078.70
Mean absolute error: 4390.67
