# Capstone Two - Modeling

## Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.metrics import mean_squared_error, r2_score

## Load the data from Pre-processing

In [2]:
car_data = pd.read_csv('car_datapre.csv')
car_data.head()

Unnamed: 0.1,Unnamed: 0,Price,Year,Mileage,Age,City,State,Make,Model
0,0,8995,2014,35725,8,646,43,1,1194
1,1,10888,2013,19606,9,1260,34,1,1193
2,2,8995,2013,48851,9,646,43,1,1194
3,3,10999,2014,39922,8,2490,5,1,1193
4,4,14799,2016,22142,6,1231,44,1,1196


## Train-test-split

In [3]:
X = car_data.drop('Price',axis=1).values
y = car_data['Price'].values

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X,y,  test_size = 0.2, random_state=42)

## Standardization

In [5]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train) 
X_test = scaler.transform(X_test)
print(np.mean(X), np.std(X))
print(np.mean(X_train), np.std(X_train))

60392.0492245849 164837.33171005215
3.930411299984994e-15 1.0000000000004121


## Modeling

### Linear Regression

In [6]:
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

In [7]:
score = r2_score(y_test,y_pred)
mse = mean_squared_error(y_test,y_pred)
print('R2 score for Linear Regression model is ', score)
print('MSE for Linear Regression model is', mse)

R2 score for Linear Regression model is  0.21404276807879308
MSE for Linear Regression model is 143960843.69756767


### Random Forest Regressor

In [8]:
rf = RandomForestRegressor(random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

In [9]:
score = r2_score(y_test,y_pred)
mse = mean_squared_error(y_test,y_pred)
print('R2 score for Random Forest model is ', score)
print('MSE for Random Forest model is', mse)

R2 score for Random Forest model is  0.9662798070787618
MSE for Random Forest model is 6176401.495435131


### Adaptive Boosting Regressor

In [10]:
ada=AdaBoostRegressor(random_state=42)
ada.fit(X_train, y_train)
y_pred = ada.predict(X_test)

In [11]:
score = r2_score(y_test,y_pred)
mse = mean_squared_error(y_test,y_pred)
print('R2 score for AdaBoost model is ', score)
print('MSE for AdaBoost model is', mse)

R2 score for AdaBoost model is  -0.515302390098759
MSE for AdaBoost model is 277552265.79227835
