<a href="https://colab.research.google.com/github/Alphaomegainfinity/Project-4_Car_Price_Predictor/blob/main/car_price_predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Import and Cleaning Data**

In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import pandas as pd
import tensorflow as tf

In [19]:
# Import our input dataset
car_df = pd.read_csv('https://raw.githubusercontent.com/Alphaomegainfinity/Project-4_Car_Price_Predictor/main/Resources/Car%20details%20v3.csv')
car_df.head()



Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner,mileage,engine,max_power,torque,seats
0,Maruti Swift Dzire VDI,2014,450000,145500,Diesel,Individual,Manual,First Owner,23.4 kmpl,1248 CC,74 bhp,190Nm@ 2000rpm,5.0
1,Skoda Rapid 1.5 TDI Ambition,2014,370000,120000,Diesel,Individual,Manual,Second Owner,21.14 kmpl,1498 CC,103.52 bhp,250Nm@ 1500-2500rpm,5.0
2,Honda City 2017-2020 EXi,2006,158000,140000,Petrol,Individual,Manual,Third Owner,17.7 kmpl,1497 CC,78 bhp,"12.7@ 2,700(kgm@ rpm)",5.0
3,Hyundai i20 Sportz Diesel,2010,225000,127000,Diesel,Individual,Manual,First Owner,23.0 kmpl,1396 CC,90 bhp,22.4 kgm at 1750-2750rpm,5.0
4,Maruti Swift VXI BSIII,2007,130000,120000,Petrol,Individual,Manual,First Owner,16.1 kmpl,1298 CC,88.2 bhp,"11.5@ 4,500(kgm@ rpm)",5.0


In [3]:
# Check shape of dataframe
car_df.shape

(8128, 13)

In [4]:
# Check info of dataframe
car_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8128 entries, 0 to 8127
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   name           8128 non-null   object 
 1   year           8128 non-null   int64  
 2   selling_price  8128 non-null   int64  
 3   km_driven      8128 non-null   int64  
 4   fuel           8128 non-null   object 
 5   seller_type    8128 non-null   object 
 6   transmission   8128 non-null   object 
 7   owner          8128 non-null   object 
 8   mileage        7907 non-null   object 
 9   engine         7907 non-null   object 
 10  max_power      7913 non-null   object 
 11  torque         7906 non-null   object 
 12  seats          7907 non-null   float64
dtypes: float64(1), int64(3), object(9)
memory usage: 825.6+ KB


In [5]:
# Drop unwanted columns 
car_df.drop(['name', 'torque', 'max_power', 'mileage'], axis=1, inplace=True)

In [6]:
car_df

Unnamed: 0,year,selling_price,km_driven,fuel,seller_type,transmission,owner,engine,seats
0,2014,450000,145500,Diesel,Individual,Manual,First Owner,1248 CC,5.0
1,2014,370000,120000,Diesel,Individual,Manual,Second Owner,1498 CC,5.0
2,2006,158000,140000,Petrol,Individual,Manual,Third Owner,1497 CC,5.0
3,2010,225000,127000,Diesel,Individual,Manual,First Owner,1396 CC,5.0
4,2007,130000,120000,Petrol,Individual,Manual,First Owner,1298 CC,5.0
...,...,...,...,...,...,...,...,...,...
8123,2013,320000,110000,Petrol,Individual,Manual,First Owner,1197 CC,5.0
8124,2007,135000,119000,Diesel,Individual,Manual,Fourth & Above Owner,1493 CC,5.0
8125,2009,382000,120000,Diesel,Individual,Manual,First Owner,1248 CC,5.0
8126,2013,290000,25000,Diesel,Individual,Manual,First Owner,1396 CC,5.0


In [7]:
# work for potentially adding in engine size and fuel consumption

car_df['engine'] = car_df['engine'].str.rsplit(' ', n=1).str.get(0)
#car_df['mileage'] = car_df['mileage'].str.rsplit(' ', n=1).str.get(0)
car_df.rename(columns={'engine':'engine(CC)', 'mileage':'mileage(kmpl)'}, inplace=True)
#car_df['engine(CC)'] = car_df['engine(CC)'].astype(float)
#car_df.loc[car_df['engine(CC)'] <= 1000 ,'engine(CC)'] = 1
#car_df.loc[car_df['engine(CC)'] >= 1001 & car_df['engine(CC)'] <=2000 ,'engine(CC)'] = 2

In [8]:
car_df['engine(CC)'] = car_df['engine(CC)'].astype(float)

In [9]:
engine_bin = [1,2,3,4]
car_df['engine(CC)_binning'] = pd.cut(car_df['engine(CC)'], [1000, 1999, 2999, 3999, 4000], labels = engine_bin)
car_df.head(200)

Unnamed: 0,year,selling_price,km_driven,fuel,seller_type,transmission,owner,engine(CC),seats,engine(CC)_binning
0,2014,450000,145500,Diesel,Individual,Manual,First Owner,1248.0,5.0,1
1,2014,370000,120000,Diesel,Individual,Manual,Second Owner,1498.0,5.0,1
2,2006,158000,140000,Petrol,Individual,Manual,Third Owner,1497.0,5.0,1
3,2010,225000,127000,Diesel,Individual,Manual,First Owner,1396.0,5.0,1
4,2007,130000,120000,Petrol,Individual,Manual,First Owner,1298.0,5.0,1
...,...,...,...,...,...,...,...,...,...,...
195,2005,65000,86000,Petrol,Individual,Manual,First Owner,1341.0,5.0,1
196,2020,720000,5000,Petrol,Individual,Manual,First Owner,1197.0,5.0,1
197,2017,800000,37000,Diesel,Individual,Manual,First Owner,1248.0,5.0,1
198,2019,520000,14000,Petrol,Individual,Manual,First Owner,1199.0,5.0,1


In [10]:
car_df['seats'] = car_df['seats'].astype(str)

In [11]:
car_df.dropna(inplace=True)

In [16]:
car_df = pd.get_dummies(car_df)
car_df.head()

Unnamed: 0,year,selling_price,km_driven,engine(CC),fuel_CNG,fuel_Diesel,fuel_LPG,fuel_Petrol,seller_type_Dealer,seller_type_Individual,...,seats_4.0,seats_5.0,seats_6.0,seats_7.0,seats_8.0,seats_9.0,engine(CC)_binning_1,engine(CC)_binning_2,engine(CC)_binning_3,engine(CC)_binning_4
0,2014,450000,145500,1248.0,0,1,0,0,0,1,...,0,1,0,0,0,0,1,0,0,0
1,2014,370000,120000,1498.0,0,1,0,0,0,1,...,0,1,0,0,0,0,1,0,0,0
2,2006,158000,140000,1497.0,0,0,0,1,0,1,...,0,1,0,0,0,0,1,0,0,0
3,2010,225000,127000,1396.0,0,1,0,0,0,1,...,0,1,0,0,0,0,1,0,0,0
4,2007,130000,120000,1298.0,0,0,0,1,0,1,...,0,1,0,0,0,0,1,0,0,0


In [14]:
car_df.to_csv("carcleandf", sep='\t')

## **Compile, Train and Evaluate the Model**

In [None]:
# Install keras_tuner
!pip install keras_tuner

In [None]:
# Import dependencies

import sklearn as skl
import keras_tuner as kt

In [None]:
# Split our preprocessed data into our features and target arrays

y = car_df["seller_type_Individual"]
X = car_df.drop(["sller_type_Individual", "seller_type_Dealer"], axis=1)

# Split the preprocessed data into a training and testing dataset

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42, stratify =y) 

In [None]:
# Create a StandardScaler instances
scaler = skl.preprocessing.StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# Checking the number of feature:

number_input_feature = X_train.shape[1]
number_input_feature

In [None]:
# Create a method that creates a new Sequential model with hyperparameter options

def create_model(hp):
    nn_model = tf.keras.models.Sequential()

# Allow kerastuner to decide which activation function to use in hidden layers

    activation = hp. Choice('activation',['relu','tanh', 'sigmoid'])

# Allow kerastuner to decide number of neurons in first layer

    nn_model.add(tf.keras.layers.Dense(units = hp.Int ('first_units', min_value = 1, max_value = 200, step=2), activation = activation, input_dim = number_input_feature))

# Allow kerastuner to decide number of hidden layers and neurons in hidden layers

    for i in range (hp.Int ('num_layers', 1,6)):
        nn_model.add (tf.keras.layers.Dense(units = hp. Int('units_' + str(i), min_value = 1, max_value =200, step=2), activation = activation))

# Output layer

    nn_model.add (tf.keras.layers.Dense(units = 1, activation = "sigmoid"))

# Compile the model

    nn_model.compile(loss = "binary_crossentropy", optimizer = "adam", metrics= ["accuracy"])

    return nn_model

In [None]:
# Applying Hyperband

tuner = kt.Hyperband (create_model, objective= "val_accuracy", max_epochs= 100, hyperband_iterations= 2)

In [None]:
# Run the kerastuner seach for best hyperparameters
tuner.search(X_train_scaled, y_train, epochs =50, validation_data = (X_test_scaled, y_test))

In [None]:
# Get best model hyperparmeters

best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

In [None]:
# Evaluate best model against full test data 

best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate (X_test_scaled, y_test, verbose =2)
print (f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Export our model to HDF5 file

best_model.save("Resources/car_price_predictor.h5")

## **Deploy Model to Tableau**

In [None]:
# need to be correct
from tabpy.tabpy_tools.client import Client
client = Client('http://localhost:9004/')
client.deploy('Restaurant_Profitability',
Profitability_Prediction,
'Returns prediction of profitability for restaurant(s).'
, override = True)


In [None]:
# Replace values in fuel column
#car_df = car_df.replace(['Petrol', 'Diesel', 'LPG', 'CNG'], 
                     #[0,1,2,3]) 

In [None]:
# replace values in seller column
#car_df = car_df.replace(['Individual', 'Dealer', 'Trustmark Dealer'], 
                     #[0,1,2]) 

In [None]:
# replace values in transmission column
#car_df = car_df.replace(['Manual', 'Automatic'], 
                     #[0,1]) 

In [None]:
#car_df = car_df.replace(['First Owner', 'Second Owner', 'Third Owner', 'Fourth & Above Owner', 'Test Drive Car'], 
                     #[0,1,2,3,4]) 

In [None]:
car_df.dropna(inplace=True)