In [182]:
import warnings
warnings.filterwarnings('ignore')

In [183]:
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter
import dataframe_image as dfi

In [184]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced

In [185]:
cars_df = pd.read_csv("Car details v3.csv")
cars_df.head()

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner,mileage,engine,max_power,torque,seats
0,Maruti Swift Dzire VDI,2014,450000,145500,Diesel,Individual,Manual,First Owner,23.4 kmpl,1248 CC,74 bhp,190Nm@ 2000rpm,5.0
1,Skoda Rapid 1.5 TDI Ambition,2014,370000,120000,Diesel,Individual,Manual,Second Owner,21.14 kmpl,1498 CC,103.52 bhp,250Nm@ 1500-2500rpm,5.0
2,Honda City 2017-2020 EXi,2006,158000,140000,Petrol,Individual,Manual,Third Owner,17.7 kmpl,1497 CC,78 bhp,"12.7@ 2,700(kgm@ rpm)",5.0
3,Hyundai i20 Sportz Diesel,2010,225000,127000,Diesel,Individual,Manual,First Owner,23.0 kmpl,1396 CC,90 bhp,22.4 kgm at 1750-2750rpm,5.0
4,Maruti Swift VXI BSIII,2007,130000,120000,Petrol,Individual,Manual,First Owner,16.1 kmpl,1298 CC,88.2 bhp,"11.5@ 4,500(kgm@ rpm)",5.0


In [186]:
# Remove rows that have at least 1 null value.
cars_no_Null_df = cars_df.dropna()

print(cars_no_Null_df.shape)
cars_no_Null_df.head(10)

(7906, 13)


Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner,mileage,engine,max_power,torque,seats
0,Maruti Swift Dzire VDI,2014,450000,145500,Diesel,Individual,Manual,First Owner,23.4 kmpl,1248 CC,74 bhp,190Nm@ 2000rpm,5.0
1,Skoda Rapid 1.5 TDI Ambition,2014,370000,120000,Diesel,Individual,Manual,Second Owner,21.14 kmpl,1498 CC,103.52 bhp,250Nm@ 1500-2500rpm,5.0
2,Honda City 2017-2020 EXi,2006,158000,140000,Petrol,Individual,Manual,Third Owner,17.7 kmpl,1497 CC,78 bhp,"12.7@ 2,700(kgm@ rpm)",5.0
3,Hyundai i20 Sportz Diesel,2010,225000,127000,Diesel,Individual,Manual,First Owner,23.0 kmpl,1396 CC,90 bhp,22.4 kgm at 1750-2750rpm,5.0
4,Maruti Swift VXI BSIII,2007,130000,120000,Petrol,Individual,Manual,First Owner,16.1 kmpl,1298 CC,88.2 bhp,"11.5@ 4,500(kgm@ rpm)",5.0
5,Hyundai Xcent 1.2 VTVT E Plus,2017,440000,45000,Petrol,Individual,Manual,First Owner,20.14 kmpl,1197 CC,81.86 bhp,113.75nm@ 4000rpm,5.0
6,Maruti Wagon R LXI DUO BSIII,2007,96000,175000,LPG,Individual,Manual,First Owner,17.3 km/kg,1061 CC,57.5 bhp,"7.8@ 4,500(kgm@ rpm)",5.0
7,Maruti 800 DX BSII,2001,45000,5000,Petrol,Individual,Manual,Second Owner,16.1 kmpl,796 CC,37 bhp,59Nm@ 2500rpm,4.0
8,Toyota Etios VXD,2011,350000,90000,Diesel,Individual,Manual,First Owner,23.59 kmpl,1364 CC,67.1 bhp,170Nm@ 1800-2400rpm,5.0
9,Ford Figo Diesel Celebration Edition,2013,200000,169000,Diesel,Individual,Manual,First Owner,20.0 kmpl,1399 CC,68.1 bhp,160Nm@ 2000rpm,5.0


In [187]:
# Split our preprocessed data into our features and target arrays
X = pd.get_dummies(cars_no_Null_df.drop(columns="name").values())
y = cars_no_Null_df["fuel"].values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

### Easy Ensemble AdaBoost Classifier

In [188]:
# Train the EasyEnsembleClassifier
from imblearn.ensemble import EasyEnsembleClassifier

eeac = EasyEnsembleClassifier(n_estimators=100, random_state=1)

eeac.fit(X_train, y_train)

EasyEnsembleClassifier(n_estimators=100, random_state=1)

In [189]:
# Calculated the balanced accuracy score
y_pred = eeac.predict(X_test)

balanced_accuracy_score(y_test, y_pred)

1.0

In [190]:
# Display the confusion matrix
confusion_matrix(y_test, y_pred)

array([[  11,    0,    0,    0],
       [   0, 1101,    0,    0],
       [   0,    0,    8,    0],
       [   0,    0,    0,  857]], dtype=int64)

In [191]:
# Create a DataFrame from the confusion matrix.
EEAC_cm = confusion_matrix(y_test, y_pred)
EEAC_cm_df = pd.DataFrame(
    EEAC_cm, 
    index=["Actual CNG", "Actual Diesel", "Actual LPG", "Actual Petrol"], 
    columns=["Predicted CNG", "Predicted Diesel", "Predicted LPG", "Predicted Petrol"])

EEAC_cm_df

Unnamed: 0,Predicted CNG,Predicted Diesel,Predicted LPG,Predicted Petrol
Actual CNG,11,0,0,0
Actual Diesel,0,1101,0,0
Actual LPG,0,0,8,0
Actual Petrol,0,0,0,857


In [192]:
# Print the imbalanced classification report
print("Easy Ensemble AdaBoost Classifier")
print(f"Accuracy: { balanced_accuracy_score(y_test, y_pred) * 100:.2f}% \n\n")
print(classification_report_imbalanced(y_test, y_pred))

Easy Ensemble AdaBoost Classifier
Accuracy: 100.00% 


                   pre       rec       spe        f1       geo       iba       sup

        CNG       1.00      1.00      1.00      1.00      1.00      1.00        11
     Diesel       1.00      1.00      1.00      1.00      1.00      1.00      1101
        LPG       1.00      1.00      1.00      1.00      1.00      1.00         8
     Petrol       1.00      1.00      1.00      1.00      1.00      1.00       857

avg / total       1.00      1.00      1.00      1.00      1.00      1.00      1977



### Balanced Random Forest Classifier

In [193]:
# Resample the training data with the BalancedRandomForestClassifier
from imblearn.ensemble import BalancedRandomForestClassifier

brfc = BalancedRandomForestClassifier(n_estimators=100, random_state=1)

brfc.fit(X_train, y_train)

BalancedRandomForestClassifier(random_state=1)

In [194]:
# Calculated the balanced accuracy score
y_pred = brfc.predict(X_test)

balanced_accuracy_score(y_test, y_pred)

1.0

In [195]:
# Display the confusion matrix
confusion_matrix(y_test, y_pred)

array([[  11,    0,    0,    0],
       [   0, 1101,    0,    0],
       [   0,    0,    8,    0],
       [   0,    0,    0,  857]], dtype=int64)

In [196]:
# Create a DataFrame from the confusion matrix.
BRFC_cm = confusion_matrix(y_test, y_pred)

BRFC_cm_df = pd.DataFrame(
    BRFC_cm, 
    index=["Actual CNG", "Actual Diesel", "Actual LPG", "Actual Petrol"], 
    columns=["Predicted CNG", "Predicted Diesel", "Predicted LPG", "Predicted Petrol"])

BRFC_cm_df

Unnamed: 0,Predicted CNG,Predicted Diesel,Predicted LPG,Predicted Petrol
Actual CNG,11,0,0,0
Actual Diesel,0,1101,0,0
Actual LPG,0,0,8,0
Actual Petrol,0,0,0,857


In [197]:
# Print the imbalanced classification report
print("Balanced Random Forest Classifier")
print("\n")
print(f"Accuracy: { balanced_accuracy_score(y_test, y_pred) * 100:.2f}% \n\n")
print(classification_report_imbalanced(y_test, y_pred))

Balanced Random Forest Classifier


Accuracy: 100.00% 


                   pre       rec       spe        f1       geo       iba       sup

        CNG       1.00      1.00      1.00      1.00      1.00      1.00        11
     Diesel       1.00      1.00      1.00      1.00      1.00      1.00      1101
        LPG       1.00      1.00      1.00      1.00      1.00      1.00         8
     Petrol       1.00      1.00      1.00      1.00      1.00      1.00       857

avg / total       1.00      1.00      1.00      1.00      1.00      1.00      1977



### Deep Learning Neural Model

In [198]:
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf


In [199]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [200]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 = 80
hidden_nodes_layer2 = 30

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_15 (Dense)            (None, 80)                103520    
                                                                 
 dense_16 (Dense)            (None, 30)                2430      
                                                                 
 dense_17 (Dense)            (None, 1)                 31        
                                                                 
Total params: 105,981
Trainable params: 105,981
Non-trainable params: 0
_________________________________________________________________


In [201]:
# Import checkpoint dependencies
import os
from tensorflow.keras.callbacks import ModelCheckpoint

# Define the checkpoint path and filenames
os.makedirs("checkpoints/",exist_ok=True)
checkpoint_path = "checkpoints/weights.{epoch:02d}.hdf5"

# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Create a callback that saves the model's weights every epoch
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq='epoch', 
    period=5)



In [202]:
# Train the model
fit_model = nn.fit(X_train_scaled,y_train,epochs=100,callbacks=[cp_callback])

Epoch 1/100


UnimplementedError: Graph execution error:

Detected at node 'binary_crossentropy/Cast' defined at (most recent call last):
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\runpy.py", line 193, in _run_module_as_main
      "__main__", mod_spec)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\runpy.py", line 85, in _run_code
      exec(code, run_globals)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
      app.start()
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
      self.io_loop.start()
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\asyncio\base_events.py", line 541, in run_forever
      self._run_once()
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\asyncio\base_events.py", line 1786, in _run_once
      handle._run()
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\asyncio\events.py", line 88, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\ipykernel\kernelbase.py", line 457, in dispatch_queue
      await self.process_one()
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\ipykernel\kernelbase.py", line 446, in process_one
      await dispatch(*args)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\ipykernel\kernelbase.py", line 353, in dispatch_shell
      await result
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\ipykernel\kernelbase.py", line 648, in execute_request
      reply_content = await reply_content
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\ipykernel\ipkernel.py", line 353, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
      return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\IPython\core\interactiveshell.py", line 2915, in run_cell
      raw_cell, store_history, silent, shell_futures)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\IPython\core\interactiveshell.py", line 2960, in _run_cell
      return runner(coro)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\IPython\core\async_helpers.py", line 78, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\IPython\core\interactiveshell.py", line 3186, in run_cell_async
      interactivity=interactivity, compiler=compiler, result=result)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\ALVINT~1\AppData\Local\Temp/ipykernel_21572/2206397254.py", line 2, in <module>
      fit_model = nn.fit(X_train_scaled,y_train,epochs=100,callbacks=[cp_callback])
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py", line 1384, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py", line 1021, in train_function
      return step_function(self, iterator)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py", line 1010, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py", line 1000, in run_step
      outputs = model.train_step(data)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py", line 860, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\keras\engine\training.py", line 919, in compute_loss
      y, y_pred, sample_weight, regularization_losses=self.losses)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\keras\engine\compile_utils.py", line 201, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\keras\losses.py", line 141, in __call__
      losses = call_fn(y_true, y_pred)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\keras\losses.py", line 245, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Users\Alvin Tran\anaconda3\envs\mlenv\lib\site-packages\keras\losses.py", line 1922, in binary_crossentropy
      y_true = tf.cast(y_true, y_pred.dtype)
Node: 'binary_crossentropy/Cast'
Cast string to float is not supported
	 [[{{node binary_crossentropy/Cast}}]] [Op:__inference_train_function_2746]

In [None]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")