# Car Price Prediction with TensorFlow

**Notebook Link**: https://www.kaggle.com/code/paritkansal/car-price-prediction-with-tensorflow

**Dataset Link**: https://www.kaggle.com/datasets/mayankpatel14/second-hand-used-cars-data-set-linear-regression

## Import Libraries

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.layers import Dense, InputLayer, Normalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanAbsoluteError
from tensorflow.keras.metrics import RootMeanSquaredError
import matplotlib.pyplot as plt

## Load Data

In [None]:
# Read the training CSV from the Kaggle input directory into a DataFrame
# and preview its first rows.
DATA_PATH = "/kaggle/input/second-hand-used-cars-data-set-linear-regression/train.csv"
data = pd.read_csv(DATA_PATH)
data.head()

In [None]:
# Pairwise scatter plots of the selected columns, with a kernel density
# estimate on the diagonal.
pairplot_columns = [
    'years', 'km', 'rating', 'condition', 'economy',
    'top speed', 'hp', 'torque', 'current price',
]
sns.pairplot(data[pairplot_columns], diag_kind='kde')

## Data Preprocessing

### DataFrame To Tensor

In [None]:
# Turn the whole DataFrame into a float32 tensor, then shuffle its rows.
# No seed is passed to the shuffle, so the row order (and therefore the
# train/validation/test split made from it) differs between runs.
raw_tensor = tf.constant(data)
tensor_data = tf.random.shuffle(tf.cast(raw_tensor, tf.float32))
print(tensor_data)

### Finding X

In [None]:
# Feature matrix: every row, columns 3 up to (but not including) the last.
# NOTE(review): presumably these are the 'years' ... 'torque' columns shown
# in the pair plot -- confirm against data.columns.
feature_columns = slice(3, -1)
X = tensor_data[:, feature_columns]
X.shape

### Finding y

In [None]:
# Target: the last column only. A slice (rather than index -1) is used so
# that y keeps two dimensions, one target value per row.
# NOTE(review): presumably the 'current price' column -- confirm.
target_column = slice(-1, None)
y = tensor_data[:, target_column]
y.shape

## Train, Validation, and Test Split

In [None]:
# Fractions of the rows assigned to the train / validation / test splits.
TRAIN_RATIO, VAL_RATIO, TEST_RATIO = 0.8, 0.1, 0.1

# Total number of rows (size of the first dimension of X).
DATASET_SIZE = X.shape[0]

In [None]:
# Row boundaries of the three consecutive splits, computed once.
train_end = int(DATASET_SIZE * TRAIN_RATIO)
val_end = int(DATASET_SIZE * (TRAIN_RATIO + VAL_RATIO))

# Training rows: [0, train_end)
X_train, y_train = X[:train_end], y[:train_end]

# Validation rows: [train_end, val_end)
X_val, y_val = X[train_end:val_end], y[train_end:val_end]

# Test rows: [val_end, end)
X_test, y_test = X[val_end:], y[val_end:]

In [None]:
# Build batched tf.data pipelines for the three splits.
BATCH_SIZE = 32

# Training data is reshuffled on every epoch. The shuffle buffer spans the
# whole training split: a buffer smaller than the dataset only mixes examples
# inside a sliding window instead of shuffling them uniformly.
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_dataset = (
    train_dataset
    .shuffle(buffer_size=max(len(X_train), 1), reshuffle_each_iteration=True)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation and test data are not shuffled, so iterating them repeatedly
# yields the examples in the same order each time.
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
val_dataset = val_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)


## Defining Model

In [None]:
# Normalization layer whose statistics are computed from the training
# features only (validation and test data are not used by adapt()).
normalizer = Normalization()
normalizer.adapt(X_train)

# Take the input width from the data instead of hard-coding it, so the model
# stays consistent if the feature slice that builds X changes.
num_features = X_train.shape[-1]

# Fully connected regressor: normalization, three ReLU hidden layers of 128
# units, and one linear output unit for the predicted value.
model = tf.keras.Sequential([
    InputLayer(shape=(num_features,)),
    normalizer,
    Dense(128, activation="relu"),
    Dense(128, activation="relu"),
    Dense(128, activation="relu"),
    Dense(1),
])
model.summary()

## Compiling Model

In [None]:
# Configure training: Adam optimizer, mean absolute error as the loss, and
# root mean squared error tracked as an additional metric.
optimizer = Adam(learning_rate=0.001)  # same value as Adam's default
model.compile(
    optimizer=optimizer,
    loss=MeanAbsoluteError(),
    metrics=[RootMeanSquaredError()],
)

## Model Fitting

In [None]:
# Fit on the training batches for a fixed number of epochs, evaluating on the
# validation batches as well; `history` records the per-epoch loss and metric.
EPOCHS = 100
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=EPOCHS,
    verbose=1,
)

## Visualization of Training and Validation Metrics

In [None]:
# Training vs. validation loss for each epoch.
for history_key, curve_label in (('loss', 'train_loss'), ('val_loss', 'val_loss')):
    plt.plot(history.history[history_key], label=curve_label)
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.show()

In [None]:
# Training vs. validation RMSE for each epoch.
rmse_curves = (
    ('root_mean_squared_error', 'train_RMSE'),
    ('val_root_mean_squared_error', 'val_RMSE'),
)
for history_key, curve_label in rmse_curves:
    plt.plot(history.history[history_key], label=curve_label)
plt.title('model performance')
plt.xlabel('epoch')
plt.ylabel('rmse')
plt.legend()
plt.show()

## Model Testing

In [None]:
# Evaluate on the held-out test batches; the result holds the loss followed
# by the metric(s) passed to compile().
test_metrics = model.evaluate(test_dataset)
test_metrics

In [None]:
from sklearn.metrics import r2_score

# Predictions for every test example, in dataset order.
y_pred = model.predict(test_dataset)

# Ground-truth targets, gathered batch by batch from the same dataset.
label_batches = [labels for _, labels in test_dataset]
y_test_actual = tf.concat(label_batches, axis=0).numpy()

# Coefficient of determination between actual and predicted values.
r2 = r2_score(y_test_actual, y_pred)
print(f"R² Score: {r2:.4f}")

In [None]:
model.predict(test_dataset).shape

In [None]:
# Bar chart comparing the predicted and the actual value of every test
# instance, drawn side by side.

# Predictions, flattened to one dimension.
y_pred = model.predict(test_dataset)
y_pred_flat = y_pred.flatten()

# Actual targets gathered from the test dataset, flattened the same way.
y_test_actual = tf.concat([labels for _, labels in test_dataset], axis=0).numpy()
y_test_actual_flat = y_test_actual.flatten()

# One x position per test instance; the second series is shifted by one
# bar width so the two bars sit next to each other.
nd = np.arange(len(y_pred_flat))
width = 0.1

plt.figure(figsize=(20, 10))
bar_series = (
    (0, y_pred_flat, 'Predicted Car Price'),
    (width, y_test_actual_flat, 'Actual Car Price'),
)
for offset, values, series_label in bar_series:
    plt.bar(nd + offset, values, width, label=series_label)

plt.title('Actual vs Predicted Car Prices')
plt.xlabel('Car Instances')
plt.ylabel('Car Prices')
plt.legend()
plt.show()