# Task: Car Price Prediction

## Import libraries


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Read data

In [None]:
# Load the car-price dataset (expected next to the notebook) into a DataFrame.
df = pd.read_csv('CarPrice.csv')

In [None]:
# Preview the first rows as a quick sanity check of the load.
df.head()

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# Column dtypes and non-null counts; this is also where missing values show up.
df.info()

## Check null values

## Data Preprocessing

In [None]:
# Number of distinct car names before any encoding is applied.
df['CarName'].nunique()

## Label encoder

In [None]:
from sklearn.preprocessing import LabelEncoder

### Encode the categorical features

In [None]:
# Names of every object-dtype column, in their original column order.
cat_features = df.columns[df.dtypes == 'object'].tolist()
cat_features

In [None]:
# Label-encode every categorical column in place.
#
# A separate fitted encoder is kept per column in `encoders`, so the integer
# codes of any column can be mapped back to the original categories later
# with encoders[column].inverse_transform(...). Refitting one shared encoder
# would only retain the mapping of the last column processed.
encoders = {}
encoder = LabelEncoder()  # keeps `encoder` defined even if cat_features is empty

for feature in cat_features:
    encoder = LabelEncoder()
    df[feature] = encoder.fit_transform(df[feature])
    encoders[feature] = encoder

In [None]:
# Distinct car names after label encoding; expected to match the count taken
# before encoding, since each distinct label gets its own integer code.
df['CarName'].nunique()

## Get the x and y data

In [None]:
# Features: every column except the first and the last, minus the car name.
x = df.iloc[:, 1:-1].drop(columns='CarName')

# Target: the price column, kept as a single-column DataFrame.
y = df['price'].to_frame()

## Scaling

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardises each feature column to zero mean and unit variance once fitted.
sc = StandardScaler()

In [None]:
# Fit the scaler on the feature matrix and return the standardised values.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set statistics influence the scaling — consider fitting on the
# training split only.
x_scaled = sc.fit_transform(x)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state is fixed so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y, test_size = 0.2, random_state = 0)

In [None]:
# Rows and columns of the training matrix. The `n_featuers` spelling is kept
# because the model definition further down refers to this exact name.
n_samples, n_featuers = x_train.shape
print(f'n_samples: {n_samples}, n_features: {n_featuers}')

In [None]:
import tensorflow.keras.backend as K
def root_mean_squared_error(y_true, y_pred):
    """Return the root-mean-squared error between predictions and targets.

    Built from Keras backend ops so it can be passed as ``loss`` to
    ``model.compile``. The mean is taken over all elements of the
    squared difference.
    """
    squared_error = K.square(y_pred - y_true)
    return K.sqrt(K.mean(squared_error))

# Same regression model as reference

In [None]:
# Imported here so this cell works when the notebook is run top to bottom;
# no earlier cell brings Sequential, Dense or tf into scope.
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

# Fully connected regression network: six ReLU hidden layers narrowing from
# 128 to 8 units, followed by a single linear output unit.
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(n_featuers,)))  # Input layer
for units in (64, 32, 32, 8, 8):
    model.add(Dense(units, activation='relu'))
model.add(Dense(1))  # Linear output for the regression target

# Optimise the custom RMSE loss and also report Keras' built-in RMSE metric.
model.compile(optimizer='adam',
              loss=root_mean_squared_error,
              metrics=[tf.keras.metrics.RootMeanSquaredError()])

In [None]:
# Train for 100 epochs in mini-batches of 64, holding out 10% of the training
# rows for per-epoch validation. `history` records the per-epoch loss/metrics.
history = model.fit(x_train, y_train, validation_split=0.1,
                            batch_size = 64, epochs = 100)

# Binary classification model

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Load the dataset
df = pd.read_csv("CarPrice.csv")

# Encode all object (categorical) columns with LabelEncoder
cat_features = [feature for feature in df.columns if df[feature].dtype == 'object']
encoder = LabelEncoder()

for feature in cat_features:
    df[feature] = encoder.fit_transform(df[feature])

# The binary target is fueltype. LabelEncoder assigns integer codes in sorted
# order of the labels, so for 'diesel'/'gas' the encoding is diesel=0, gas=1.

# Check how many unique CarName values
print("Unique CarName count:", df['CarName'].nunique())

# Features: everything from column 1 to the second-last column, without
# 'CarName' and without the target itself. Leaving 'fueltype' in X would leak
# the label into the inputs and make the reported accuracy meaningless.
X = df.iloc[:, 1:-1]
X = X.drop(columns=['CarName', 'fueltype'], errors='ignore')

# Target for binary classification
y = df['fueltype']

# Train/test split on the raw features (same partition as before, since the
# split depends only on the row count and random_state).
x_train_raw, x_test_raw, y_train, y_test = train_test_split(X, y,
                                                            test_size=0.2,
                                                            random_state=0)

# Fit the scaler on the training rows only, then apply it to both splits, so
# no test-set statistics influence the preprocessing.
sc = StandardScaler()
x_train = sc.fit_transform(x_train_raw)
x_test = sc.transform(x_test_raw)

n_samples = x_train.shape[0]
n_features = x_train.shape[1]
print(f'n_samples: {n_samples}, n_features: {n_features}')



In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

# Binary classifier: six ReLU hidden layers (L2 weight regularisation on the
# four widest ones) followed by a single sigmoid unit that outputs the
# probability of the positive class.
model_binary = Sequential()
model_binary.add(Dense(128, activation='relu',
                       input_shape=(n_features,),
                       kernel_regularizer='l2'))
model_binary.add(Dense(64, activation='relu', kernel_regularizer='l2'))
model_binary.add(Dense(32, activation='relu', kernel_regularizer='l2'))
model_binary.add(Dense(32, activation='relu', kernel_regularizer='l2'))
model_binary.add(Dense(8, activation='relu'))
model_binary.add(Dense(8, activation='relu'))

# Final layer: 1 neuron + sigmoid for binary classification
model_binary.add(Dense(1, activation='sigmoid'))

model_binary.compile(
    optimizer='adam',
    loss='binary_crossentropy',  # Binary crossentropy for binary classification
    metrics=['accuracy']
)

# summary() prints the table itself and returns None, so it is called directly
# rather than wrapped in print(), which would also emit a stray "None".
model_binary.summary()

# Train, holding out 20% of the training rows for per-epoch validation
history = model_binary.fit(
    x_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=10
)

# Evaluate on test set
loss, acc = model_binary.evaluate(x_test, y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {acc:.4f}")


# Multi-class model

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# 1) Load data
df = pd.read_csv("CarPrice.csv")

# 2) Multi-class target: 'carbody'.
#    Convert it to integer category codes (0..k-1), then one-hot encode the
#    codes for use with categorical crossentropy.
df['carbody'] = df['carbody'].astype('category')
df['carbody_code'] = df['carbody'].cat.codes
y = to_categorical(df['carbody_code'])        # shape: (num_samples, num_classes)

# 3) Feature columns
features = [
    'fueltype', 'aspiration', 'wheelbase', 'enginesize', 'horsepower',
    'citympg', 'highwaympg',
]

# .copy() gives an independent frame, so the in-place encoding below neither
# triggers pandas' chained-assignment warning nor depends on view semantics.
X = df[features].copy()

# Label-encode the categorical feature columns if they are not numeric yet.
# A numeric check (rather than `dtype == object`) also covers pandas'
# dedicated string dtypes.
for col in ['fueltype', 'aspiration']:
    if not pd.api.types.is_numeric_dtype(X[col]):
        X[col] = LabelEncoder().fit_transform(X[col])

# 4) Train/test split
x_train, x_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=42)

# Standardise the features, as the other models in this notebook do; the raw
# columns are on very different scales. The scaler is fitted on the training
# rows only so no test-set statistics leak into preprocessing.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# 5) Define n_features for the model's input shape
n_features = x_train.shape[1]
print("n_features:", n_features)

# ----------------------------------------------------------------
# Model Creation
# ----------------------------------------------------------------

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

# Number of output units comes from the one-hot target width, so the model
# stays correct if the number of distinct classes in the data is not 5.
n_classes = y_train.shape[1]

# Multi-class classifier: six ReLU hidden layers (L2 weight regularisation on
# the four widest ones) followed by a softmax over the classes.
model_multiclass = Sequential()
model_multiclass.add(Dense(128, activation='relu',
                           input_shape=(n_features,),
                           kernel_regularizer='l2'))
model_multiclass.add(Dense(64, activation='relu', kernel_regularizer='l2'))
model_multiclass.add(Dense(32, activation='relu', kernel_regularizer='l2'))
model_multiclass.add(Dense(32, activation='relu', kernel_regularizer='l2'))
model_multiclass.add(Dense(8, activation='relu'))
model_multiclass.add(Dense(8, activation='relu'))
# Final layer: one neuron per class + softmax for multi-class
model_multiclass.add(Dense(n_classes, activation='softmax'))

model_multiclass.compile(
    optimizer='adam',
    loss='categorical_crossentropy',  # Matches the one-hot encoded targets
    metrics=['accuracy']
)

# Train, holding out 20% of the training rows for per-epoch validation
history = model_multiclass.fit(
    x_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=10
)

# Evaluate on test set
loss, acc = model_multiclass.evaluate(x_test, y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {acc:.4f}")


# Multi-label output

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# 1) Load the dataset
df = pd.read_csv("CarPrice.csv")

# Multi-label setup: predict three binary columns at once.
#  - fueltype: gas (0) vs. diesel (1)
#  - aspiration: std (0) vs. turbo (1)
#  - doornumber: two (0) vs. four (1)
label_maps = {
    'fueltype': {'gas': 0, 'diesel': 1},
    'aspiration': {'std': 0, 'turbo': 1},
    'doornumber': {'two': 0, 'four': 1},
}

# Series.map() turns any value missing from the mapping into NaN, which would
# silently yield NaN labels (and a NaN loss). Fail early with the offending
# values instead.
for column, mapping in label_maps.items():
    encoded = df[column].map(mapping)
    unmapped = encoded.isna()
    if unmapped.any():
        bad_values = df.loc[unmapped, column].unique().tolist()
        raise ValueError(
            f"Column {column!r} contains missing or unexpected values: {bad_values}"
        )
    df[f'{column}_bin'] = encoded.astype(int)

# 2) Select the multi-label target (each label is a column in y)
y = df[['fueltype_bin', 'aspiration_bin', 'doornumber_bin']].values
print("y shape:", y.shape)  # (num_samples, 3)

# 3) Feature columns for X (all numeric here; any categorical column added
#    to this list must be encoded before scaling).
features = [
    'wheelbase', 'enginesize', 'horsepower', 'citympg', 'highwaympg',
]
X = df[features]

# 4) Train/test split on the raw features
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale: fit on the training rows only, then apply to both splits, so no
# test-set statistics influence the preprocessing.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train_raw)
x_test = scaler.transform(x_test_raw)

n_features = x_train.shape[1]
print("n_features:", n_features)

# ----------------------------------------------------------------
# Model Creation 
# ----------------------------------------------------------------

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

# Multi-label classifier: six ReLU hidden layers (L2 weight regularisation on
# the four widest ones) followed by three independent sigmoid outputs, one
# per binary label.
model_multilabel = Sequential()
model_multilabel.add(Dense(128, activation='relu',
                           input_shape=(n_features,),
                           kernel_regularizer='l2'))
model_multilabel.add(Dense(64, activation='relu', kernel_regularizer='l2'))
model_multilabel.add(Dense(32, activation='relu', kernel_regularizer='l2'))
model_multilabel.add(Dense(32, activation='relu', kernel_regularizer='l2'))
model_multilabel.add(Dense(8, activation='relu'))
model_multilabel.add(Dense(8, activation='relu'))
# Final layer: 3 neurons, each with a sigmoid for multi-label
model_multilabel.add(Dense(3, activation='sigmoid'))

# The string 'accuracy' is resolved by Keras from the output shape: with more
# than one output unit and same-shaped targets it becomes *categorical*
# accuracy (argmax comparison), which is meaningless for independent binary
# labels. BinaryAccuracy thresholds every label separately; naming it
# 'accuracy' keeps the history/log keys unchanged.
model_multilabel.compile(
    optimizer='adam',
    loss='binary_crossentropy',  # Each label is binary
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy')]
)

# Train, holding out 20% of the training rows for per-epoch validation
history = model_multilabel.fit(
    x_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=10
)

# Evaluate on test set (accuracy is the fraction of individual labels predicted correctly)
loss, acc = model_multilabel.evaluate(x_test, y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {acc:.4f}")
