In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

from google.colab import drive
drive.mount('/content/drive')


file_path = '/content/drive/MyDrive/Automobile_price_data_Raw_set.csv'
df = pd.read_csv(file_path)

print("Initial shape of data:", df.shape)
print("Initial columns:", df.columns)


# Replacing ±inf with NaN
df.replace([np.inf, -np.inf], np.nan, inplace=True)

# Dropping rows containing NaN
df.dropna(inplace=True)

# Converting 'price' (if not numeric) to float
df['price'] = df['price'].astype(float)

# Defining features and target
features = ["make", "body-style", "wheel-base", "engine-size",
            "horsepower", "peak-rpm", "highway-mpg"]
target = "price"

# Subsetting our DataFrame
df_model = df[features + [target]].copy()
print("Shape after selecting columns:", df_model.shape)

# Separating X (features) and y (target)
X = df_model[features].copy()
y = df_model[target].copy()

#  Labelling-encode 'make'
label_encoder = LabelEncoder()
X['make'] = label_encoder.fit_transform(X['make'])

# One-hot encoding 'body-style'
X = pd.get_dummies(X, columns=['body-style'], drop_first=True)
print("Columns after encoding:", X.columns)


numeric_cols = ["wheel-base", "engine-size", "horsepower", "peak-rpm", "highway-mpg"]

scaler = MinMaxScaler(feature_range=(0, 1))
X[numeric_cols] = scaler.fit_transform(X[numeric_cols])

# Checking result
print("\nPreview of scaled features (head):")
print(X.head())

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("\nTrain set shape:", X_train.shape, "Test set shape:", X_test.shape)

learning_rate = 0.01
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

model = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation='relu', input_shape=[X_train.shape[1]]),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(1)  # Predicting price (single numeric output)
])

model.compile(optimizer=optimizer, loss='mse', metrics=['mse'])

# Fitting model
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    verbose=0
)

# Evaluating on test set
dnn_eval = model.evaluate(X_test, y_test, verbose=0)
dnn_mse = dnn_eval[1]  # 'mse' is the second element
print("\n--- DNN Results ---")
print("DNN MSE on Test Set:", dnn_mse)
print("DNN RMSE on Test Set:", np.sqrt(dnn_mse))

lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

# Evaluating on test set
y_pred_lin = lin_reg.predict(X_test)
lin_mse = mean_squared_error(y_test, y_pred_lin)
lin_r2 = r2_score(y_test, y_pred_lin)

print("\n--- Linear Regression Results ---")
print("MSE on Test Set:", lin_mse)
print("RMSE on Test Set:", np.sqrt(lin_mse))
print("R^2 on Test Set:", lin_r2)


sample_data = {
    "make": ["audi"],
    "body-style": ["hatchback"],
    "wheel-base": [99.5],
    "engine-size": [131],
    "horsepower": [160],
    "peak-rpm": [5500],
    "highway-mpg": [22]
}
sample_df = pd.DataFrame(sample_data)


sample_df['make'] = label_encoder.transform(sample_df['make'])


sample_df = pd.get_dummies(sample_df, columns=["body-style"], drop_first=True)


sample_df = sample_df.reindex(columns=X.columns, fill_value=0)


sample_df[numeric_cols] = scaler.transform(sample_df[numeric_cols])

# DNN prediction
dnn_pred = model.predict(sample_df)[0][0]

# LinearRegression prediction
lin_pred = lin_reg.predict(sample_df)[0]

print("\n--- Single-Sample Prediction ---")
print("DNN predicted price:", dnn_pred)
print("Linear Regression predicted price:", lin_pred)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Initial shape of data: (205, 26)
Initial columns: Index(['symboling', 'normalized-losses', 'make', 'fuel-type', 'aspiration',
       'num-of-doors', 'body-style', 'drive-wheels', 'engine-location',
       'wheel-base', 'length', 'width', 'height', 'curb-weight', 'engine-type',
       'num-of-cylinders', 'engine-size', 'fuel-system', 'bore', 'stroke',
       'compression-ratio', 'horsepower', 'peak-rpm', 'city-mpg',
       'highway-mpg', 'price'],
      dtype='object')
Shape after selecting columns: (153, 8)
Columns after encoding: Index(['make', 'wheel-base', 'engine-size', 'horsepower', 'peak-rpm',
       'highway-mpg', 'body-style_hardtop', 'body-style_hatchback',
       'body-style_sedan', 'body-style_wagon'],
      dtype='object')

Preview of scaled features (head):
    make  wheel-base  engine-size  horsepower  peak-rpm  highway-mpg  \
3      0    0.4551

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



--- DNN Results ---
DNN MSE on Test Set: 8835337.0
DNN RMSE on Test Set: 2972.429477716839

--- Linear Regression Results ---
MSE on Test Set: 5768322.2627685685
RMSE on Test Set: 2401.733178929035
R^2 on Test Set: 0.8115309948997871
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step

--- Single-Sample Prediction ---
DNN predicted price: 26042.861
Linear Regression predicted price: 27685.29600715845

Done!
