In [10]:
# 📦 Import needed libraries
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score

# 📥 Load the dataset
# Relative path: the file is looked up in the current working directory.
# The code below reads the `CarName` column, the columns listed in
# `features`, and `price` from this frame.

df = pd.read_csv("CarPrice.csv")

# 🛠 Fix typos in CarName
# Maps a misspelled leading fragment of a (lower-cased) car name to its
# corrected spelling.
corrections = {
    "maxda": "mazda",
    "vw": "volkswagen",
    "vokswagen": "volkswagen",
    "porcshce": "porsche",
    "toyouta": "toyota",
    "audi 100ls": "audi 100 ls"
}


def fix_car_name(name):
    """Return *name* lower-cased, stripped and with known typos corrected.

    Every key of ``corrections`` that the (progressively corrected) name
    starts with is replaced by its value; the rest of the name is kept.

    Non-string values -- e.g. the NaN that pandas uses for a missing cell,
    or ``None`` -- are returned unchanged instead of raising
    ``AttributeError`` on ``.lower()``.
    """
    if not isinstance(name, str):
        return name

    name = name.lower().strip()
    for wrong, correct in corrections.items():
        if name.startswith(wrong):
            # The match is known to sit at position 0, so swap the prefix
            # directly rather than searching the string again.
            name = correct + name[len(wrong):]
    return name

# Apply the spelling corrections to every car name.
# NOTE: `CarName` is not listed in `features`, so the column selection
# below drops it again before modelling.
df['CarName'] = df['CarName'].map(fix_car_name)

# ✅ Select features and target
features = [
    'carwidth', 'curbweight', 'enginesize', 'horsepower',
    'highwaympg', 'citympg', 'wheelbase', 'carbody',
    'drivewheel', 'enginelocation', 'enginetype', 'cylindernumber', 'fuelsystem'
]
target = 'price'

# Subset of `features` that gets one-hot encoded.
categorical_features = [
    'carbody', 'drivewheel', 'enginelocation',
    'enginetype', 'cylindernumber', 'fuelsystem'
]

# 🧪 Clean and encode data
# Keep only the modelling columns and drop rows with any missing value.
df = df[features + [target]].dropna()
# drop_first=True drops one indicator column per encoded feature.
# dtype=float keeps the indicators numeric: pandas >= 2.0 emits bool columns
# by default, which would turn the mixed frame into an object-dtype array.
df = pd.get_dummies(
    df, columns=categorical_features, drop_first=True, dtype=float
)

# 📊 Split features and label
# Explicit float arrays; y is reshaped to a column because StandardScaler
# expects 2-D input.
X = df.drop(columns=[target]).to_numpy(dtype=np.float64)
y = df[target].to_numpy(dtype=np.float64).reshape(-1, 1)

# 🔀 Train/test split (70 % / 30 %, fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# ⚖️ Normalize
# Both scalers are fitted on the training split only; the test split is
# transformed with the training statistics.
scaler_X = StandardScaler()
scaler_y = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)
y_train_scaled = scaler_y.fit_transform(y_train)
y_test_scaled = scaler_y.transform(y_test)

# 🔄 Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_scaled, dtype=torch.float32)

# 🧠 Define model
class CarPriceModel(nn.Module):
    """Linear regression model: one linear layer from ``input_dim`` features
    to a single output value."""

    def __init__(self, input_dim):
        """Create the layer for inputs that have ``input_dim`` features."""
        super(CarPriceModel, self).__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        prediction = self.linear(x)
        return prediction

# Seed PyTorch so the weight initialisation is repeatable, in line with the
# fixed random_state=42 used for the train/test split.
torch.manual_seed(42)
model = CarPriceModel(X_train_tensor.shape[1])

# ⚙️ Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# 🔁 Training
# Full-batch training: each epoch is a single optimizer step on the whole
# training set.
epochs = 500
model.train()  # set once; nothing inside the loop leaves training mode
for epoch in range(epochs):
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch % 50 == 0:
        print(f"Epoch {epoch} | Loss: {loss.item():.4f}")

# 🧪 Evaluation
model.eval()
with torch.no_grad():
    # Only the forward pass needs gradient tracking switched off.
    y_pred = model(X_test_tensor)

# Inverse scale the predictions to original price values
y_pred_rescaled = scaler_y.inverse_transform(y_pred.numpy())
# Use the untouched held-out prices as ground truth instead of round-tripping
# them through the scaler and a float32 tensor.
y_true_rescaled = y_test

# ✅ Evaluation Metrics (all computed in original price units)
rmse = np.sqrt(mean_squared_error(y_true_rescaled, y_pred_rescaled))
mape = mean_absolute_percentage_error(y_true_rescaled, y_pred_rescaled)
r2 = r2_score(y_true_rescaled, y_pred_rescaled)

print("\n📊 Evaluation Metrics:")
print(f"✅ RMSE: {rmse:.2f}")
print(f"✅ MAPE: {mape*100:.2f}%")  # mape is a fraction; shown as a percentage
print(f"✅ R² Score: {r2:.4f}")

Epoch 0 | Loss: 1.8659
Epoch 50 | Loss: 0.1086
Epoch 100 | Loss: 0.0875
Epoch 150 | Loss: 0.0817
Epoch 200 | Loss: 0.0783
Epoch 250 | Loss: 0.0758
Epoch 300 | Loss: 0.0738
Epoch 350 | Loss: 0.0722
Epoch 400 | Loss: 0.0708
Epoch 450 | Loss: 0.0697

📊 Evaluation Metrics:
✅ RMSE: 3042.48
✅ MAPE: 19.74%
✅ R² Score: 0.8664
