In [1]:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, random_split

import pandas as pd
from sklearn.preprocessing import StandardScaler
from tabulate import tabulate   # for clean tables

In [2]:
# Load Data
# The CSV path is relative, so the file must sit in the notebook's working directory.

DATA_FILENAME = "car_data.csv"
df_raw = pd.read_csv(DATA_FILENAME)

# Sanity check: dimensions plus the first rows of the untouched raw frame.
print("Raw shape:", df_raw.shape)
print(df_raw.head())


Raw shape: (301, 9)
  Car_Name  Year  Selling_Price  Present_Price  Kms_Driven Fuel_Type  \
0     ritz  2014           3.35           5.59       27000    Petrol   
1      sx4  2013           4.75           9.54       43000    Diesel   
2     ciaz  2017           7.25           9.85        6900    Petrol   
3  wagon r  2011           2.85           4.15        5200    Petrol   
4    swift  2014           4.60           6.87       42450    Diesel   

  Seller_Type Transmission  Owner  
0      Dealer       Manual      0  
1      Dealer       Manual      0  
2      Dealer       Manual      0  
3      Dealer       Manual      0  
4      Dealer       Manual      0  


In [3]:
# Preprocessing

def preprocess_dataset(df):
    """Turn the raw car table into a model-ready frame.

    The caller's frame is left untouched. ``Car_Name`` is discarded when
    present, the three categorical columns are one-hot encoded with the first
    level of each dropped, and the four numeric feature columns are
    standardised with a freshly fitted ``StandardScaler``. The target column
    ``Selling_Price`` is carried through unscaled.

    Args:
        df: DataFrame holding the numeric, categorical and target columns
            named below.

    Returns:
        tuple: ``(frame, numeric_cols, target_col, scaler)`` - the processed
        frame, the list of scaled column names, the one-item list with the
        target column name, and the fitted scaler.
    """
    numeric_cols = ["Year", "Present_Price", "Kms_Driven", "Owner"]
    categorical_cols = ["Fuel_Type", "Seller_Type", "Transmission"]
    target_col = ["Selling_Price"]

    # ``drop`` hands back a new frame, so the input is never modified;
    # ``errors="ignore"`` makes the Car_Name removal a no-op when it is absent.
    features = df.drop(columns=["Car_Name"], errors="ignore")

    # One-hot encode the categoricals, dropping the first level of each.
    encoded = pd.get_dummies(features, columns=categorical_cols, drop_first=True)

    # Standardise only the numeric feature columns.
    scaler = StandardScaler()
    scaler.fit(encoded[numeric_cols])
    encoded[numeric_cols] = scaler.transform(encoded[numeric_cols])

    return encoded, numeric_cols, target_col, scaler


# Run preprocessing once on the raw frame. The returned column lists and the
# fitted scaler are kept as notebook globals because later cells reuse them.
df, numeric_cols, target_col, scaler = preprocess_dataset(df_raw)
print("After preprocessing:", df.shape)



After preprocessing: (301, 9)


In [4]:
# 4. Convert to Tensors
# =====================
# Seed PyTorch so the train/validation split, batch shuffling and the later
# weight initialisation are repeatable under Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

X = df.drop(columns=target_col)
y = df[target_col]

# Ensure all columns are numeric
X = X.apply(pd.to_numeric, errors="coerce")
y = y.apply(pd.to_numeric, errors="coerce")

# Replace NaNs if any (just in case)
X = X.fillna(0)
y = y.fillna(0)

# Convert to float32 tensors. `target_col` is a one-item list, so `targets`
# is 2-D with a single column, matching the model's output layout.
inputs = torch.tensor(X.to_numpy(dtype="float32"))
targets = torch.tensor(y.to_numpy(dtype="float32"))

dataset = TensorDataset(inputs, targets)

# 80/20 train/validation split, drawn from a dedicated seeded generator so the
# partition does not depend on how much global RNG state was consumed before.
# NOTE: the scaler was fitted on all rows before this split, so validation
# statistics influence the scaled training features.
n_train = int(0.8 * len(dataset))
n_val = len(dataset) - n_train
train_ds, val_ds = random_split(
    dataset,
    [n_train, n_val],
    generator=torch.Generator().manual_seed(SEED),
)
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=64)


In [5]:
# 5. Define Model
# ===============
class CarPriceModel(nn.Module):
    """Fully connected regressor: input -> 64 -> 32 -> output with ReLU between layers.

    Args:
        input_size: Number of features per sample.
        output_size: Number of values predicted per sample (default 1).
    """

    def __init__(self, input_size, output_size=1):
        super().__init__()
        widths = (input_size, 64, 32)

        # Each hidden stage is Linear followed by ReLU; the output head has no
        # activation so predictions are unbounded.
        stack = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        stack.append(nn.Linear(widths[-1], output_size))

        self.net = nn.Sequential(*stack)

    def forward(self, xb):
        """Map a batch of feature rows to a batch of predictions."""
        return self.net(xb)



In [6]:
# 6. Training Utilities
# =====================
def evaluate(model, val_loader, loss_fn):
    """Return the average loss per sample over ``val_loader``.

    Each batch loss is weighted by its batch size, so a short final batch does
    not count as much as a full one. This assumes ``loss_fn`` returns the mean
    over the batch (the default reduction of ``nn.MSELoss``).

    The model is switched to eval mode and left there; callers that continue
    training must call ``model.train()`` again.

    Args:
        model: Module mapping an input batch to predictions.
        val_loader: Iterable of ``(inputs, targets)`` batches.
        loss_fn: Callable ``(predictions, targets) -> scalar tensor``.

    Returns:
        float: Sample-weighted mean loss.

    Raises:
        ValueError: If ``val_loader`` yields no samples.
    """
    model.eval()
    total_loss = 0.0
    n_samples = 0
    with torch.no_grad():
        for xb, yb in val_loader:
            batch_size = len(xb)
            total_loss += loss_fn(model(xb), yb).item() * batch_size
            n_samples += batch_size

    if n_samples == 0:
        raise ValueError("val_loader yielded no samples to evaluate")
    return total_loss / n_samples


def fit(epochs, model, train_loader, val_loader, lr=1e-3):
    """Train ``model`` with Adam on MSE loss and track validation loss.

    Args:
        epochs: Number of full passes over ``train_loader``.
        model: Module to optimise in place.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        lr: Adam learning rate.

    Returns:
        list[float]: Validation loss after each epoch (empty when ``epochs`` is 0).
    """
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()
    history = []

    for epoch in range(epochs):
        # evaluate() leaves the model in eval mode, so re-enable training mode
        # at the start of every epoch.
        model.train()
        for xb, yb in train_loader:
            # Clear gradients before the backward pass so any gradients already
            # stored on the parameters when fit() is called cannot leak into
            # the first update.
            opt.zero_grad()
            loss = loss_fn(model(xb), yb)
            loss.backward()
            opt.step()

        val_loss = evaluate(model, val_loader, loss_fn)
        history.append(val_loss)
        # Report every 10th epoch to keep the cell output short.
        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}, Val Loss: {val_loss:.4f}")
    return history


In [7]:
# 7. Train the Model
# ==================
# The input width is read from the feature frame, so it follows whatever
# columns the one-hot encoding produced.
model = CarPriceModel(input_size=X.shape[1])
# `history` receives one validation loss per epoch.
history = fit(100, model, train_loader, val_loader, lr=1e-3)


Epoch 10, Val Loss: 43.3669
Epoch 20, Val Loss: 10.6137
Epoch 30, Val Loss: 3.1057
Epoch 40, Val Loss: 6.5685
Epoch 50, Val Loss: 7.4249
Epoch 60, Val Loss: 7.4971
Epoch 70, Val Loss: 7.1453
Epoch 80, Val Loss: 6.6575
Epoch 90, Val Loss: 6.3768
Epoch 100, Val Loss: 5.9752


In [8]:
# 8. User Input + Prediction
# ==========================
def preprocess_user_input(car_name, year, present_price, kms_driven, fuel_type, transmission, owner):
    """Encode a single car description the same way as the training features.

    Relies on notebook globals created by earlier cells: ``X`` (training
    feature frame, for column names and order), ``numeric_cols`` and ``scaler``
    (fitted scaling), and ``df_raw`` (to know which category values exist).

    Args:
        car_name: Accepted for signature compatibility; not used as a feature.
        year: Model year.
        present_price: Current showroom price, in the unit used by the data.
        kms_driven: Odometer reading.
        fuel_type: Must match a ``Fuel_Type`` value in ``df_raw`` exactly.
        transmission: Must match a ``Transmission`` value in ``df_raw`` exactly.
        owner: Number of previous owners.

    Returns:
        torch.Tensor: float32 tensor with one row and ``len(X.columns)`` columns.

    Raises:
        ValueError: If a categorical value does not occur in ``df_raw``. Such a
            value would otherwise be encoded silently as the baseline category.
    """
    categorical_values = {
        "Fuel_Type": fuel_type,
        "Seller_Type": "Dealer",   # assume Dealer
        "Transmission": transmission,
    }

    # Reject unseen categories (the comparison is case-sensitive).
    for col, value in categorical_values.items():
        known = set(df_raw[col].dropna().unique())
        if value not in known:
            raise ValueError(
                f"Unknown {col} {value!r}; expected one of {sorted(map(str, known))}"
            )

    user_df = pd.DataFrame({
        "Year": [year],
        "Present_Price": [present_price],
        "Kms_Driven": [kms_driven],
        "Owner": [owner],
        **{col: [value] for col, value in categorical_values.items()},
    })

    # One-hot encode WITHOUT drop_first. A single-row frame has exactly one
    # level per categorical, so drop_first would remove every indicator and
    # the supplied fuel type and transmission would be ignored.
    user_df = pd.get_dummies(user_df, columns=list(categorical_values), dtype=float)

    # Align with the training layout: indicators for the dropped baseline level
    # are discarded, and indicators this row did not produce become 0.
    user_df = user_df.reindex(columns=X.columns, fill_value=0)

    # Scale numeric columns with the scaler fitted on the training data.
    user_df[numeric_cols] = scaler.transform(user_df[numeric_cols])

    return torch.tensor(user_df.to_numpy(dtype=float), dtype=torch.float32)


def predict_and_display(car_name, year, present_price, kms_driven, fuel_type, transmission, owner, model):
    """Predict the selling price for one car and print it as a grid table.

    Args:
        car_name: Shown in the table only; not used by the model.
        year, present_price, kms_driven, fuel_type, transmission, owner:
            Passed unchanged to ``preprocess_user_input``.
        model: Trained module mapping the encoded feature row to one value.

    Returns:
        None. The result is printed.
    """
    user_tensor = preprocess_user_input(car_name, year, present_price, kms_driven, fuel_type, transmission, owner)

    # Run inference in eval mode without autograd bookkeeping, then put the
    # model back in whatever mode the caller left it.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            pred = model(user_tensor).item()
    finally:
        model.train(was_training)

    table = [[car_name, year, present_price, kms_driven, fuel_type, transmission, owner, round(pred, 2)]]
    headers = ["Car Name", "Year", "Present Price (Lakhs)", "Kms Driven", "Fuel Type", "Transmission", "Owner", "Predicted Price (Lakhs)"]

    print("\n=== Prediction Result ===")
    print(tabulate(table, headers=headers, tablefmt="grid"))



In [None]:
# 9. Example Run
# ==============
# Collect one car description interactively. The int()/float() conversions
# raise ValueError if the typed text is not a valid number.
car_name = input("Enter Car Name: ")
year = int(input("Enter Year: "))
present_price = float(input("Enter Present Price (Lakhs): "))
kms_driven = int(input("Enter Kms Driven: "))
fuel_type = input("Enter Fuel Type (Petrol/Diesel/CNG): ")
transmission = input("Enter Transmission (Manual/Automatic): ")
owner = int(input("Enter Number of Previous Owners: "))

# Seller type is not prompted for; preprocess_user_input fixes it to "Dealer".
predict_and_display(car_name, year, present_price, kms_driven, fuel_type, transmission, owner, model)
