In [9]:
from typing import Tuple

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch import Tensor
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report

import matplotlib.pyplot as plt

In [10]:
df = pd.read_csv(r"C:\Users\Admin\Downloads\car_price_prediction_ (1).csv")

In [11]:
df

Unnamed: 0,Car ID,Brand,Year,Engine Size,Fuel Type,Transmission,Mileage,Condition,Price,Model
0,1,Tesla,2016,2.3,Petrol,Manual,114832,New,26613.92,Model X
1,2,BMW,2018,4.4,Electric,Manual,143190,Used,14679.61,5 Series
2,3,Audi,2013,4.5,Electric,Manual,181601,New,44402.61,A4
3,4,Tesla,2011,4.1,Diesel,Automatic,68682,New,86374.33,Model Y
4,5,Ford,2009,2.6,Diesel,Manual,223009,Like New,73577.10,Mustang
...,...,...,...,...,...,...,...,...,...,...
2495,2496,Audi,2020,2.4,Petrol,Automatic,22650,Like New,61384.10,Q5
2496,2497,Audi,2001,5.7,Hybrid,Manual,77701,Like New,24710.35,A3
2497,2498,Ford,2021,1.1,Hybrid,Manual,272827,Like New,29902.45,Fiesta
2498,2499,Audi,2002,4.5,Diesel,Manual,229164,Like New,46085.67,Q5


In [12]:
### 1. LOAD PRAJME I PREPROCESS NA DATA OK PRVO

In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer


# 1. Prvo sekogas split prajme na data odnosno ja delime na features i target
X = df.drop(columns=["Price","Car ID"]) 
y = df["Price"]

# 2. Identifikacija im prajme na site categoriski i numericki koloni 

cat_cols = X.select_dtypes(include="object").columns.tolist()
num_cols = [col for col in X.columns if col not in cat_cols]

# Gi transformirame kategoriskite podatoci
# vo Column transformer prvo ni ojt imeto, Encoder, kolona na to sho sakame da enkodirame

preprocesor = ColumnTransformer(transformers=[("cat",OneHotEncoder(drop="first",handle_unknown='ignore'),cat_cols),("num",StandardScaler(),num_cols)])
X_processed = preprocesor.fit_transform(X)
X_processed = X_processed.toarray()
# Split vo train/val/test

X_trainval,X_test,y_trainval,y_test = train_test_split(X_processed,y,test_size=0.2,random_state=42)

X_train, X_val, y_train, y_val = train_test_split(X_trainval,y_trainval,test_size=0.2,random_state=42)

In [19]:
### 2. KREIRAME PYTROCH DATASET 

In [21]:
import torch
from torch.utils.data import Dataset, DataLoader

class CarDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y.to_numpy().reshape(-1, 1), dtype=torch.float32)


    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Create datasets and loaders
train_dataset = CarDataset(X_train, y_train)
val_dataset = CarDataset(X_val, y_val)
test_dataset = CarDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)


In [22]:
import torch.nn as nn

def build_model_1(input_dim):
    return nn.Sequential(
        nn.Linear(input_dim, 64),
        nn.ReLU(),
        nn.Linear(64, 32),
        nn.ReLU(),
        nn.Linear(32, 1)  # output single value for regression
    )

In [23]:
def build_model_2(input_dim):
    return nn.Sequential(
        nn.Linear(input_dim, 128),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(128, 64),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Linear(64, 1)
    )


In [24]:
def build_model_3(input_dim):
    return nn.Sequential(
        nn.Linear(input_dim, 128),
        nn.BatchNorm1d(128),
        nn.LeakyReLU(),
        nn.Linear(128, 64),
        nn.BatchNorm1d(64),
        nn.LeakyReLU(),
        nn.Linear(64, 32),
        nn.LeakyReLU(),
        nn.Linear(32, 1)
    )


In [32]:
def train_one_epoch(model,loader,criterion,optimizer):
    model.train(),
    total_loss=0.0
    for X_batch,y_batch in loader:
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output,y_batch)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    return total_loss/len(loader)

In [33]:
def evaluate(model,loader,criterion):
    model.eval()
    total_loss=0.0
    with torch.no_grad():
        for X_batch,y_batch in loader:
            output = model(X_batch)
            loss = criterion(output,y_batch)
            total_loss += loss.item()

    return total_loss/len(loader)

In [34]:
import torch.optim as optim

input_dim = X_train.shape[1]
models = {
    "Model 1": build_model_1(input_dim),
    "Model 2": build_model_2(input_dim),
    "Model 3": build_model_3(input_dim)
}

num_epochs = 50
criterion = nn.MSELoss()  # regression
results = {}

for name, model in models.items():
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    train_losses, val_losses = [], []
    
    for epoch in range(num_epochs):
        train_loss = train_one_epoch(model, train_loader, criterion, optimizer)
        val_loss = evaluate(model, val_loader, criterion)
        train_losses.append(train_loss)
        val_losses.append(val_loss)
    
    results[name] = {"train_losses": train_losses, "val_losses": val_losses, "model": model}

# Compare models by lowest validation loss
for name, r in results.items():
    print(f"{name}: Final val loss = {r['val_losses'][-1]:.2f}")


Model 1: Final val loss = 874810417.23
Model 2: Final val loss = 858543266.46
Model 3: Final val loss = 1188534050.46


In [35]:
best_model_name = min(results, key=lambda k: results[k]["val_losses"][-1])
best_model = results[best_model_name]["model"]

# Evaluate on test set
test_loss = evaluate(best_model, test_loader, criterion)
print(f"Best model: {best_model_name}, Test MSE: {test_loss:.2f}")


Best model: Model 2, Test MSE: 884064460.00
