In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import torch
import torch.nn as nn
import torch.optim as optim

In [18]:
# Load and clean the dataset
data_columns = list(range(26))  # Dataset has 26 columns
file_path = "imports-85.data"
auto_data = pd.read_csv(file_path, header=None, names=data_columns, na_values="?")


In [19]:
# Drop rows with missing target values (column 25)
auto_data = auto_data.dropna(subset=[25])

In [20]:
# Convert numeric columns to proper types
numeric_columns = [0, 9, 10, 11, 12, 13, 16, 20, 23, 24, 25]
auto_data[numeric_columns] = auto_data[numeric_columns].apply(pd.to_numeric)

In [21]:
# Separate features and target
X = auto_data.drop(columns=[25])  # Features
y = auto_data[25]  # Target (Price)


In [22]:
# Log-transform the target to stabilize range
y = np.log1p(y)

In [23]:
# Handle categorical columns
categorical_columns = X.select_dtypes(include=["object"]).columns
ohe = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
categorical_encoded = ohe.fit_transform(X[categorical_columns])

In [24]:
# Combine numeric and encoded categorical features
numeric_features = X.select_dtypes(include=["number"]).values
X_processed = np.hstack((numeric_features, categorical_encoded))

In [25]:
# Normalize features
scaler = StandardScaler()
X_processed = scaler.fit_transform(X_processed)

In [26]:
# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X_processed, y, test_size=0.2, random_state=42)

In [27]:
# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

In [28]:
# Define the MLP model
class MLPRegressor(nn.Module):
    def __init__(self, input_size, hidden_layers):
        super(MLPRegressor, self).__init__()
        layers = []
        for i, hidden_size in enumerate(hidden_layers):
            layers.append(nn.Linear(input_size if i == 0 else hidden_layers[i-1], hidden_size))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(hidden_layers[-1], 1))  # Output layer for regression
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

In [29]:
# Model parameters
input_size = X_train.shape[1]
hidden_layers = [64, 32]
learning_rate = 0.001
batch_size = 32
epochs = 100

In [30]:
# Initialize the model, loss function, and optimizer
model = MLPRegressor(input_size, hidden_layers)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


In [31]:
# Training loop
def train_model(model, X_train, y_train, batch_size, epochs):
    for epoch in range(epochs):
        model.train()
        permutation = torch.randperm(X_train.size(0))
        epoch_loss = 0
        for i in range(0, X_train.size(0), batch_size):
            indices = permutation[i:i + batch_size]
            batch_X, batch_y = X_train[indices], y_train[indices]

            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {epoch_loss / (X_train.size(0) // batch_size):.4f}")

train_model(model, X_train_tensor, y_train_tensor, batch_size, epochs)

Epoch [1/100], Loss: nan
Epoch [2/100], Loss: nan
Epoch [3/100], Loss: nan
Epoch [4/100], Loss: nan
Epoch [5/100], Loss: nan
Epoch [6/100], Loss: nan
Epoch [7/100], Loss: nan
Epoch [8/100], Loss: nan
Epoch [9/100], Loss: nan
Epoch [10/100], Loss: nan
Epoch [11/100], Loss: nan
Epoch [12/100], Loss: nan
Epoch [13/100], Loss: nan
Epoch [14/100], Loss: nan
Epoch [15/100], Loss: nan
Epoch [16/100], Loss: nan
Epoch [17/100], Loss: nan
Epoch [18/100], Loss: nan
Epoch [19/100], Loss: nan
Epoch [20/100], Loss: nan
Epoch [21/100], Loss: nan
Epoch [22/100], Loss: nan
Epoch [23/100], Loss: nan
Epoch [24/100], Loss: nan
Epoch [25/100], Loss: nan
Epoch [26/100], Loss: nan
Epoch [27/100], Loss: nan
Epoch [28/100], Loss: nan
Epoch [29/100], Loss: nan
Epoch [30/100], Loss: nan
Epoch [31/100], Loss: nan
Epoch [32/100], Loss: nan
Epoch [33/100], Loss: nan
Epoch [34/100], Loss: nan
Epoch [35/100], Loss: nan
Epoch [36/100], Loss: nan
Epoch [37/100], Loss: nan
Epoch [38/100], Loss: nan
Epoch [39/100], Loss:

In [32]:
# Evaluate the model
def evaluate_model(model, X_test, y_test):
    model.eval()
    with torch.no_grad():
        predictions = model(X_test)
        mse = criterion(predictions, y_test).item()
        print(f"Mean Squared Error (MSE): {mse:.2f}")

evaluate_model(model, X_test_tensor, y_test_tensor)

Mean Squared Error (MSE): nan
